From c6fc65f48072c9c6144ea2d145c011317bd03441 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 24 Mar 2021 22:14:15 +0200 Subject: [PATCH 01/10] mlxsw: spectrum_router: Add support for resilient nexthop groups Parse the configuration of resilient nexthop groups to existing mlxsw data structures. Unlike non-resilient groups, nexthops without a valid MAC or router interface (RIF) are programmed with a trap action instead of not being programmed at all. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 26 ++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 75c9fc47cd69..db859c2bd810 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2911,7 +2911,8 @@ struct mlxsw_sp_nexthop_group_info { u16 count; int sum_norm_weight; u8 adj_index_valid:1, - gateway:1; /* routes using the group use a gateway */ + gateway:1, /* routes using the group use a gateway */ + is_resilient:1; struct mlxsw_sp_nexthop nexthops[0]; #define nh_rif nexthops[0].rif }; @@ -3905,6 +3906,9 @@ static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh, if (!removing) { nh->action = MLXSW_SP_NEXTHOP_ACTION_FORWARD; nh->should_offload = 1; + } else if (nh->nhgi->is_resilient) { + nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP; + nh->should_offload = 1; } else { nh->should_offload = 0; } @@ -4484,6 +4488,15 @@ mlxsw_sp_nexthop_obj_init(struct mlxsw_sp *mlxsw_sp, if (nh_obj->is_reject) mlxsw_sp_nexthop_obj_blackhole_init(mlxsw_sp, nh); + /* In a resilient nexthop group, all the nexthops must be written to + * the adjacency table. Even if they do not have a valid neighbour or + * RIF. + */ + if (nh_grp->nhgi->is_resilient && !nh->should_offload) { + nh->action = MLXSW_SP_NEXTHOP_ACTION_TRAP; + nh->should_offload = 1; + } + return 0; err_type_init: @@ -4500,6 +4513,7 @@ static void mlxsw_sp_nexthop_obj_fini(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh); list_del(&nh->router_list_node); mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); + nh->should_offload = 0; } static int @@ -4509,6 +4523,7 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp, { struct mlxsw_sp_nexthop_group_info *nhgi; struct mlxsw_sp_nexthop *nh; + bool is_resilient = false; unsigned int nhs; int err, i; @@ -4519,6 +4534,10 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp, case NH_NOTIFIER_INFO_TYPE_GRP: nhs = info->nh_grp->num_nh; break; + case NH_NOTIFIER_INFO_TYPE_RES_TABLE: + nhs = info->nh_res_table->num_nh_buckets; + is_resilient = true; + break; default: return -EINVAL; } @@ -4529,6 +4548,7 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp, nh_grp->nhgi = nhgi; nhgi->nh_grp = nh_grp; nhgi->gateway = mlxsw_sp_nexthop_obj_is_gateway(mlxsw_sp, info); + nhgi->is_resilient = is_resilient; nhgi->count = nhs; for (i = 0; i < nhgi->count; i++) { struct nh_notifier_single_info *nh_obj; @@ -4544,6 +4564,10 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp, nh_obj = &info->nh_grp->nh_entries[i].nh; weight = info->nh_grp->nh_entries[i].weight; break; + case NH_NOTIFIER_INFO_TYPE_RES_TABLE: + nh_obj = &info->nh_res_table->nhs[i]; + weight = 1; + break; default: err = -EINVAL; goto err_nexthop_obj_init; From 62b67ff33bee9e1adf408ed3c708fdf3c69b15be Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 24 Mar 2021 22:14:16 +0200 Subject: [PATCH 02/10] mlxsw: spectrum_router: Add ability to overwrite adjacency entry only when inactive Allow the driver to instruct the device to only overwrite an adjacency entry if its activity is cleared. Currently, adjacency entry is always overwritten, regardless of activity. This will be used by subsequent patches to prevent replacement of active nexthop buckets. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_dpipe.c | 3 +- .../ethernet/mellanox/mlxsw/spectrum_ipip.c | 9 +++-- .../ethernet/mellanox/mlxsw/spectrum_ipip.h | 3 +- .../ethernet/mellanox/mlxsw/spectrum_router.c | 34 ++++++++++++------- .../ethernet/mellanox/mlxsw/spectrum_router.h | 2 +- 5 files changed, 32 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index af2093fc5025..9ff286ba9bf0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -1196,7 +1196,8 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable) else mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); mlxsw_sp_nexthop_eth_update(mlxsw_sp, - adj_index + adj_hash_index, nh); + adj_index + adj_hash_index, nh, + true); } return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index 6ccca39bae84..23896260720f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -127,14 +127,17 @@ bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr) static int mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_ipip_entry *ipip_entry) + struct mlxsw_sp_ipip_entry *ipip_entry, + bool force) { u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); __be32 daddr4 = mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev); char ratr_pl[MLXSW_REG_RATR_LEN]; + enum mlxsw_reg_ratr_op op; - mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, - true, MLXSW_REG_RATR_TYPE_IPIP, + op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY : + MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY; + mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_IPIP, adj_index, rif_index); mlxsw_reg_ratr_ipip4_entry_pack(ratr_pl, be32_to_cpu(daddr4)); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h index 87bef9880e5e..af7dd19f50e6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h @@ -40,7 +40,8 @@ struct mlxsw_sp_ipip_ops { enum mlxsw_sp_l3proto ul_proto; /* Underlay. */ int (*nexthop_update)(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_ipip_entry *ipip_entry); + struct mlxsw_sp_ipip_entry *ipip_entry, + bool force); bool (*can_offload)(const struct mlxsw_sp *mlxsw_sp, const struct net_device *ol_dev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index db859c2bd810..63c2c7a84c64 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3419,16 +3419,19 @@ err_mass_update_vr: static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh) + struct mlxsw_sp_nexthop *nh, + bool force) { struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; char ratr_pl[MLXSW_REG_RATR_LEN]; + enum mlxsw_reg_ratr_op op; u16 rif_index; rif_index = nh->rif ? nh->rif->rif_index : mlxsw_sp->router->lb_rif_index; - mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, - true, MLXSW_REG_RATR_TYPE_ETHERNET, + op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY : + MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY_ON_ACTIVITY; + mlxsw_reg_ratr_pack(ratr_pl, op, true, MLXSW_REG_RATR_TYPE_ETHERNET, adj_index, rif_index); switch (nh->action) { case MLXSW_SP_NEXTHOP_ACTION_FORWARD: @@ -3456,7 +3459,7 @@ static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, } int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh) + struct mlxsw_sp_nexthop *nh, bool force) { int i; @@ -3464,7 +3467,7 @@ int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, int err; err = __mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index + i, - nh); + nh, force); if (err) return err; } @@ -3474,17 +3477,19 @@ int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh) + struct mlxsw_sp_nexthop *nh, + bool force) { const struct mlxsw_sp_ipip_ops *ipip_ops; ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt]; - return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry); + return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry, + force); } static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh) + struct mlxsw_sp_nexthop *nh, bool force) { int i; @@ -3492,7 +3497,7 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, int err; err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i, - nh); + nh, force); if (err) return err; } @@ -3501,7 +3506,7 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh) + struct mlxsw_sp_nexthop *nh, bool force) { /* When action is discard or trap, the nexthop must be * programmed as an Ethernet nexthop. @@ -3509,9 +3514,11 @@ static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, if (nh->type == MLXSW_SP_NEXTHOP_TYPE_ETH || nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD || nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP) - return mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index, nh); + return mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index, nh, + force); else - return mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index, nh); + return mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index, nh, + force); } static int @@ -3534,7 +3541,8 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, if (nh->update || reallocate) { int err = 0; - err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh); + err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh, + true); if (err) return err; nh->update = 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 01fd9a3d5944..3bb2a06359a3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -209,7 +209,7 @@ bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh); int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, u64 *p_counter); int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh); + struct mlxsw_sp_nexthop *nh, bool force); void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh); void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp, From 197fdfd107e30a59e96691273b0b175cbf2471b8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 24 Mar 2021 22:14:17 +0200 Subject: [PATCH 03/10] mlxsw: spectrum_router: Pass payload pointer to nexthop update function Have the caller pass a pointer to the payload of the RATR register to the function updating a single nexthop / adjacency entry. In a subsequent patch, this will allow the caller to make sure replacement was successful by querying the state of the adjacency entry after replacement and comparing with the initial request. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_dpipe.c | 3 ++- .../ethernet/mellanox/mlxsw/spectrum_ipip.c | 3 +-- .../ethernet/mellanox/mlxsw/spectrum_ipip.h | 2 +- .../ethernet/mellanox/mlxsw/spectrum_router.c | 27 ++++++++++--------- .../ethernet/mellanox/mlxsw/spectrum_router.h | 3 ++- 5 files changed, 21 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c index 9ff286ba9bf0..1a2fef2a5379 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_dpipe.c @@ -1178,6 +1178,7 @@ out: static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable) { + char ratr_pl[MLXSW_REG_RATR_LEN]; struct mlxsw_sp *mlxsw_sp = priv; struct mlxsw_sp_nexthop *nh; u32 adj_hash_index = 0; @@ -1197,7 +1198,7 @@ static int mlxsw_sp_dpipe_table_adj_counters_update(void *priv, bool enable) mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh); mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index + adj_hash_index, nh, - true); + true, ratr_pl); } return 0; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c index 23896260720f..b8b08a6a1d10 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.c @@ -128,11 +128,10 @@ bool mlxsw_sp_l3addr_is_zero(union mlxsw_sp_l3addr addr) static int mlxsw_sp_ipip_nexthop_update_gre4(struct mlxsw_sp *mlxsw_sp, u32 adj_index, struct mlxsw_sp_ipip_entry *ipip_entry, - bool force) + bool force, char *ratr_pl) { u16 rif_index = mlxsw_sp_ipip_lb_rif_index(ipip_entry->ol_lb); __be32 daddr4 = mlxsw_sp_ipip_netdev_daddr4(ipip_entry->ol_dev); - char ratr_pl[MLXSW_REG_RATR_LEN]; enum mlxsw_reg_ratr_op op; op = force ? MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY : diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h index af7dd19f50e6..f0837b42d1d6 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ipip.h @@ -41,7 +41,7 @@ struct mlxsw_sp_ipip_ops { int (*nexthop_update)(struct mlxsw_sp *mlxsw_sp, u32 adj_index, struct mlxsw_sp_ipip_entry *ipip_entry, - bool force); + bool force, char *ratr_pl); bool (*can_offload)(const struct mlxsw_sp *mlxsw_sp, const struct net_device *ol_dev); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 63c2c7a84c64..02200b183bf7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3420,10 +3420,9 @@ err_mass_update_vr: static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, struct mlxsw_sp_nexthop *nh, - bool force) + bool force, char *ratr_pl) { struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry; - char ratr_pl[MLXSW_REG_RATR_LEN]; enum mlxsw_reg_ratr_op op; u16 rif_index; @@ -3459,7 +3458,8 @@ static int __mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, } int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh, bool force) + struct mlxsw_sp_nexthop *nh, bool force, + char *ratr_pl) { int i; @@ -3467,7 +3467,7 @@ int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, int err; err = __mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index + i, - nh, force); + nh, force, ratr_pl); if (err) return err; } @@ -3478,18 +3478,19 @@ int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, struct mlxsw_sp_nexthop *nh, - bool force) + bool force, char *ratr_pl) { const struct mlxsw_sp_ipip_ops *ipip_ops; ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt]; return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry, - force); + force, ratr_pl); } static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh, bool force) + struct mlxsw_sp_nexthop *nh, bool force, + char *ratr_pl) { int i; @@ -3497,7 +3498,7 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, int err; err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i, - nh, force); + nh, force, ratr_pl); if (err) return err; } @@ -3506,7 +3507,8 @@ static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp, } static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh, bool force) + struct mlxsw_sp_nexthop *nh, bool force, + char *ratr_pl) { /* When action is discard or trap, the nexthop must be * programmed as an Ethernet nexthop. @@ -3515,10 +3517,10 @@ static int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, nh->action == MLXSW_SP_NEXTHOP_ACTION_DISCARD || nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP) return mlxsw_sp_nexthop_eth_update(mlxsw_sp, adj_index, nh, - force); + force, ratr_pl); else return mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index, nh, - force); + force, ratr_pl); } static int @@ -3526,6 +3528,7 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group_info *nhgi, bool reallocate) { + char ratr_pl[MLXSW_REG_RATR_LEN]; u32 adj_index = nhgi->adj_index; /* base */ struct mlxsw_sp_nexthop *nh; int i; @@ -3542,7 +3545,7 @@ mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp, int err = 0; err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh, - true); + true, ratr_pl); if (err) return err; nh->update = 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 3bb2a06359a3..b85c5f6c2262 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -209,7 +209,8 @@ bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh); int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, u64 *p_counter); int mlxsw_sp_nexthop_eth_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index, - struct mlxsw_sp_nexthop *nh, bool force); + struct mlxsw_sp_nexthop *nh, bool force, + char *ratr_pl); void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh); void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp, From 617a77f044ed7a92bb0c01c939d816f870947d5a Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 24 Mar 2021 22:14:18 +0200 Subject: [PATCH 04/10] mlxsw: spectrum_router: Add nexthop bucket replacement support Replace a single nexthop bucket upon receiving a 'NEXTHOP_EVENT_BUCKET_REPLACE' notification. When the 'force' parameter is not set, instruct the device to only overwrite an adjacency entry if its activity is cleared, so as not to break existing flows using the adjacency entry. The device does not provide feedback if the replacement was successful in this case, so the contents of the adjacency entry after the replacement are compared with the replacement request. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 134 ++++++++++++++++++ 1 file changed, 134 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 02200b183bf7..d6e91f1f48cc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -4337,6 +4337,8 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, } } +#define MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL 1000 /* ms */ + static int mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp, const struct nh_notifier_single_info *nh, @@ -4806,6 +4808,134 @@ static void mlxsw_sp_nexthop_obj_del(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop_obj_group_destroy(mlxsw_sp, nh_grp); } +static int mlxsw_sp_nexthop_obj_bucket_query(struct mlxsw_sp *mlxsw_sp, + u32 adj_index, char *ratr_pl) +{ + MLXSW_REG_ZERO(ratr, ratr_pl); + mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ); + mlxsw_reg_ratr_adjacency_index_low_set(ratr_pl, adj_index); + mlxsw_reg_ratr_adjacency_index_high_set(ratr_pl, adj_index >> 16); + + return mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl); +} + +static int mlxsw_sp_nexthop_obj_bucket_compare(char *ratr_pl, char *ratr_pl_new) +{ + /* Clear the opcode and activity on both the old and new payload as + * they are irrelevant for the comparison. + */ + mlxsw_reg_ratr_op_set(ratr_pl, MLXSW_REG_RATR_OP_QUERY_READ); + mlxsw_reg_ratr_a_set(ratr_pl, 0); + mlxsw_reg_ratr_op_set(ratr_pl_new, MLXSW_REG_RATR_OP_QUERY_READ); + mlxsw_reg_ratr_a_set(ratr_pl_new, 0); + + /* If the contents of the adjacency entry are consistent with the + * replacement request, then replacement was successful. + */ + if (!memcmp(ratr_pl, ratr_pl_new, MLXSW_REG_RATR_LEN)) + return 0; + + return -EINVAL; +} + +static int +mlxsw_sp_nexthop_obj_bucket_adj_update(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_nexthop *nh, + struct nh_notifier_info *info) +{ + u16 bucket_index = info->nh_res_bucket->bucket_index; + struct netlink_ext_ack *extack = info->extack; + bool force = info->nh_res_bucket->force; + char ratr_pl_new[MLXSW_REG_RATR_LEN]; + char ratr_pl[MLXSW_REG_RATR_LEN]; + u32 adj_index; + int err; + + /* No point in trying an atomic replacement if the idle timer interval + * is smaller than the interval in which we query and clear activity. + */ + force = info->nh_res_bucket->idle_timer_ms < + MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL; + + adj_index = nh->nhgi->adj_index + bucket_index; + err = mlxsw_sp_nexthop_update(mlxsw_sp, adj_index, nh, force, ratr_pl); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to overwrite nexthop bucket"); + return err; + } + + if (!force) { + err = mlxsw_sp_nexthop_obj_bucket_query(mlxsw_sp, adj_index, + ratr_pl_new); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to query nexthop bucket state after replacement. State might be inconsistent"); + return err; + } + + err = mlxsw_sp_nexthop_obj_bucket_compare(ratr_pl, ratr_pl_new); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket was not replaced because it was active during replacement"); + return err; + } + } + + nh->update = 0; + nh->offloaded = 1; + + return 0; +} + +static int mlxsw_sp_nexthop_obj_bucket_replace(struct mlxsw_sp *mlxsw_sp, + struct nh_notifier_info *info) +{ + u16 bucket_index = info->nh_res_bucket->bucket_index; + struct netlink_ext_ack *extack = info->extack; + struct mlxsw_sp_nexthop_group_info *nhgi; + struct nh_notifier_single_info *nh_obj; + struct mlxsw_sp_nexthop_group *nh_grp; + struct mlxsw_sp_nexthop *nh; + int err; + + nh_grp = mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, info->id); + if (!nh_grp) { + NL_SET_ERR_MSG_MOD(extack, "Nexthop group was not found"); + return -EINVAL; + } + + nhgi = nh_grp->nhgi; + + if (bucket_index >= nhgi->count) { + NL_SET_ERR_MSG_MOD(extack, "Nexthop bucket index out of range"); + return -EINVAL; + } + + nh = &nhgi->nexthops[bucket_index]; + mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh); + + nh_obj = &info->nh_res_bucket->new_nh; + err = mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to initialize nexthop object for nexthop bucket replacement"); + goto err_nexthop_obj_init; + } + + err = mlxsw_sp_nexthop_obj_bucket_adj_update(mlxsw_sp, nh, info); + if (err) + goto err_nexthop_obj_bucket_adj_update; + + return 0; + +err_nexthop_obj_bucket_adj_update: + mlxsw_sp_nexthop_obj_fini(mlxsw_sp, nh); +err_nexthop_obj_init: + nh_obj = &info->nh_res_bucket->old_nh; + mlxsw_sp_nexthop_obj_init(mlxsw_sp, nh_grp, nh, nh_obj, 1); + /* The old adjacency entry was not overwritten */ + nh->update = 0; + nh->offloaded = 1; + return err; +} + static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -4827,6 +4957,10 @@ static int mlxsw_sp_nexthop_obj_event(struct notifier_block *nb, case NEXTHOP_EVENT_DEL: mlxsw_sp_nexthop_obj_del(router->mlxsw_sp, info); break; + case NEXTHOP_EVENT_BUCKET_REPLACE: + err = mlxsw_sp_nexthop_obj_bucket_replace(router->mlxsw_sp, + info); + break; default: break; } From d7761cb30374d5fcd45dd91ec015b9e7f7d5a120 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 24 Mar 2021 22:14:19 +0200 Subject: [PATCH 05/10] mlxsw: spectrum_router: Update hardware flags on nexthop buckets So far, mlxsw only updated hardware flags ('offload' / 'trap') on nexthop objects. For resilient nexthop groups, these flags need to be updated on individual nexthop buckets as well. Update these flags whenever updating the flags of the encapsulating nexthop object and whenever a nexthop bucket is replaced. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index d6e91f1f48cc..862c8667813b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -3762,10 +3762,30 @@ mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, __mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry); } +static void +mlxsw_sp_nexthop_bucket_offload_refresh(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nexthop *nh, + u16 bucket_index) +{ + struct mlxsw_sp_nexthop_group *nh_grp = nh->nhgi->nh_grp; + bool offload = false, trap = false; + + if (nh->offloaded) { + if (nh->action == MLXSW_SP_NEXTHOP_ACTION_TRAP) + trap = true; + else + offload = true; + } + nexthop_bucket_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id, + bucket_index, offload, trap); +} + static void mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { + int i; + /* Do not update the flags if the nexthop group is being destroyed * since: * 1. The nexthop objects is being deleted, in which case the flags are @@ -3779,6 +3799,18 @@ mlxsw_sp_nexthop_obj_group_offload_refresh(struct mlxsw_sp *mlxsw_sp, nexthop_set_hw_flags(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id, nh_grp->nhgi->adj_index_valid, false); + + /* Update flags of individual nexthop buckets in case of a resilient + * nexthop group. + */ + if (!nh_grp->nhgi->is_resilient) + return; + + for (i = 0; i < nh_grp->nhgi->count; i++) { + struct mlxsw_sp_nexthop *nh = &nh_grp->nhgi->nexthops[i]; + + mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, i); + } } static void @@ -3832,6 +3864,10 @@ mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp, dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n"); goto set_trap; } + /* Flags of individual nexthop buckets might need to be + * updated. + */ + mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp); return 0; } mlxsw_sp_nexthop_group_normalize(nhgi); @@ -4881,6 +4917,7 @@ mlxsw_sp_nexthop_obj_bucket_adj_update(struct mlxsw_sp *mlxsw_sp, nh->update = 0; nh->offloaded = 1; + mlxsw_sp_nexthop_bucket_offload_refresh(mlxsw_sp, nh, bucket_index); return 0; } From 75d495b02982f5fa7eeb3fec9d9616bb7ab958bd Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 24 Mar 2021 22:14:20 +0200 Subject: [PATCH 06/10] mlxsw: reg: Add Router Adjacency Table Activity Dump Register The RATRAD register is used to dump and optionally clear activity bits of router adjacency table entries. Will be used by the next patch to query and clear the activity of nexthop buckets in a resilient nexthop group. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 55 +++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index d33c79ad1810..900b4bf5bb5b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -8130,6 +8130,60 @@ mlxsw_reg_rtdp_ipip4_pack(char *payload, u16 irif, mlxsw_reg_rtdp_ipip_expected_gre_key_set(payload, expected_gre_key); } +/* RATRAD - Router Adjacency Table Activity Dump Register + * ------------------------------------------------------ + * The RATRAD register is used to dump and optionally clear activity bits of + * router adjacency table entries. + */ +#define MLXSW_REG_RATRAD_ID 0x8022 +#define MLXSW_REG_RATRAD_LEN 0x210 + +MLXSW_REG_DEFINE(ratrad, MLXSW_REG_RATRAD_ID, MLXSW_REG_RATRAD_LEN); + +enum { + /* Read activity */ + MLXSW_REG_RATRAD_OP_READ_ACTIVITY, + /* Read and clear activity */ + MLXSW_REG_RATRAD_OP_READ_CLEAR_ACTIVITY, +}; + +/* reg_ratrad_op + * Access: Operation + */ +MLXSW_ITEM32(reg, ratrad, op, 0x00, 30, 2); + +/* reg_ratrad_ecmp_size + * ecmp_size is the amount of sequential entries from adjacency_index. Valid + * ranges: + * Spectrum-1: 32-64, 512, 1024, 2048, 4096 + * Spectrum-2/3: 32-128, 256, 512, 1024, 2048, 4096 + * Access: Index + */ +MLXSW_ITEM32(reg, ratrad, ecmp_size, 0x00, 0, 13); + +/* reg_ratrad_adjacency_index + * Index into the adjacency table. + * Access: Index + */ +MLXSW_ITEM32(reg, ratrad, adjacency_index, 0x04, 0, 24); + +/* reg_ratrad_activity_vector + * Activity bit per adjacency index. + * Bits higher than ecmp_size are reserved. + * Access: RO + */ +MLXSW_ITEM_BIT_ARRAY(reg, ratrad, activity_vector, 0x10, 0x200, 1); + +static inline void mlxsw_reg_ratrad_pack(char *payload, u32 adjacency_index, + u16 ecmp_size) +{ + MLXSW_REG_ZERO(ratrad, payload); + mlxsw_reg_ratrad_op_set(payload, + MLXSW_REG_RATRAD_OP_READ_CLEAR_ACTIVITY); + mlxsw_reg_ratrad_ecmp_size_set(payload, ecmp_size); + mlxsw_reg_ratrad_adjacency_index_set(payload, adjacency_index); +} + /* RIGR-V2 - Router Interface Group Register Version 2 * --------------------------------------------------- * The RIGR_V2 register is used to add, remove and query egress interface list @@ -12114,6 +12168,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(rtar), MLXSW_REG(ratr), MLXSW_REG(rtdp), + MLXSW_REG(ratrad), MLXSW_REG(rdpm), MLXSW_REG(ricnt), MLXSW_REG(rrcr), From debd2b3bf5735ec935f53f01834df6dbec35c8d3 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 24 Mar 2021 22:14:21 +0200 Subject: [PATCH 07/10] mlxsw: spectrum_router: Periodically update activity of nexthop buckets The kernel periodically checks the idle time of nexthop buckets to determine if they are idle and can be re-populated with a new nexthop. When the resilient nexthop group is offloaded to hardware, the kernel will not see activity on nexthop buckets unless it is reported from hardware. Therefore, periodically (every 1 second) query the hardware for activity of adjacency entries used as part of a resilient nexthop group and report it to the nexthop code. The activity is only queried if resilient nexthop groups are in use. The delayed work is canceled otherwise. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 100 ++++++++++++++++++ .../ethernet/mellanox/mlxsw/spectrum_router.h | 2 + 2 files changed, 102 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 862c8667813b..1d74341596da 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2913,6 +2913,7 @@ struct mlxsw_sp_nexthop_group_info { u8 adj_index_valid:1, gateway:1, /* routes using the group use a gateway */ is_resilient:1; + struct list_head list; /* member in nh_res_grp_list */ struct mlxsw_sp_nexthop nexthops[0]; #define nh_rif nexthops[0].rif }; @@ -4373,8 +4374,85 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, } } +static void +mlxsw_sp_nh_grp_activity_get(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nexthop_group *nh_grp, + unsigned long *activity) +{ + char *ratrad_pl; + int i, err; + + ratrad_pl = kmalloc(MLXSW_REG_RATRAD_LEN, GFP_KERNEL); + if (!ratrad_pl) + return; + + mlxsw_reg_ratrad_pack(ratrad_pl, nh_grp->nhgi->adj_index, + nh_grp->nhgi->count); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ratrad), ratrad_pl); + if (err) + goto out; + + for (i = 0; i < nh_grp->nhgi->count; i++) { + if (!mlxsw_reg_ratrad_activity_vector_get(ratrad_pl, i)) + continue; + bitmap_set(activity, i, 1); + } + +out: + kfree(ratrad_pl); +} + #define MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL 1000 /* ms */ +static void +mlxsw_sp_nh_grp_activity_update(struct mlxsw_sp *mlxsw_sp, + const struct mlxsw_sp_nexthop_group *nh_grp) +{ + unsigned long *activity; + + activity = bitmap_zalloc(nh_grp->nhgi->count, GFP_KERNEL); + if (!activity) + return; + + mlxsw_sp_nh_grp_activity_get(mlxsw_sp, nh_grp, activity); + nexthop_res_grp_activity_update(mlxsw_sp_net(mlxsw_sp), nh_grp->obj.id, + nh_grp->nhgi->count, activity); + + bitmap_free(activity); +} + +static void +mlxsw_sp_nh_grp_activity_work_schedule(struct mlxsw_sp *mlxsw_sp) +{ + unsigned int interval = MLXSW_SP_NH_GRP_ACTIVITY_UPDATE_INTERVAL; + + mlxsw_core_schedule_dw(&mlxsw_sp->router->nh_grp_activity_dw, + msecs_to_jiffies(interval)); +} + +static void mlxsw_sp_nh_grp_activity_work(struct work_struct *work) +{ + struct mlxsw_sp_nexthop_group_info *nhgi; + struct mlxsw_sp_router *router; + bool reschedule = false; + + router = container_of(work, struct mlxsw_sp_router, + nh_grp_activity_dw.work); + + mutex_lock(&router->lock); + + list_for_each_entry(nhgi, &router->nh_res_grp_list, list) { + mlxsw_sp_nh_grp_activity_update(router->mlxsw_sp, nhgi->nh_grp); + reschedule = true; + } + + mutex_unlock(&router->lock); + + if (!reschedule) + return; + mlxsw_sp_nh_grp_activity_work_schedule(router->mlxsw_sp); +} + static int mlxsw_sp_nexthop_obj_single_validate(struct mlxsw_sp *mlxsw_sp, const struct nh_notifier_single_info *nh, @@ -4632,6 +4710,15 @@ mlxsw_sp_nexthop_obj_group_info_init(struct mlxsw_sp *mlxsw_sp, goto err_group_refresh; } + /* Add resilient nexthop groups to a list so that the activity of their + * nexthop buckets will be periodically queried and cleared. + */ + if (nhgi->is_resilient) { + if (list_empty(&mlxsw_sp->router->nh_res_grp_list)) + mlxsw_sp_nh_grp_activity_work_schedule(mlxsw_sp); + list_add(&nhgi->list, &mlxsw_sp->router->nh_res_grp_list); + } + return 0; err_group_refresh: @@ -4650,8 +4737,15 @@ mlxsw_sp_nexthop_obj_group_info_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop_group *nh_grp) { struct mlxsw_sp_nexthop_group_info *nhgi = nh_grp->nhgi; + struct mlxsw_sp_router *router = mlxsw_sp->router; int i; + if (nhgi->is_resilient) { + list_del(&nhgi->list); + if (list_empty(&mlxsw_sp->router->nh_res_grp_list)) + cancel_delayed_work(&router->nh_grp_activity_dw); + } + for (i = nhgi->count - 1; i >= 0; i--) { struct mlxsw_sp_nexthop *nh = &nhgi->nexthops[i]; @@ -9652,6 +9746,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp, if (err) goto err_ll_op_ctx_init; + INIT_LIST_HEAD(&mlxsw_sp->router->nh_res_grp_list); + INIT_DELAYED_WORK(&mlxsw_sp->router->nh_grp_activity_dw, + mlxsw_sp_nh_grp_activity_work); + INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list); err = __mlxsw_sp_router_init(mlxsw_sp); if (err) @@ -9775,6 +9873,7 @@ err_ipips_init: err_rifs_init: __mlxsw_sp_router_fini(mlxsw_sp); err_router_init: + cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw); mlxsw_sp_router_ll_op_ctx_fini(router); err_ll_op_ctx_init: mlxsw_sp_router_xm_fini(mlxsw_sp); @@ -9806,6 +9905,7 @@ void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) mlxsw_sp_ipips_fini(mlxsw_sp); mlxsw_sp_rifs_fini(mlxsw_sp); __mlxsw_sp_router_fini(mlxsw_sp); + cancel_delayed_work_sync(&mlxsw_sp->router->nh_grp_activity_dw); mlxsw_sp_router_ll_op_ctx_fini(mlxsw_sp->router); mlxsw_sp_router_xm_fini(mlxsw_sp); mutex_destroy(&mlxsw_sp->router->lock); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index b85c5f6c2262..be7708a375e1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -80,6 +80,8 @@ struct mlxsw_sp_router { struct mlxsw_sp_router_xm *xm; const struct mlxsw_sp_adj_grp_size_range *adj_grp_size_ranges; size_t adj_grp_size_ranges_count; + struct delayed_work nh_grp_activity_dw; + struct list_head nh_res_grp_list; }; struct mlxsw_sp_fib_entry_priv { From 03490a8239156933366f5595250fe3dc5d0e18aa Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 24 Mar 2021 22:14:22 +0200 Subject: [PATCH 08/10] mlxsw: spectrum_router: Enable resilient nexthop groups to be programmed Now that mlxsw supports resilient nexthop groups, allow them to be programmed after validating that their configuration conforms to the device's limitations (e.g., number of buckets is within predefined range). Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- .../ethernet/mellanox/mlxsw/spectrum_router.c | 86 ++++++++++++++++++- 1 file changed, 85 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 1d74341596da..6ccaa194733b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -4519,11 +4519,86 @@ mlxsw_sp_nexthop_obj_group_validate(struct mlxsw_sp *mlxsw_sp, return 0; } +static int +mlxsw_sp_nexthop_obj_res_group_size_validate(struct mlxsw_sp *mlxsw_sp, + const struct nh_notifier_res_table_info *nh_res_table, + struct netlink_ext_ack *extack) +{ + unsigned int alloc_size; + bool valid_size = false; + int err, i; + + if (nh_res_table->num_nh_buckets < 32) { + NL_SET_ERR_MSG_MOD(extack, "Minimum number of buckets is 32"); + return -EINVAL; + } + + for (i = 0; i < mlxsw_sp->router->adj_grp_size_ranges_count; i++) { + const struct mlxsw_sp_adj_grp_size_range *size_range; + + size_range = &mlxsw_sp->router->adj_grp_size_ranges[i]; + + if (nh_res_table->num_nh_buckets >= size_range->start && + nh_res_table->num_nh_buckets <= size_range->end) { + valid_size = true; + break; + } + } + + if (!valid_size) { + NL_SET_ERR_MSG_MOD(extack, "Invalid number of buckets"); + return -EINVAL; + } + + err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp, + MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, + nh_res_table->num_nh_buckets, + &alloc_size); + if (err || nh_res_table->num_nh_buckets != alloc_size) { + NL_SET_ERR_MSG_MOD(extack, "Number of buckets does not fit allocation size of any KVDL partition"); + return -EINVAL; + } + + return 0; +} + +static int +mlxsw_sp_nexthop_obj_res_group_validate(struct mlxsw_sp *mlxsw_sp, + const struct nh_notifier_res_table_info *nh_res_table, + struct netlink_ext_ack *extack) +{ + int err; + u16 i; + + err = mlxsw_sp_nexthop_obj_res_group_size_validate(mlxsw_sp, + nh_res_table, + extack); + if (err) + return err; + + for (i = 0; i < nh_res_table->num_nh_buckets; i++) { + const struct nh_notifier_single_info *nh; + int err; + + nh = &nh_res_table->nhs[i]; + err = mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh, + extack); + if (err) + return err; + } + + return 0; +} + static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp, unsigned long event, struct nh_notifier_info *info) { - if (event != NEXTHOP_EVENT_REPLACE) + struct nh_notifier_single_info *nh; + + if (event != NEXTHOP_EVENT_REPLACE && + event != NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE && + event != NEXTHOP_EVENT_BUCKET_REPLACE) return 0; switch (info->type) { @@ -4534,6 +4609,14 @@ static int mlxsw_sp_nexthop_obj_validate(struct mlxsw_sp *mlxsw_sp, return mlxsw_sp_nexthop_obj_group_validate(mlxsw_sp, info->nh_grp, info->extack); + case NH_NOTIFIER_INFO_TYPE_RES_TABLE: + return mlxsw_sp_nexthop_obj_res_group_validate(mlxsw_sp, + info->nh_res_table, + info->extack); + case NH_NOTIFIER_INFO_TYPE_RES_BUCKET: + nh = &info->nh_res_bucket->new_nh; + return mlxsw_sp_nexthop_obj_group_entry_validate(mlxsw_sp, nh, + info->extack); default: NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type"); return -EOPNOTSUPP; @@ -4551,6 +4634,7 @@ static bool mlxsw_sp_nexthop_obj_is_gateway(struct mlxsw_sp *mlxsw_sp, return info->nh->gw_family || info->nh->is_reject || mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL); case NH_NOTIFIER_INFO_TYPE_GRP: + case NH_NOTIFIER_INFO_TYPE_RES_TABLE: /* Already validated earlier. */ return true; default: From 861584724c44e63bfb684090c70ade660dae6c69 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 24 Mar 2021 22:14:23 +0200 Subject: [PATCH 09/10] selftests: mlxsw: Test unresolved neigh trap with resilient nexthop groups The number of nexthop buckets in a resilient nexthop group never changes, so when the gateway address of a nexthop cannot be resolved, the nexthop buckets are programmed to trap packets to the CPU in order to trigger resolution. For example: # ip nexthop add id 1 via 198.51.100.1 dev swp3 # ip nexthop add id 10 group 1 type resilient buckets 32 # ip nexthop bucket get id 10 index 0 id 10 index 0 idle_time 1.44 nhid 1 trap Where 198.51.100.1 is a made-up IP. Test that in this case packets are indeed trapped to the CPU via the unresolved neigh trap. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- .../net/mlxsw/devlink_trap_l3_exceptions.sh | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh index 1fedfc9da434..42d44e27802c 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh @@ -446,6 +446,35 @@ __invalid_nexthop_test() log_test "Unresolved neigh: nexthop does not exist: $desc" } +__invalid_nexthop_bucket_test() +{ + local desc=$1; shift + local dip=$1; shift + local via_add=$1; shift + local trap_name="unresolved_neigh" + + RET=0 + + # Check that route to nexthop that does not exist triggers + # unresolved_neigh + ip nexthop add id 1 via $via_add dev $rp2 + ip nexthop add id 10 group 1 type resilient buckets 32 + ip route add $dip nhid 10 + + t0_packets=$(devlink_trap_rx_packets_get $trap_name) + ping_do $h1 $dip + t1_packets=$(devlink_trap_rx_packets_get $trap_name) + + if [[ $t0_packets -eq $t1_packets ]]; then + check_err 1 "Trap counter did not increase" + fi + + ip route del $dip nhid 10 + ip nexthop del id 10 + ip nexthop del id 1 + log_test "Unresolved neigh: nexthop bucket does not exist: $desc" +} + unresolved_neigh_test() { __host_miss_test "IPv4" 198.51.100.1 @@ -453,6 +482,8 @@ unresolved_neigh_test() __invalid_nexthop_test "IPv4" 198.51.100.1 198.51.100.3 24 198.51.100.4 __invalid_nexthop_test "IPv6" 2001:db8:2::1 2001:db8:2::3 64 \ 2001:db8:2::4 + __invalid_nexthop_bucket_test "IPv4" 198.51.100.1 198.51.100.4 + __invalid_nexthop_bucket_test "IPv6" 2001:db8:2::1 2001:db8:2::4 } vrf_without_routes_create() From ffd3e9b07b9ee3242fa5f5bc76385b6a0e69437d Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 24 Mar 2021 22:14:24 +0200 Subject: [PATCH 10/10] selftests: mlxsw: Add resilient nexthop groups configuration tests Test that unsupported resilient nexthop group configurations are rejected and that offload / trap indication is correctly set on nexthop buckets in a resilient group. Signed-off-by: Ido Schimmel Reviewed-by: Petr Machata Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/rtnetlink.sh | 82 +++++++++++++++++++ tools/testing/selftests/net/forwarding/lib.sh | 5 ++ 2 files changed, 87 insertions(+) diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index ed346da5d3cb..a217f9f6775b 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -33,6 +33,7 @@ ALL_TESTS=" nexthop_obj_invalid_test nexthop_obj_offload_test nexthop_obj_group_offload_test + nexthop_obj_bucket_offload_test nexthop_obj_blackhole_offload_test nexthop_obj_route_offload_test devlink_reload_test @@ -739,11 +740,28 @@ nexthop_obj_invalid_test() ip nexthop add id 1 dev $swp1 ip nexthop add id 2 dev $swp1 + ip nexthop add id 3 via 192.0.2.3 dev $swp1 ip nexthop add id 10 group 1/2 check_fail $? "managed to configure a nexthop group with device-only nexthops when should not" + ip nexthop add id 10 group 3 type resilient buckets 7 + check_fail $? "managed to configure a too small resilient nexthop group when should not" + + ip nexthop add id 10 group 3 type resilient buckets 129 + check_fail $? "managed to configure a resilient nexthop group with invalid number of buckets when should not" + + ip nexthop add id 10 group 1/2 type resilient buckets 32 + check_fail $? "managed to configure a resilient nexthop group with device-only nexthops when should not" + + ip nexthop add id 10 group 3 type resilient buckets 32 + check_err $? "failed to configure a valid resilient nexthop group" + ip nexthop replace id 3 dev $swp1 + check_fail $? "managed to populate a nexthop bucket with a device-only nexthop when should not" + log_test "nexthop objects - invalid configurations" + ip nexthop del id 10 + ip nexthop del id 3 ip nexthop del id 2 ip nexthop del id 1 @@ -858,6 +876,70 @@ nexthop_obj_group_offload_test() simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 } +nexthop_obj_bucket_offload_test() +{ + # Test offload indication of nexthop buckets + RET=0 + + simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64 + simple_if_init $swp2 + setup_wait + + ip nexthop add id 1 via 192.0.2.2 dev $swp1 + ip nexthop add id 2 via 2001:db8:1::2 dev $swp1 + ip nexthop add id 10 group 1/2 type resilient buckets 32 idle_timer 0 + ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop bucket show nhid 1 + check_err $? "IPv4 nexthop buckets not marked as offloaded when should" + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop bucket show nhid 2 + check_err $? "IPv6 nexthop buckets not marked as offloaded when should" + + # Invalidate nexthop id 1 + ip neigh replace 192.0.2.2 nud failed dev $swp1 + busywait "$TIMEOUT" wait_for_trap \ + ip nexthop bucket show nhid 1 + check_err $? "IPv4 nexthop buckets not marked with trap when should" + + # Invalidate nexthop id 2 + ip neigh replace 2001:db8:1::2 nud failed dev $swp1 + busywait "$TIMEOUT" wait_for_trap \ + ip nexthop bucket show nhid 2 + check_err $? "IPv6 nexthop buckets not marked with trap when should" + + # Revalidate nexthop id 1 by changing its configuration + ip nexthop replace id 1 via 192.0.2.3 dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop bucket show nhid 1 + check_err $? "nexthop bucket not marked as offloaded after revalidating nexthop" + + # Revalidate nexthop id 2 by changing its neighbour + ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \ + dev $swp1 + busywait "$TIMEOUT" wait_for_offload \ + ip nexthop bucket show nhid 2 + check_err $? "nexthop bucket not marked as offloaded after revalidating neighbour" + + log_test "nexthop bucket offload indication" + + ip neigh del 2001:db8:1::2 dev $swp1 + ip neigh del 192.0.2.3 dev $swp1 + ip neigh del 192.0.2.2 dev $swp1 + ip nexthop del id 10 + ip nexthop del id 2 + ip nexthop del id 1 + + simple_if_fini $swp2 + simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64 +} + nexthop_obj_blackhole_offload_test() { # Test offload indication of blackhole nexthop objects diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index be71012b8fc5..05c05e02bade 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -353,6 +353,11 @@ wait_for_offload() "$@" | grep -q offload } +wait_for_trap() +{ + "$@" | grep -q trap +} + until_counter_is() { local expr=$1; shift