From 509541b440e64e155201dd304411e2daa152fe8f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 30 Mar 2023 11:02:22 +0300 Subject: [PATCH 01/10] net/mlx5e: Factor out IPsec ASO update function The ASO update is common operation which is going to be used in next patch, so as a preparation, let's refactor the code for future reuse. As part of this refactoring, not used function argument was removed too. Link: https://lore.kernel.org/r/d04770b959822fed51c22c13e798f04d760a682e.1680162300.git.leonro@nvidia.com Reviewed-by: Raed Salem Signed-off-by: Leon Romanovsky --- .../mlx5/core/en_accel/ipsec_offload.c | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 5342b0b07681..43cfa4df1311 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -275,26 +275,21 @@ void mlx5_accel_esp_modify_xfrm(struct mlx5e_ipsec_sa_entry *sa_entry, memcpy(&sa_entry->attrs, attrs, sizeof(sa_entry->attrs)); } -static void -mlx5e_ipsec_aso_update_esn(struct mlx5e_ipsec_sa_entry *sa_entry, - const struct mlx5_accel_esp_xfrm_attrs *attrs) +static void mlx5e_ipsec_aso_update(struct mlx5e_ipsec_sa_entry *sa_entry, + struct mlx5_wqe_aso_ctrl_seg *data) { - struct mlx5_wqe_aso_ctrl_seg data = {}; + data->data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BITWISE_64BIT << 6; + data->condition_1_0_operand = MLX5_ASO_ALWAYS_TRUE | + MLX5_ASO_ALWAYS_TRUE << 4; - data.data_mask_mode = MLX5_ASO_DATA_MASK_MODE_BITWISE_64BIT << 6; - data.condition_1_0_operand = MLX5_ASO_ALWAYS_TRUE | MLX5_ASO_ALWAYS_TRUE - << 4; - data.data_offset_condition_operand = MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET; - data.bitwise_data = cpu_to_be64(BIT_ULL(54)); - data.data_mask = data.bitwise_data; - - mlx5e_ipsec_aso_query(sa_entry, &data); + 
mlx5e_ipsec_aso_query(sa_entry, data); } static void mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry, u32 mode_param) { struct mlx5_accel_esp_xfrm_attrs attrs = {}; + struct mlx5_wqe_aso_ctrl_seg data = {}; if (mode_param < MLX5E_IPSEC_ESN_SCOPE_MID) { sa_entry->esn_state.esn++; @@ -305,7 +300,13 @@ static void mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry, mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &attrs); mlx5_accel_esp_modify_xfrm(sa_entry, &attrs); - mlx5e_ipsec_aso_update_esn(sa_entry, &attrs); + + data.data_offset_condition_operand = + MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET; + data.bitwise_data = cpu_to_be64(BIT_ULL(54)); + data.data_mask = data.bitwise_data; + + mlx5e_ipsec_aso_update(sa_entry, &data); } static void mlx5e_ipsec_handle_event(struct work_struct *_work) From 2da961d21663a72610f36a4f69b1090f2c61f23f Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 30 Mar 2023 11:02:23 +0300 Subject: [PATCH 02/10] net/mlx5e: Prevent zero IPsec soft/hard limits Hardware triggers limit events when packets arrive and are processed through the device. If zero is configured as a limit, the HW won't be able to arm the event, as that happens at the end of the execution pipeline. Let's prevent such a configuration. 
Link: https://lore.kernel.org/r/80d0ba33e21fb28b1b91d306d1da39df3d990b68.1680162300.git.leonro@nvidia.com Reviewed-by: Raed Salem Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 91fa0a366316..c2e4f30d1f76 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -283,6 +283,11 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, NL_SET_ERR_MSG_MOD(extack, "Hard packet limit must be greater than soft one"); return -EINVAL; } + + if (!x->lft.soft_packet_limit || !x->lft.hard_packet_limit) { + NL_SET_ERR_MSG_MOD(extack, "Soft/hard packet limits can't be 0"); + return -EINVAL; + } break; default: NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type"); From d05971a413d3c3dd207c604a6dbfc702070da63e Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 30 Mar 2023 11:02:24 +0300 Subject: [PATCH 03/10] net/mlx5e: Add SW implementation to support IPsec 64 bit soft and hard limits The CX7 cards which support IPsec packet offload use 32 bits to configure soft and hard packet limits. This is not enough as the software part using 64 bits. The needed functionality of supporting 64 bits is implemented through mlx5 abstraction layer, which will ensure that HW is reconfigured on-demand every 2^31 packets. To simulate the 64 bit IPsec soft/hard limits, we divide the soft/hard limits to multiple interrupts (rounds). Each round counts 2^31 packets. Once the counter is less than or equal to 2^31, the soft event is raised and software sets the bit 31 of the counter and decrement the round counter. 
Link: https://lore.kernel.org/r/5a86c890b6dccb6865acf9042a8b03f899d1f3f9.1680162300.git.leonro@nvidia.com Reviewed-by: Raed Salem Signed-off-by: Leon Romanovsky --- .../mellanox/mlx5/core/en_accel/ipsec.c | 114 +++++++++++++++-- .../mellanox/mlx5/core/en_accel/ipsec.h | 17 ++- .../mlx5/core/en_accel/ipsec_offload.c | 119 ++++++++++++++++-- 3 files changed, 227 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index c2e4f30d1f76..3612cdd37b5a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -87,25 +87,113 @@ static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry, struct mlx5_accel_esp_xfrm_attrs *attrs) { struct xfrm_state *x = sa_entry->x; + s64 start_value, n; - attrs->hard_packet_limit = x->lft.hard_packet_limit; + attrs->lft.hard_packet_limit = x->lft.hard_packet_limit; + attrs->lft.soft_packet_limit = x->lft.soft_packet_limit; if (x->lft.soft_packet_limit == XFRM_INF) return; - /* Hardware decrements hard_packet_limit counter through - * the operation. While fires an event when soft_packet_limit - * is reached. It emans that we need substitute the numbers - * in order to properly count soft limit. + /* Compute hard limit initial value and number of rounds. * - * As an example: - * XFRM user sets soft limit is 2 and hard limit is 9 and - * expects to see soft event after 2 packets and hard event - * after 9 packets. In our case, the hard limit will be set - * to 9 and soft limit is comparator to 7 so user gets the - * soft event after 2 packeta + * The counting pattern of hardware counter goes: + * value -> 2^31-1 + * 2^31 | (2^31-1) -> 2^31-1 + * 2^31 | (2^31-1) -> 2^31-1 + * [..] + * 2^31 | (2^31-1) -> 0 + * + * The pattern is created by using an ASO operation to atomically set + * bit 31 after the down counter clears bit 31. 
This is effectively an + * atomic addition of 2**31 to the counter. + * + * We wish to configure the counter, within the above pattern, so that + * when it reaches 0, it has hit the hard limit. This is defined by this + * system of equations: + * + * hard_limit == start_value + n * 2^31 + * n >= 0 + * start_value < 2^32, start_value >= 0 + * + * These equations are not single-solution, there are often two choices: + * hard_limit == start_value + n * 2^31 + * hard_limit == (start_value+2^31) + (n-1) * 2^31 + * + * The algorithm selects the solution that keeps the counter value + * above 2^31 until the final iteration. */ - attrs->soft_packet_limit = - x->lft.hard_packet_limit - x->lft.soft_packet_limit; + + /* Start by estimating n and compute start_value */ + n = attrs->lft.hard_packet_limit / BIT_ULL(31); + start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31); + + /* Choose the best of the two solutions: */ + if (n >= 1) + n -= 1; + + /* Computed values solve the system of equations: */ + start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31); + + /* The best solution means: when there are multiple iterations we must + * start above 2^31 and count down to 2**31 to get the interrupt. + */ + attrs->lft.hard_packet_limit = lower_32_bits(start_value); + attrs->lft.numb_rounds_hard = (u64)n; + + /* Compute soft limit initial value and number of rounds. + * + * The soft_limit is achieved by adjusting the counter's + * interrupt_value. This is embedded in the counting pattern created by + * hard packet calculations above. + * + * We wish to compute the interrupt_value for the soft_limit. This is + * defined by this system of equations: + * + * soft_limit == start_value - soft_value + n * 2^31 + * n >= 0 + * soft_value < 2^32, soft_value >= 0 + * for n == 0 start_value > soft_value + * + * As with compute_hard_n_value() the equations are not single-solution. 
+ * The algorithm selects the solution that has: + * 2^30 <= soft_limit < 2^31 + 2^30 + * for the interior iterations, which guarantees a large guard band + * around the counter hard limit and next interrupt. + */ + + /* Start by estimating n and compute soft_value */ + n = (x->lft.soft_packet_limit - attrs->lft.hard_packet_limit) / BIT_ULL(31); + start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - + x->lft.soft_packet_limit; + + /* Compare against constraints and adjust n */ + if (n < 0) + n = 0; + else if (start_value >= BIT_ULL(32)) + n -= 1; + else if (start_value < 0) + n += 1; + + /* Choose the best of the two solutions: */ + start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value; + if (n != attrs->lft.numb_rounds_hard && start_value < BIT_ULL(30)) + n += 1; + + /* Note that the upper limit of soft_value happens naturally because we + * always select the lowest soft_value. + */ + + /* Computed values solve the system of equations: */ + start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value; + + /* The best solution means: when there are multiple iterations we must + * not fall below 2^30 as that would get too close to the false + * hard_limit and when we reach an interior iteration for soft_limit it + * has to be far away from 2**32-1 which is the counter reset point + * after the +2^31 to accommodate latency. 
+ */ + attrs->lft.soft_packet_limit = lower_32_bits(start_value); + attrs->lft.numb_rounds_soft = (u64)n; } void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 68ae5230eb75..0c58c3583b0f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -60,6 +60,13 @@ struct upspec { u8 proto; }; +struct mlx5_ipsec_lft { + u64 hard_packet_limit; + u64 soft_packet_limit; + u64 numb_rounds_hard; + u64 numb_rounds_soft; +}; + struct mlx5_accel_esp_xfrm_attrs { u32 esn; u32 spi; @@ -85,8 +92,7 @@ struct mlx5_accel_esp_xfrm_attrs { u32 replay_window; u32 authsize; u32 reqid; - u64 hard_packet_limit; - u64 soft_packet_limit; + struct mlx5_ipsec_lft lft; }; enum mlx5_ipsec_cap { @@ -170,6 +176,12 @@ struct mlx5e_ipsec_modify_state_work { struct mlx5_accel_esp_xfrm_attrs attrs; }; +struct mlx5e_ipsec_limits { + u64 round; + u8 soft_limit_hit : 1; + u8 fix_limit : 1; +}; + struct mlx5e_ipsec_sa_entry { struct mlx5e_ipsec_esn_state esn_state; struct xfrm_state *x; @@ -181,6 +193,7 @@ struct mlx5e_ipsec_sa_entry { u32 enc_key_id; struct mlx5e_ipsec_rule ipsec_rule; struct mlx5e_ipsec_modify_state_work modify_work; + struct mlx5e_ipsec_limits limits; }; struct mlx5_accel_pol_xfrm_attrs { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 43cfa4df1311..684de9739e69 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -8,6 +8,7 @@ enum { MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET, + MLX5_IPSEC_ASO_REMOVE_FLOW_SOFT_LFT_OFFSET, }; u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) @@ -100,15 +101,15 @@ static void mlx5e_ipsec_packet_setup(void *obj, u32 
pdn, if (attrs->dir == XFRM_DEV_OFFLOAD_OUT) MLX5_SET(ipsec_aso, aso_ctx, mode, MLX5_IPSEC_ASO_INC_SN); - if (attrs->hard_packet_limit != XFRM_INF) { + if (attrs->lft.hard_packet_limit != XFRM_INF) { MLX5_SET(ipsec_aso, aso_ctx, remove_flow_pkt_cnt, - lower_32_bits(attrs->hard_packet_limit)); + attrs->lft.hard_packet_limit); MLX5_SET(ipsec_aso, aso_ctx, hard_lft_arm, 1); } - if (attrs->soft_packet_limit != XFRM_INF) { + if (attrs->lft.soft_packet_limit != XFRM_INF) { MLX5_SET(ipsec_aso, aso_ctx, remove_flow_soft_lft, - lower_32_bits(attrs->soft_packet_limit)); + attrs->lft.soft_packet_limit); MLX5_SET(ipsec_aso, aso_ctx, soft_lft_arm, 1); } @@ -309,6 +310,110 @@ static void mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry, mlx5e_ipsec_aso_update(sa_entry, &data); } +static void mlx5e_ipsec_aso_update_hard(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5_wqe_aso_ctrl_seg data = {}; + + data.data_offset_condition_operand = + MLX5_IPSEC_ASO_REMOVE_FLOW_PKT_CNT_OFFSET; + data.bitwise_data = cpu_to_be64(BIT_ULL(57) + BIT_ULL(31)); + data.data_mask = data.bitwise_data; + mlx5e_ipsec_aso_update(sa_entry, &data); +} + +static void mlx5e_ipsec_aso_update_soft(struct mlx5e_ipsec_sa_entry *sa_entry, + u32 val) +{ + struct mlx5_wqe_aso_ctrl_seg data = {}; + + data.data_offset_condition_operand = + MLX5_IPSEC_ASO_REMOVE_FLOW_SOFT_LFT_OFFSET; + data.bitwise_data = cpu_to_be64(val); + data.data_mask = cpu_to_be64(U32_MAX); + mlx5e_ipsec_aso_update(sa_entry, &data); +} + +static void mlx5e_ipsec_handle_limits(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs; + struct mlx5e_ipsec *ipsec = sa_entry->ipsec; + struct mlx5e_ipsec_aso *aso = ipsec->aso; + bool soft_arm, hard_arm; + u64 hard_cnt; + + lockdep_assert_held(&sa_entry->x->lock); + + soft_arm = !MLX5_GET(ipsec_aso, aso->ctx, soft_lft_arm); + hard_arm = !MLX5_GET(ipsec_aso, aso->ctx, hard_lft_arm); + if (!soft_arm && !hard_arm) + /* It is not 
lifetime event */ + return; + + hard_cnt = MLX5_GET(ipsec_aso, aso->ctx, remove_flow_pkt_cnt); + if (!hard_cnt || hard_arm) { + /* It is possible to see packet counter equal to zero without + * hard limit event armed. Such situation can be if packet + * decreased, while we handled soft limit event. + * + * However it will be HW/FW bug if hard limit event is raised + * and packet counter is not zero. + */ + WARN_ON_ONCE(hard_arm && hard_cnt); + + /* Notify about hard limit */ + xfrm_state_check_expire(sa_entry->x); + return; + } + + /* We are in soft limit event. */ + if (!sa_entry->limits.soft_limit_hit && + sa_entry->limits.round == attrs->lft.numb_rounds_soft) { + sa_entry->limits.soft_limit_hit = true; + /* Notify about soft limit */ + xfrm_state_check_expire(sa_entry->x); + + if (sa_entry->limits.round == attrs->lft.numb_rounds_hard) + goto hard; + + if (attrs->lft.soft_packet_limit > BIT_ULL(31)) { + /* We cannot avoid a soft_value that might have the high + * bit set. For instance soft_value=2^31+1 cannot be + * adjusted to the low bit clear version of soft_value=1 + * because it is too close to 0. + * + * Thus we have this corner case where we can hit the + * soft_limit with the high bit set, but cannot adjust + * the counter. Thus we set a temporary interrupt_value + * at least 2^30 away from here and do the adjustment + * then. 
+ */ + mlx5e_ipsec_aso_update_soft(sa_entry, + BIT_ULL(31) - BIT_ULL(30)); + sa_entry->limits.fix_limit = true; + return; + } + + sa_entry->limits.fix_limit = true; + } + +hard: + if (sa_entry->limits.round == attrs->lft.numb_rounds_hard) { + mlx5e_ipsec_aso_update_soft(sa_entry, 0); + attrs->lft.soft_packet_limit = XFRM_INF; + return; + } + + mlx5e_ipsec_aso_update_hard(sa_entry); + sa_entry->limits.round++; + if (sa_entry->limits.round == attrs->lft.numb_rounds_soft) + mlx5e_ipsec_aso_update_soft(sa_entry, + attrs->lft.soft_packet_limit); + if (sa_entry->limits.fix_limit) { + sa_entry->limits.fix_limit = false; + mlx5e_ipsec_aso_update_soft(sa_entry, BIT_ULL(31) - 1); + } +} + static void mlx5e_ipsec_handle_event(struct work_struct *_work) { struct mlx5e_ipsec_work *work = @@ -339,10 +444,8 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work) mlx5e_ipsec_update_esn_state(sa_entry, mode_param); } - if (attrs->soft_packet_limit != XFRM_INF) - if (!MLX5_GET(ipsec_aso, aso->ctx, soft_lft_arm) || - !MLX5_GET(ipsec_aso, aso->ctx, hard_lft_arm)) - xfrm_state_check_expire(sa_entry->x); + if (attrs->lft.soft_packet_limit != XFRM_INF) + mlx5e_ipsec_handle_limits(sa_entry); unlock: spin_unlock(&sa_entry->x->lock); From 76e463f6508b5909887df3c360ca55f1caa3b94a Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 30 Mar 2023 11:02:25 +0300 Subject: [PATCH 04/10] net/mlx5e: Overcome slow response for first IPsec ASO WQE First ASO WQE causes to cache miss in hardware, which can't return result immediately. It causes to the situation where such WQE is polled earlier than it is needed. Add logic to retry ASO CQ polling operation. 
Link: https://lore.kernel.org/r/eb92a758c533ff3f058e0dcb4f8d2324355304ad.1680162300.git.leonro@nvidia.com Reviewed-by: Raed Salem Signed-off-by: Leon Romanovsky --- .../ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 684de9739e69..6971e5e36820 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -568,6 +568,7 @@ int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry, struct mlx5_wqe_aso_ctrl_seg *ctrl; struct mlx5e_hw_objs *res; struct mlx5_aso_wqe *wqe; + unsigned long expires; u8 ds_cnt; int ret; @@ -589,7 +590,12 @@ int mlx5e_ipsec_aso_query(struct mlx5e_ipsec_sa_entry *sa_entry, mlx5e_ipsec_aso_copy(ctrl, data); mlx5_aso_post_wqe(aso->aso, false, &wqe->ctrl); - ret = mlx5_aso_poll_cq(aso->aso, false); + expires = jiffies + msecs_to_jiffies(10); + do { + ret = mlx5_aso_poll_cq(aso->aso, false); + if (ret) + usleep_range(2, 10); + } while (ret && time_is_after_jiffies(expires)); spin_unlock_bh(&aso->lock); return ret; } From 3e1c957f9a3b248f47f8b39b607002d948fd17d4 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 30 Mar 2023 11:02:26 +0300 Subject: [PATCH 05/10] xfrm: don't require advance ESN callback for packet offload In packet offload mode, the hardware is responsible for managing the replay window and advancing the ESN. In that mode, there won't be any call to the .xdo_dev_state_advance_esn callback, so relax the current check for the existence of that callback. 
Link: https://lore.kernel.org/r/9f3dfc3fef2cfcd191f0c5eee7cf0aa74e7f7786.1680162300.git.leonro@nvidia.com Reviewed-by: Raed Salem Acked-by: Steffen Klassert Signed-off-by: Leon Romanovsky --- net/xfrm/xfrm_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 95f1436bf6a2..bef28c6187eb 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -287,7 +287,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, return (is_packet_offload) ? -EINVAL : 0; } - if (x->props.flags & XFRM_STATE_ESN && + if (!is_packet_offload && x->props.flags & XFRM_STATE_ESN && !dev->xfrmdev_ops->xdo_dev_state_advance_esn) { NL_SET_ERR_MSG(extack, "Device doesn't support offload with ESN"); xso->dev = NULL; From f4979e2667c56f031a3c2e84364dbcde01c2cdf3 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 30 Mar 2023 11:02:27 +0300 Subject: [PATCH 06/10] net/mlx5e: Remove ESN callbacks if it is not supported There is no need to implement .xdo_dev_state_advance_esn() or to set up the related work, as the callback will never be called in packet offload mode. 
Link: https://lore.kernel.org/r/2fc9fade32e31f03b100d6086a82ad36269349dc.1680162300.git.leonro@nvidia.com Reviewed-by: Raed Salem Signed-off-by: Leon Romanovsky --- .../mellanox/mlx5/core/en_accel/ipsec.c | 51 ++++++++++++++----- .../mellanox/mlx5/core/en_accel/ipsec.h | 1 - .../mlx5/core/en_accel/ipsec_offload.c | 3 -- 3 files changed, 38 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 3612cdd37b5a..067704307851 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -56,11 +56,6 @@ static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) u32 seq_bottom = 0; u8 overlap; - if (!(sa_entry->x->props.flags & XFRM_STATE_ESN)) { - sa_entry->esn_state.trigger = 0; - return false; - } - replay_esn = sa_entry->x->replay_esn; if (replay_esn->seq >= replay_esn->replay_window) seq_bottom = replay_esn->seq - replay_esn->replay_window + 1; @@ -70,7 +65,6 @@ static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) sa_entry->esn_state.esn = xfrm_replay_seqhi(sa_entry->x, htonl(seq_bottom)); - sa_entry->esn_state.trigger = 1; if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) { sa_entry->esn_state.overlap = 0; return true; @@ -229,7 +223,7 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, aes_gcm->icv_len = x->aead->alg_icv_len; /* esn */ - if (sa_entry->esn_state.trigger) { + if (x->props.flags & XFRM_STATE_ESN) { attrs->esn_trigger = true; attrs->esn = sa_entry->esn_state.esn; attrs->esn_overlap = sa_entry->esn_state.overlap; @@ -394,6 +388,22 @@ static void _update_xfrm_state(struct work_struct *work) mlx5_accel_esp_modify_xfrm(sa_entry, &modify_work->attrs); } +static void mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct xfrm_state *x = sa_entry->x; + + if 
(x->xso.type != XFRM_DEV_OFFLOAD_CRYPTO || + x->xso.dir != XFRM_DEV_OFFLOAD_OUT) + return; + + if (x->props.flags & XFRM_STATE_ESN) { + sa_entry->set_iv_op = mlx5e_ipsec_set_iv_esn; + return; + } + + sa_entry->set_iv_op = mlx5e_ipsec_set_iv; +} + static int mlx5e_xfrm_add_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { @@ -425,7 +435,8 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, goto err_xfrm; /* check esn */ - mlx5e_ipsec_update_esn_state(sa_entry); + if (x->props.flags & XFRM_STATE_ESN) + mlx5e_ipsec_update_esn_state(sa_entry); mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs); /* create hw context */ @@ -446,11 +457,17 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, if (err) goto err_add_rule; - if (x->xso.dir == XFRM_DEV_OFFLOAD_OUT) - sa_entry->set_iv_op = (x->props.flags & XFRM_STATE_ESN) ? - mlx5e_ipsec_set_iv_esn : mlx5e_ipsec_set_iv; + mlx5e_ipsec_set_esn_ops(sa_entry); - INIT_WORK(&sa_entry->modify_work.work, _update_xfrm_state); + switch (x->xso.type) { + case XFRM_DEV_OFFLOAD_CRYPTO: + if (x->props.flags & XFRM_STATE_ESN) + INIT_WORK(&sa_entry->modify_work.work, + _update_xfrm_state); + break; + default: + break; + } out: x->xso.offload_handle = (unsigned long)sa_entry; return 0; @@ -485,7 +502,15 @@ static void mlx5e_xfrm_free_state(struct xfrm_state *x) if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) goto sa_entry_free; - cancel_work_sync(&sa_entry->modify_work.work); + switch (x->xso.type) { + case XFRM_DEV_OFFLOAD_CRYPTO: + if (x->props.flags & XFRM_STATE_ESN) + cancel_work_sync(&sa_entry->modify_work.work); + break; + default: + break; + } + mlx5e_accel_ipsec_fs_del_rule(sa_entry); mlx5_ipsec_free_sa_ctx(sa_entry); sa_entry_free: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 0c58c3583b0f..e4a606364a45 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -160,7 +160,6 @@ struct mlx5e_ipsec { struct mlx5e_ipsec_esn_state { u32 esn; - u8 trigger: 1; u8 overlap: 1; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 6971e5e36820..a2e9af5e51e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -225,9 +225,6 @@ static int mlx5_modify_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry, void *obj; int err; - if (!attrs->esn_trigger) - return 0; - general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC)) return -EINVAL; From 7db21ef4566ef7a24e46808e8668a603531163c3 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 30 Mar 2023 11:02:28 +0300 Subject: [PATCH 07/10] net/mlx5e: Set IPsec replay sequence numbers "ip xfrm state ..." command allows users to configure replay sequence numbers with replay-seq* arguments for RX and replay-oseq* for TX. Add the needed driver logic to support setting them. 
Link: https://lore.kernel.org/r/a9b17827eff2b29a4951225efa684a6cd38f74fe.1680162300.git.leonro@nvidia.com Reviewed-by: Raed Salem Signed-off-by: Leon Romanovsky --- .../mellanox/mlx5/core/en_accel/ipsec.c | 48 +++++++++++++++---- .../mellanox/mlx5/core/en_accel/ipsec.h | 14 ++++-- .../mlx5/core/en_accel/ipsec_offload.c | 22 +++++---- 3 files changed, 60 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index 067704307851..b8058f89365e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -52,18 +52,46 @@ static struct mlx5e_ipsec_pol_entry *to_ipsec_pol_entry(struct xfrm_policy *x) static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) { - struct xfrm_replay_state_esn *replay_esn; + struct xfrm_state *x = sa_entry->x; u32 seq_bottom = 0; + u32 esn, esn_msb; u8 overlap; - replay_esn = sa_entry->x->replay_esn; - if (replay_esn->seq >= replay_esn->replay_window) - seq_bottom = replay_esn->seq - replay_esn->replay_window + 1; + switch (x->xso.type) { + case XFRM_DEV_OFFLOAD_PACKET: + switch (x->xso.dir) { + case XFRM_DEV_OFFLOAD_IN: + esn = x->replay_esn->seq; + esn_msb = x->replay_esn->seq_hi; + break; + case XFRM_DEV_OFFLOAD_OUT: + esn = x->replay_esn->oseq; + esn_msb = x->replay_esn->oseq_hi; + break; + default: + WARN_ON(true); + return false; + } + break; + case XFRM_DEV_OFFLOAD_CRYPTO: + /* Already parsed by XFRM core */ + esn = x->replay_esn->seq; + break; + default: + WARN_ON(true); + return false; + } overlap = sa_entry->esn_state.overlap; - sa_entry->esn_state.esn = xfrm_replay_seqhi(sa_entry->x, - htonl(seq_bottom)); + if (esn >= x->replay_esn->replay_window) + seq_bottom = esn - x->replay_esn->replay_window + 1; + + if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO) + esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom)); + + sa_entry->esn_state.esn = 
esn; + sa_entry->esn_state.esn_msb = esn_msb; if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) { sa_entry->esn_state.overlap = 0; @@ -224,10 +252,10 @@ void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry, /* esn */ if (x->props.flags & XFRM_STATE_ESN) { - attrs->esn_trigger = true; - attrs->esn = sa_entry->esn_state.esn; - attrs->esn_overlap = sa_entry->esn_state.overlap; - attrs->replay_window = x->replay_esn->replay_window; + attrs->replay_esn.trigger = true; + attrs->replay_esn.esn = sa_entry->esn_state.esn; + attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb; + attrs->replay_esn.overlap = sa_entry->esn_state.overlap; } attrs->dir = x->xso.dir; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index e4a606364a45..8d5ce65def9f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -67,8 +67,15 @@ struct mlx5_ipsec_lft { u64 numb_rounds_soft; }; +struct mlx5_replay_esn { + u32 replay_window; + u32 esn; + u32 esn_msb; + u8 overlap : 1; + u8 trigger : 1; +}; + struct mlx5_accel_esp_xfrm_attrs { - u32 esn; u32 spi; u32 flags; struct aes_gcm_keymat aes_gcm; @@ -85,11 +92,9 @@ struct mlx5_accel_esp_xfrm_attrs { struct upspec upspec; u8 dir : 2; - u8 esn_overlap : 1; - u8 esn_trigger : 1; u8 type : 2; u8 family; - u32 replay_window; + struct mlx5_replay_esn replay_esn; u32 authsize; u32 reqid; struct mlx5_ipsec_lft lft; @@ -160,6 +165,7 @@ struct mlx5e_ipsec { struct mlx5e_ipsec_esn_state { u32 esn; + u32 esn_msb; u8 overlap: 1; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index a2e9af5e51e1..c974c6153d89 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -76,15 
+76,17 @@ static void mlx5e_ipsec_packet_setup(void *obj, u32 pdn, void *aso_ctx; aso_ctx = MLX5_ADDR_OF(ipsec_obj, obj, ipsec_aso); - if (attrs->esn_trigger) { + if (attrs->replay_esn.trigger) { MLX5_SET(ipsec_aso, aso_ctx, esn_event_arm, 1); if (attrs->dir == XFRM_DEV_OFFLOAD_IN) { MLX5_SET(ipsec_aso, aso_ctx, window_sz, - attrs->replay_window / 64); + attrs->replay_esn.replay_window / 64); MLX5_SET(ipsec_aso, aso_ctx, mode, MLX5_IPSEC_ASO_REPLAY_PROTECTION); - } + } + MLX5_SET(ipsec_aso, aso_ctx, mode_parameter, + attrs->replay_esn.esn); } /* ASO context */ @@ -136,10 +138,10 @@ static int mlx5_create_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry) salt_iv_p = MLX5_ADDR_OF(ipsec_obj, obj, implicit_iv); memcpy(salt_iv_p, &aes_gcm->seq_iv, sizeof(aes_gcm->seq_iv)); /* esn */ - if (attrs->esn_trigger) { + if (attrs->replay_esn.trigger) { MLX5_SET(ipsec_obj, obj, esn_en, 1); - MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn); - MLX5_SET(ipsec_obj, obj, esn_overlap, attrs->esn_overlap); + MLX5_SET(ipsec_obj, obj, esn_msb, attrs->replay_esn.esn_msb); + MLX5_SET(ipsec_obj, obj, esn_overlap, attrs->replay_esn.overlap); } MLX5_SET(ipsec_obj, obj, dekn, sa_entry->enc_key_id); @@ -252,8 +254,8 @@ static int mlx5_modify_ipsec_obj(struct mlx5e_ipsec_sa_entry *sa_entry, MLX5_SET64(ipsec_obj, obj, modify_field_select, MLX5_MODIFY_IPSEC_BITMASK_ESN_OVERLAP | MLX5_MODIFY_IPSEC_BITMASK_ESN_MSB); - MLX5_SET(ipsec_obj, obj, esn_msb, attrs->esn); - MLX5_SET(ipsec_obj, obj, esn_overlap, attrs->esn_overlap); + MLX5_SET(ipsec_obj, obj, esn_msb, attrs->replay_esn.esn_msb); + MLX5_SET(ipsec_obj, obj, esn_overlap, attrs->replay_esn.overlap); /* general object fields set */ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT); @@ -290,7 +292,7 @@ static void mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry, struct mlx5_wqe_aso_ctrl_seg data = {}; if (mode_param < MLX5E_IPSEC_ESN_SCOPE_MID) { - sa_entry->esn_state.esn++; + 
sa_entry->esn_state.esn_msb++; sa_entry->esn_state.overlap = 0; } else { sa_entry->esn_state.overlap = 1; @@ -434,7 +436,7 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work) if (ret) goto unlock; - if (attrs->esn_trigger && + if (attrs->replay_esn.trigger && !MLX5_GET(ipsec_aso, aso->ctx, esn_event_arm)) { u32 mode_param = MLX5_GET(ipsec_aso, aso->ctx, mode_parameter); From 20fbdab21e2e21bc1e9f0a0dd736c087a9fe0382 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 30 Mar 2023 11:02:29 +0300 Subject: [PATCH 08/10] net/mlx5e: Reduce contention in IPsec workqueue IPsec workqueue shouldn't be declared as ordered queue with one work per-CPU, and can be safely changed to be unordered with default number of works per-CPU. Link: https://lore.kernel.org/r/5dc224a4decd09c14f645d38173e1a1710802cd8.1680162300.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index b8058f89365e..fa66f4f3cba7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -561,8 +561,8 @@ void mlx5e_ipsec_init(struct mlx5e_priv *priv) xa_init_flags(&ipsec->sadb, XA_FLAGS_ALLOC); ipsec->mdev = priv->mdev; - ipsec->wq = alloc_ordered_workqueue("mlx5e_ipsec: %s", 0, - priv->netdev->name); + ipsec->wq = alloc_workqueue("mlx5e_ipsec: %s", WQ_UNBOUND, 0, + priv->netdev->name); if (!ipsec->wq) goto err_wq; From 4562116f8a565fdeadb0af5503cb7edb8e9d5761 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 30 Mar 2023 11:02:30 +0300 Subject: [PATCH 09/10] net/mlx5e: Generalize IPsec work structs IPsec logic has two work structs which are submitted to same workqueue. 
As a preparation to addition of new work which needs to be submitted too, let's generalize struct mlx5e_ipsec_work. Link: https://lore.kernel.org/r/285a1550242363de181bab3a07a69296f66ad9a8.1680162300.git.leonro@nvidia.com Reviewed-by: Raed Salem Signed-off-by: Leon Romanovsky --- .../mellanox/mlx5/core/en_accel/ipsec.c | 86 ++++++++++++------- .../mellanox/mlx5/core/en_accel/ipsec.h | 11 +-- .../mlx5/core/en_accel/ipsec_offload.c | 19 ++-- 3 files changed, 68 insertions(+), 48 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index fa66f4f3cba7..ef7f942b5ac7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -406,14 +406,16 @@ static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev, return 0; } -static void _update_xfrm_state(struct work_struct *work) +static void mlx5e_ipsec_modify_state(struct work_struct *_work) { - struct mlx5e_ipsec_modify_state_work *modify_work = - container_of(work, struct mlx5e_ipsec_modify_state_work, work); - struct mlx5e_ipsec_sa_entry *sa_entry = container_of( - modify_work, struct mlx5e_ipsec_sa_entry, modify_work); + struct mlx5e_ipsec_work *work = + container_of(_work, struct mlx5e_ipsec_work, work); + struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry; + struct mlx5_accel_esp_xfrm_attrs *attrs; - mlx5_accel_esp_modify_xfrm(sa_entry, &modify_work->attrs); + attrs = &((struct mlx5e_ipsec_sa_entry *)work->data)->attrs; + + mlx5_accel_esp_modify_xfrm(sa_entry, attrs); } static void mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry *sa_entry) @@ -432,6 +434,36 @@ static void mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry *sa_entry) sa_entry->set_iv_op = mlx5e_ipsec_set_iv; } +static int mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct xfrm_state *x = sa_entry->x; + struct mlx5e_ipsec_work *work; + + switch (x->xso.type) 
{ + case XFRM_DEV_OFFLOAD_CRYPTO: + if (!(x->props.flags & XFRM_STATE_ESN)) + return 0; + break; + default: + return 0; + } + + work = kzalloc(sizeof(*work), GFP_KERNEL); + if (!work) + return -ENOMEM; + + work->data = kzalloc(sizeof(*sa_entry), GFP_KERNEL); + if (!work->data) { + kfree(work); + return -ENOMEM; + } + + INIT_WORK(&work->work, mlx5e_ipsec_modify_state); + work->sa_entry = sa_entry; + sa_entry->work = work; + return 0; +} + static int mlx5e_xfrm_add_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { @@ -467,10 +499,15 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, mlx5e_ipsec_update_esn_state(sa_entry); mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs); + + err = mlx5_ipsec_create_work(sa_entry); + if (err) + goto err_xfrm; + /* create hw context */ err = mlx5_ipsec_create_sa_ctx(sa_entry); if (err) - goto err_xfrm; + goto release_work; err = mlx5e_accel_ipsec_fs_add_rule(sa_entry); if (err) @@ -486,16 +523,6 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, goto err_add_rule; mlx5e_ipsec_set_esn_ops(sa_entry); - - switch (x->xso.type) { - case XFRM_DEV_OFFLOAD_CRYPTO: - if (x->props.flags & XFRM_STATE_ESN) - INIT_WORK(&sa_entry->modify_work.work, - _update_xfrm_state); - break; - default: - break; - } out: x->xso.offload_handle = (unsigned long)sa_entry; return 0; @@ -504,6 +531,9 @@ err_add_rule: mlx5e_accel_ipsec_fs_del_rule(sa_entry); err_hw_ctx: mlx5_ipsec_free_sa_ctx(sa_entry); +release_work: + kfree(sa_entry->work->data); + kfree(sa_entry->work); err_xfrm: kfree(sa_entry); NL_SET_ERR_MSG_MOD(extack, "Device failed to offload this policy"); @@ -530,17 +560,13 @@ static void mlx5e_xfrm_free_state(struct xfrm_state *x) if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) goto sa_entry_free; - switch (x->xso.type) { - case XFRM_DEV_OFFLOAD_CRYPTO: - if (x->props.flags & XFRM_STATE_ESN) - cancel_work_sync(&sa_entry->modify_work.work); - break; - default: - break; - } + if (sa_entry->work) + 
cancel_work_sync(&sa_entry->work->work); mlx5e_accel_ipsec_fs_del_rule(sa_entry); mlx5_ipsec_free_sa_ctx(sa_entry); + kfree(sa_entry->work->data); + kfree(sa_entry->work); sa_entry_free: kfree(sa_entry); } @@ -626,16 +652,18 @@ static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x) static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x) { struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x); - struct mlx5e_ipsec_modify_state_work *modify_work = - &sa_entry->modify_work; + struct mlx5e_ipsec_work *work = sa_entry->work; + struct mlx5e_ipsec_sa_entry *sa_entry_shadow; bool need_update; need_update = mlx5e_ipsec_update_esn_state(sa_entry); if (!need_update) return; - mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &modify_work->attrs); - queue_work(sa_entry->ipsec->wq, &modify_work->work); + sa_entry_shadow = work->data; + memset(sa_entry_shadow, 0x00, sizeof(*sa_entry_shadow)); + mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry_shadow->attrs); + queue_work(sa_entry->ipsec->wq, &work->work); } static void mlx5e_xfrm_update_curlft(struct xfrm_state *x) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index 8d5ce65def9f..ab48fb9b4698 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -136,8 +136,8 @@ struct mlx5e_ipsec_tx; struct mlx5e_ipsec_work { struct work_struct work; - struct mlx5e_ipsec *ipsec; - u32 id; + struct mlx5e_ipsec_sa_entry *sa_entry; + void *data; }; struct mlx5e_ipsec_aso { @@ -176,11 +176,6 @@ struct mlx5e_ipsec_rule { struct mlx5_fc *fc; }; -struct mlx5e_ipsec_modify_state_work { - struct work_struct work; - struct mlx5_accel_esp_xfrm_attrs attrs; -}; - struct mlx5e_ipsec_limits { u64 round; u8 soft_limit_hit : 1; @@ -197,7 +192,7 @@ struct mlx5e_ipsec_sa_entry { u32 ipsec_obj_id; u32 enc_key_id; struct mlx5e_ipsec_rule ipsec_rule; - struct 
mlx5e_ipsec_modify_state_work modify_work; + struct mlx5e_ipsec_work *work; struct mlx5e_ipsec_limits limits; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index c974c6153d89..5fddb86bb35e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -417,18 +417,12 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work) { struct mlx5e_ipsec_work *work = container_of(_work, struct mlx5e_ipsec_work, work); + struct mlx5e_ipsec_sa_entry *sa_entry = work->data; struct mlx5_accel_esp_xfrm_attrs *attrs; - struct mlx5e_ipsec_sa_entry *sa_entry; struct mlx5e_ipsec_aso *aso; - struct mlx5e_ipsec *ipsec; int ret; - sa_entry = xa_load(&work->ipsec->sadb, work->id); - if (!sa_entry) - goto out; - - ipsec = sa_entry->ipsec; - aso = ipsec->aso; + aso = sa_entry->ipsec->aso; attrs = &sa_entry->attrs; spin_lock(&sa_entry->x->lock); @@ -448,7 +442,6 @@ static void mlx5e_ipsec_handle_event(struct work_struct *_work) unlock: spin_unlock(&sa_entry->x->lock); -out: kfree(work); } @@ -456,6 +449,7 @@ static int mlx5e_ipsec_event(struct notifier_block *nb, unsigned long event, void *data) { struct mlx5e_ipsec *ipsec = container_of(nb, struct mlx5e_ipsec, nb); + struct mlx5e_ipsec_sa_entry *sa_entry; struct mlx5_eqe_obj_change *object; struct mlx5e_ipsec_work *work; struct mlx5_eqe *eqe = data; @@ -470,13 +464,16 @@ static int mlx5e_ipsec_event(struct notifier_block *nb, unsigned long event, if (type != MLX5_GENERAL_OBJECT_TYPES_IPSEC) return NOTIFY_DONE; + sa_entry = xa_load(&ipsec->sadb, be32_to_cpu(object->obj_id)); + if (!sa_entry) + return NOTIFY_DONE; + work = kmalloc(sizeof(*work), GFP_ATOMIC); if (!work) return NOTIFY_DONE; INIT_WORK(&work->work, mlx5e_ipsec_handle_event); - work->ipsec = ipsec; - work->id = be32_to_cpu(object->obj_id); + work->data = sa_entry; 
queue_work(ipsec->wq, &work->work); return NOTIFY_OK; From b2f7b01d36a9b94fbd7489bd1228025ea7e7a2f4 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 30 Mar 2023 11:02:31 +0300 Subject: [PATCH 10/10] net/mlx5e: Simulate missing IPsec TX limits hardware functionality ConnectX-7 devices don't have the ability to send TX hard/soft limit events. As a possible workaround, let's rely on existing infrastructure and use a periodic check of the cached flow counter. In these periodic checks, we call xfrm_state_check_expire() to check and mark the state accordingly. Once the state is marked as XFRM_STATE_EXPIRED, the SA flow rule is changed to drop all the traffic. Link: https://lore.kernel.org/r/94a5d82c0c399747117d8a558f9beebfbcf26154.1680162300.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- .../mellanox/mlx5/core/en_accel/ipsec.c | 65 ++++++++++++++++++- .../mellanox/mlx5/core/en_accel/ipsec.h | 8 +++ .../mellanox/mlx5/core/en_accel/ipsec_fs.c | 31 +++++++-- 3 files changed, 99 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c index ef7f942b5ac7..def01bfde610 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c @@ -40,6 +40,8 @@ #include "ipsec.h" #include "ipsec_rxtx.h" +#define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000) + static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x) { return (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle; @@ -50,6 +52,28 @@ static struct mlx5e_ipsec_pol_entry *to_ipsec_pol_entry(struct xfrm_policy *x) return (struct mlx5e_ipsec_pol_entry *)x->xdo.offload_handle; } +static void mlx5e_ipsec_handle_tx_limit(struct work_struct *_work) +{ + struct mlx5e_ipsec_dwork *dwork = + container_of(_work, struct mlx5e_ipsec_dwork, dwork.work); + struct mlx5e_ipsec_sa_entry *sa_entry = dwork->sa_entry; + struct xfrm_state *x = sa_entry->x; + 
+ spin_lock(&x->lock); + xfrm_state_check_expire(x); + if (x->km.state == XFRM_STATE_EXPIRED) { + sa_entry->attrs.drop = true; + mlx5e_accel_ipsec_fs_modify(sa_entry); + } + spin_unlock(&x->lock); + + if (sa_entry->attrs.drop) + return; + + queue_delayed_work(sa_entry->ipsec->wq, &dwork->dwork, + MLX5_IPSEC_RESCHED); +} + static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry) { struct xfrm_state *x = sa_entry->x; @@ -464,6 +488,31 @@ static int mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry *sa_entry) return 0; } +static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct xfrm_state *x = sa_entry->x; + struct mlx5e_ipsec_dwork *dwork; + + if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) + return 0; + + if (x->xso.dir != XFRM_DEV_OFFLOAD_OUT) + return 0; + + if (x->lft.soft_packet_limit == XFRM_INF && + x->lft.hard_packet_limit == XFRM_INF) + return 0; + + dwork = kzalloc(sizeof(*dwork), GFP_KERNEL); + if (!dwork) + return -ENOMEM; + + dwork->sa_entry = sa_entry; + INIT_DELAYED_WORK(&dwork->dwork, mlx5e_ipsec_handle_tx_limit); + sa_entry->dwork = dwork; + return 0; +} + static int mlx5e_xfrm_add_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { @@ -504,10 +553,14 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, if (err) goto err_xfrm; + err = mlx5e_ipsec_create_dwork(sa_entry); + if (err) + goto release_work; + /* create hw context */ err = mlx5_ipsec_create_sa_ctx(sa_entry); if (err) - goto release_work; + goto release_dwork; err = mlx5e_accel_ipsec_fs_add_rule(sa_entry); if (err) @@ -523,6 +576,10 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x, goto err_add_rule; mlx5e_ipsec_set_esn_ops(sa_entry); + + if (sa_entry->dwork) + queue_delayed_work(ipsec->wq, &sa_entry->dwork->dwork, + MLX5_IPSEC_RESCHED); out: x->xso.offload_handle = (unsigned long)sa_entry; return 0; @@ -531,6 +588,8 @@ err_add_rule: mlx5e_accel_ipsec_fs_del_rule(sa_entry); err_hw_ctx: mlx5_ipsec_free_sa_ctx(sa_entry); 
+release_dwork: + kfree(sa_entry->dwork); release_work: kfree(sa_entry->work->data); kfree(sa_entry->work); @@ -563,8 +622,12 @@ static void mlx5e_xfrm_free_state(struct xfrm_state *x) if (sa_entry->work) cancel_work_sync(&sa_entry->work->work); + if (sa_entry->dwork) + cancel_delayed_work_sync(&sa_entry->dwork->dwork); + mlx5e_accel_ipsec_fs_del_rule(sa_entry); mlx5_ipsec_free_sa_ctx(sa_entry); + kfree(sa_entry->dwork); kfree(sa_entry->work->data); kfree(sa_entry->work); sa_entry_free: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index ab48fb9b4698..52890d7dce6b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -93,6 +93,7 @@ struct mlx5_accel_esp_xfrm_attrs { struct upspec upspec; u8 dir : 2; u8 type : 2; + u8 drop : 1; u8 family; struct mlx5_replay_esn replay_esn; u32 authsize; @@ -140,6 +141,11 @@ struct mlx5e_ipsec_work { void *data; }; +struct mlx5e_ipsec_dwork { + struct delayed_work dwork; + struct mlx5e_ipsec_sa_entry *sa_entry; +}; + struct mlx5e_ipsec_aso { u8 __aligned(64) ctx[MLX5_ST_SZ_BYTES(ipsec_aso)]; dma_addr_t dma_addr; @@ -193,6 +199,7 @@ struct mlx5e_ipsec_sa_entry { u32 enc_key_id; struct mlx5e_ipsec_rule ipsec_rule; struct mlx5e_ipsec_work *work; + struct mlx5e_ipsec_dwork *dwork; struct mlx5e_ipsec_limits limits; }; @@ -235,6 +242,7 @@ int mlx5e_accel_ipsec_fs_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry); void mlx5e_accel_ipsec_fs_del_rule(struct mlx5e_ipsec_sa_entry *sa_entry); int mlx5e_accel_ipsec_fs_add_pol(struct mlx5e_ipsec_pol_entry *pol_entry); void mlx5e_accel_ipsec_fs_del_pol(struct mlx5e_ipsec_pol_entry *pol_entry); +void mlx5e_accel_ipsec_fs_modify(struct mlx5e_ipsec_sa_entry *sa_entry); int mlx5_ipsec_create_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry); void mlx5_ipsec_free_sa_ctx(struct mlx5e_ipsec_sa_entry *sa_entry); diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 0539640a4d88..b47794d4146e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -926,9 +926,12 @@ static int rx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) flow_act.crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC; flow_act.crypto.obj_id = sa_entry->ipsec_obj_id; flow_act.flags |= FLOW_ACT_NO_APPEND; - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT | + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT | MLX5_FLOW_CONTEXT_ACTION_COUNT; + if (attrs->drop) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + else + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[0].ft = rx->ft.status; dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; @@ -1018,9 +1021,13 @@ static int tx_add_rule(struct mlx5e_ipsec_sa_entry *sa_entry) flow_act.crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_IPSEC; flow_act.crypto.obj_id = sa_entry->ipsec_obj_id; flow_act.flags |= FLOW_ACT_NO_APPEND; - flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | - MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT | + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT | MLX5_FLOW_CONTEXT_ACTION_COUNT; + if (attrs->drop) + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_DROP; + else + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + dest[0].ft = tx->ft.status; dest[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; @@ -1430,3 +1437,19 @@ err_rx_ipv4: kfree(ipsec->tx); return err; } + +void mlx5e_accel_ipsec_fs_modify(struct mlx5e_ipsec_sa_entry *sa_entry) +{ + struct mlx5e_ipsec_sa_entry sa_entry_shadow = {}; + int err; + + memcpy(&sa_entry_shadow, sa_entry, sizeof(*sa_entry)); + 
memset(&sa_entry_shadow.ipsec_rule, 0x00, sizeof(sa_entry->ipsec_rule)); + + err = mlx5e_accel_ipsec_fs_add_rule(&sa_entry_shadow); + if (err) + return; + + mlx5e_accel_ipsec_fs_del_rule(sa_entry); + memcpy(sa_entry, &sa_entry_shadow, sizeof(*sa_entry)); +}