From 6ab36e35f11244b469cdf3c976c185363f1ce61c Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 1 Jul 2016 14:50:54 +0300 Subject: [PATCH 01/16] net/mlx5: E-Switch, Add operational mode to the SRIOV e-Switch Define three modes for the SRIOV e-switch operation, none (SRIOV_NONE, none of the VF vports are enabled), legacy (SRIOV_LEGACY, the current mode) and sriov offloads (SRIOV_OFFLOADS). Currently, when in SRIOV, only the legacy mode is supported, where steering rules are of the form: destination mac --> VF vport This patch does not change any functionality. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 51 ++++++++++--------- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 19 +++++-- .../net/ethernet/mellanox/mlx5/core/sriov.c | 5 +- 3 files changed, 46 insertions(+), 29 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index aebbd6ccb9fe..8068dde172e7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -428,7 +428,7 @@ esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport) return __esw_fdb_set_vport_rule(esw, vport, true, mac_c, mac_v); } -static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) +static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_core_dev *dev = esw->dev; @@ -479,7 +479,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) esw_warn(dev, "Failed to create flow group err(%d)\n", err); goto out; } - esw->fdb_table.addr_grp = g; + esw->fdb_table.legacy.addr_grp = g; /* Allmulti group : One rule that forwards any mcast traffic */ MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, @@ -494,7 +494,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch 
*esw, int nvports) esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err); goto out; } - esw->fdb_table.allmulti_grp = g; + esw->fdb_table.legacy.allmulti_grp = g; /* Promiscuous group : * One rule that forward all unmatched traffic from previous groups @@ -511,17 +511,17 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports) esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err); goto out; } - esw->fdb_table.promisc_grp = g; + esw->fdb_table.legacy.promisc_grp = g; out: if (err) { - if (!IS_ERR_OR_NULL(esw->fdb_table.allmulti_grp)) { - mlx5_destroy_flow_group(esw->fdb_table.allmulti_grp); - esw->fdb_table.allmulti_grp = NULL; + if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.allmulti_grp)) { + mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); + esw->fdb_table.legacy.allmulti_grp = NULL; } - if (!IS_ERR_OR_NULL(esw->fdb_table.addr_grp)) { - mlx5_destroy_flow_group(esw->fdb_table.addr_grp); - esw->fdb_table.addr_grp = NULL; + if (!IS_ERR_OR_NULL(esw->fdb_table.legacy.addr_grp)) { + mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); + esw->fdb_table.legacy.addr_grp = NULL; } if (!IS_ERR_OR_NULL(esw->fdb_table.fdb)) { mlx5_destroy_flow_table(esw->fdb_table.fdb); @@ -533,20 +533,20 @@ out: return err; } -static void esw_destroy_fdb_table(struct mlx5_eswitch *esw) +static void esw_destroy_legacy_fdb_table(struct mlx5_eswitch *esw) { if (!esw->fdb_table.fdb) return; esw_debug(esw->dev, "Destroy FDB Table\n"); - mlx5_destroy_flow_group(esw->fdb_table.promisc_grp); - mlx5_destroy_flow_group(esw->fdb_table.allmulti_grp); - mlx5_destroy_flow_group(esw->fdb_table.addr_grp); + mlx5_destroy_flow_group(esw->fdb_table.legacy.promisc_grp); + mlx5_destroy_flow_group(esw->fdb_table.legacy.allmulti_grp); + mlx5_destroy_flow_group(esw->fdb_table.legacy.addr_grp); mlx5_destroy_flow_table(esw->fdb_table.fdb); esw->fdb_table.fdb = NULL; - esw->fdb_table.addr_grp = NULL; - esw->fdb_table.allmulti_grp = NULL; - 
esw->fdb_table.promisc_grp = NULL; + esw->fdb_table.legacy.addr_grp = NULL; + esw->fdb_table.legacy.allmulti_grp = NULL; + esw->fdb_table.legacy.promisc_grp = NULL; } /* E-Switch vport UC/MC lists management */ @@ -1540,7 +1540,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) } /* Public E-Switch API */ -int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs) +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { int err; int i; @@ -1561,11 +1561,14 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs) if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support)) esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n"); - esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d)\n", nvfs); + esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); + if (mode != SRIOV_LEGACY) + return -EINVAL; + esw->mode = mode; esw_disable_vport(esw, 0); - err = esw_create_fdb_table(esw, nvfs + 1); + err = esw_create_legacy_fdb_table(esw, nvfs + 1); if (err) goto abort; @@ -1590,8 +1593,8 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) return; - esw_info(esw->dev, "disable SRIOV: active vports(%d)\n", - esw->enabled_vports); + esw_info(esw->dev, "disable SRIOV: active vports(%d) mode(%d)\n", + esw->enabled_vports, esw->mode); mc_promisc = esw->mc_promisc; @@ -1601,8 +1604,9 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) if (mc_promisc && mc_promisc->uplink_rule) mlx5_del_flow_rule(mc_promisc->uplink_rule); - esw_destroy_fdb_table(esw); + esw_destroy_legacy_fdb_table(esw); + esw->mode = SRIOV_NONE; /* VPORT 0 (PF) must be enabled back with non-sriov configuration */ esw_enable_vport(esw, 0, UC_ADDR_CHANGE); } @@ -1673,6 +1677,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->total_vports = total_vports; esw->enabled_vports = 0; + esw->mode = SRIOV_NONE; dev->priv.eswitch = esw; 
esw_enable_vport(esw, 0, UC_ADDR_CHANGE); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index fd6800256d4a..544fbfe8bcbd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -134,9 +134,19 @@ struct mlx5_l2_table { struct mlx5_eswitch_fdb { void *fdb; - struct mlx5_flow_group *addr_grp; - struct mlx5_flow_group *allmulti_grp; - struct mlx5_flow_group *promisc_grp; + union { + struct legacy_fdb { + struct mlx5_flow_group *addr_grp; + struct mlx5_flow_group *allmulti_grp; + struct mlx5_flow_group *promisc_grp; + } legacy; + }; +}; + +enum { + SRIOV_NONE, + SRIOV_LEGACY, + SRIOV_OFFLOADS }; struct mlx5_eswitch { @@ -153,13 +163,14 @@ struct mlx5_eswitch { */ struct mutex state_lock; struct esw_mc_addr *mc_promisc; + int mode; }; /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); -int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs); +int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode); void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, int vport, u8 mac[ETH_ALEN]); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index d6a3f412ba9f..b380a6bc1f85 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -167,7 +167,7 @@ int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) mlx5_core_init_vfs(dev, num_vfs); #ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs); + mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); #endif return num_vfs; @@ -209,7 +209,8 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev) 
mlx5_core_init_vfs(dev, cur_vfs); #ifdef CONFIG_MLX5_CORE_EN if (cur_vfs) - mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs); + mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs, + SRIOV_LEGACY); #endif enable_vfs(dev, cur_vfs); From 69697b6e2086b5860bd2d216bc4c6c49d84d73ff Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 1 Jul 2016 14:50:55 +0300 Subject: [PATCH 02/16] net/mlx5: E-Switch, Add support for the sriov offloads mode Unlike the legacy mode, here, forwarding rules are not learned by the driver per events on macs set by VFs/VMs into their vports, but rather should be programmed by higher-level SW entities. Saying that, still, in the offloads mode (SRIOV_OFFLOADS), two flow groups are created by the driver for management (slow path) purposes: The first group will be used for sending packets over e-switch vports from the host OS where the e-switch management code runs, to be received by VFs. The second group will be used by a miss rule which forwards packets toward the e-switch manager. Further logic will trap these packets such that the receiving net-device as seen by the networking stack is the representor of the vport that sent the packet over the e-switch data-path. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 35 +++-- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 16 +++ .../mellanox/mlx5/core/eswitch_offloads.c | 135 ++++++++++++++++++ 4 files changed, 168 insertions(+), 20 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index c4f450f1c658..96f18264a0ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -5,7 +5,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ fs_counters.o rl.o -mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \ +mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \ en_rx_am.o en_txrx.o en_clock.o vxlan.o en_tc.o \ en_arfs.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 8068dde172e7..1fc4cfd36e53 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -40,17 +40,6 @@ #define UPLINK_VPORT 0xFFFF -#define MLX5_DEBUG_ESWITCH_MASK BIT(3) - -#define esw_info(dev, format, ...) \ - pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) - -#define esw_warn(dev, format, ...) \ - pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) - -#define esw_debug(dev, format, ...) 
\ - mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) - enum { MLX5_ACTION_NONE = 0, MLX5_ACTION_ADD = 1, @@ -92,6 +81,9 @@ enum { MC_ADDR_CHANGE | \ PROMISC_CHANGE) +int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports); +void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw); + static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) { @@ -578,7 +570,8 @@ static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr) if (err) goto abort; - if (esw->fdb_table.fdb) /* SRIOV is enabled: Forward UC MAC to vport */ + /* SRIOV is enabled: Forward UC MAC to vport */ + if (esw->fdb_table.fdb && esw->mode == SRIOV_LEGACY) vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport); esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n", @@ -1543,7 +1536,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) { int err; - int i; + int i, enabled_events; if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) @@ -1562,18 +1555,19 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) esw_warn(esw->dev, "E-Switch engress ACL is not supported by FW\n"); esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d) mode (%d)\n", nvfs, mode); - if (mode != SRIOV_LEGACY) - return -EINVAL; - esw->mode = mode; esw_disable_vport(esw, 0); - err = esw_create_legacy_fdb_table(esw, nvfs + 1); + if (mode == SRIOV_LEGACY) + err = esw_create_legacy_fdb_table(esw, nvfs + 1); + else + err = esw_create_offloads_fdb_table(esw, nvfs + 1); if (err) goto abort; + enabled_events = (mode == SRIOV_LEGACY) ? 
SRIOV_VPORT_EVENTS : UC_ADDR_CHANGE; for (i = 0; i <= nvfs; i++) - esw_enable_vport(esw, i, SRIOV_VPORT_EVENTS); + esw_enable_vport(esw, i, enabled_events); esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n", esw->enabled_vports); @@ -1604,7 +1598,10 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) if (mc_promisc && mc_promisc->uplink_rule) mlx5_del_flow_rule(mc_promisc->uplink_rule); - esw_destroy_legacy_fdb_table(esw); + if (esw->mode == SRIOV_LEGACY) + esw_destroy_legacy_fdb_table(esw); + else + esw_destroy_offloads_fdb_table(esw); esw->mode = SRIOV_NONE; /* VPORT 0 (PF) must be enabled back with non-sriov configuration */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 544fbfe8bcbd..2360180c26c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -140,6 +140,11 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_group *allmulti_grp; struct mlx5_flow_group *promisc_grp; } legacy; + + struct offloads_fdb { + struct mlx5_flow_group *send_to_vport_grp; + struct mlx5_flow_group *miss_grp; + } offloads; }; }; @@ -188,4 +193,15 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, int vport, struct ifla_vf_stats *vf_stats); +#define MLX5_DEBUG_ESWITCH_MASK BIT(3) + +#define esw_info(dev, format, ...) \ + pr_info("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_warn(dev, format, ...) \ + pr_warn("(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__) + +#define esw_debug(dev, format, ...) 
\ + mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) + #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c new file mode 100644 index 000000000000..c6b28df3168f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include "mlx5_core.h" +#include "eswitch.h" + +#define MAX_PF_SQ 256 + +int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_core_dev *dev = esw->dev; + struct mlx5_flow_namespace *root_ns; + struct mlx5_flow_table *fdb = NULL; + struct mlx5_flow_group *g; + u32 *flow_group_in; + void *match_criteria; + int table_size, ix, err = 0; + + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return -ENOMEM; + + root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); + if (!root_ns) { + esw_warn(dev, "Failed to get FDB flow namespace\n"); + goto ns_err; + } + + esw_debug(dev, "Create offloads FDB table, log_max_size(%d)\n", + MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)); + + table_size = nvports + MAX_PF_SQ + 1; + fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0); + if (IS_ERR(fdb)) { + err = PTR_ERR(fdb); + esw_warn(dev, "Failed to create FDB Table err %d\n", err); + goto fdb_err; + } + esw->fdb_table.fdb = fdb; + + /* create send-to-vport group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_sqn); + MLX5_SET_TO_ONES(fte_match_param, match_criteria, misc_parameters.source_port); + + ix = nvports + MAX_PF_SQ; + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix - 1); + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create send-to-vport flow group err(%d)\n", err); + goto send_vport_err; + } + esw->fdb_table.offloads.send_to_vport_grp = g; + + /* create miss group */ + memset(flow_group_in, 0, 
inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, 0); + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ix); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ix + 1); + + g = mlx5_create_flow_group(fdb, flow_group_in); + if (IS_ERR(g)) { + err = PTR_ERR(g); + esw_warn(dev, "Failed to create miss flow group err(%d)\n", err); + goto miss_err; + } + esw->fdb_table.offloads.miss_grp = g; + + return 0; + +miss_err: + mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); +send_vport_err: + mlx5_destroy_flow_table(fdb); +fdb_err: +ns_err: + kvfree(flow_group_in); + return err; +} + +void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) +{ + if (!esw->fdb_table.fdb) + return; + + esw_debug(esw->dev, "Destroy offloads FDB Table\n"); + mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); + mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); + + mlx5_destroy_flow_table(esw->fdb_table.fdb); +} From 3aa335724f0793027c87fae03ecf0a297fc04b29 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 1 Jul 2016 14:50:56 +0300 Subject: [PATCH 03/16] net/mlx5: E-Switch, Add miss rule for offloads mode In the sriov offloads mode, packets that are not matched by any other rule should be sent towards the e-switch manager for further processing. Add such "miss" rule which matches ANY packet as the last rule in the e-switch FDB and programs the HW to send the packet to vport 0 where the e-switch manager runs. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 1 + .../mellanox/mlx5/core/eswitch_offloads.c | 40 +++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 2360180c26c2..8eed33f20ca3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -144,6 +144,7 @@ struct mlx5_eswitch_fdb { struct offloads_fdb { struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *miss_grp; + struct mlx5_flow_rule *miss_rule; } offloads; }; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index c6b28df3168f..e3d81aeac83b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -38,6 +38,39 @@ #include "mlx5_core.h" #include "eswitch.h" +static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule = NULL; + u32 *match_v, *match_c; + int err = 0; + + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_v || !match_c) { + esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n"); + err = -ENOMEM; + goto out; + } + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = 0; + + flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, 0, match_c, match_v, + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, 0, &dest); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + esw_warn(esw->dev, "FDB: Failed to add miss flow rule err %d\n", err); + goto out; + } + + esw->fdb_table.offloads.miss_rule = flow_rule; +out: + kfree(match_v); + kfree(match_c); + return err; +} + #define MAX_PF_SQ 256 int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) @@ 
-110,8 +143,14 @@ int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) } esw->fdb_table.offloads.miss_grp = g; + err = esw_add_fdb_miss_rule(esw); + if (err) + goto miss_rule_err; + return 0; +miss_rule_err: + mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); miss_err: mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); send_vport_err: @@ -128,6 +167,7 @@ void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) return; esw_debug(esw->dev, "Destroy offloads FDB Table\n"); + mlx5_del_flow_rule(esw->fdb_table.offloads.miss_rule); mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp); mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp); From ab22be9ba30a08482b2c2effb36ac3f0ed3df465 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 1 Jul 2016 14:50:57 +0300 Subject: [PATCH 04/16] net/mlx5: E-Switch, Add API to create send-to-vport rules Add the API to create send-to-vport e-switch rules of the form packet meta-data :: send-queue-number == $SQN and source-vport == 0 --> $VPORT These rules are to be used for a send-to-vport logic which conceptually bypasses the "normal" steering rules currently present at the e-switch datapath. Such rule should apply only for packets that originate in the e-switch manager vport (0) and are sent for a given SQN which is used by a given VF representor device, and hence the matching logic. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 3 +- .../mellanox/mlx5/core/eswitch_offloads.c | 39 +++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 8eed33f20ca3..b7fabd1b97d6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -193,6 +193,8 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, int vport, struct ifla_vf_stats *vf_stats); +struct mlx5_flow_rule * +mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn); #define MLX5_DEBUG_ESWITCH_MASK BIT(3) @@ -204,5 +206,4 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, #define esw_debug(dev, format, ...) \ mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__) - #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index e3d81aeac83b..8964f71d106f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -38,6 +38,45 @@ #include "mlx5_core.h" #include "eswitch.h" +struct mlx5_flow_rule * +mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule; + int match_header = MLX5_MATCH_MISC_PARAMETERS; + u32 *match_v, *match_c; + void *misc; + + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_v || !match_c) { + esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n"); + flow_rule = ERR_PTR(-ENOMEM); + goto out; + } + + misc = MLX5_ADDR_OF(fte_match_param, match_v, misc_parameters); + 
MLX5_SET(fte_match_set_misc, misc, source_sqn, sqn); + MLX5_SET(fte_match_set_misc, misc, source_port, 0x0); /* source vport is 0 */ + + misc = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_sqn); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; + dest.vport_num = vport; + + flow_rule = mlx5_add_flow_rule(esw->fdb_table.fdb, match_header, match_c, + match_v, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest); + if (IS_ERR(flow_rule)) + esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule)); +out: + kfree(match_v); + kfree(match_c); + return flow_rule; +} + static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) { struct mlx5_flow_destination dest; From acbc2004d7129a1ecf02414c1da8808bdc06d5a2 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 1 Jul 2016 14:50:58 +0300 Subject: [PATCH 05/16] net/mlx5: Introduce offloads steering namespace Add a new namespace (MLX5_FLOW_NAMESPACE_OFFLOADS) to be populated with flow steering rules that deal with rules that have to be executed before the EN NIC steering rules are matched. The namespace is located after the bypass name-space and before the kernel name-space. Therefore, it precedes the HW processing done for rules set for the kernel NIC name-space. Under SRIOV, it would allow us to match on e-switch missed packets and forward them to the relevant VF representor TIR. Signed-off-by: Or Gerlitz Signed-off-by: Amir Vadai Signed-off-by: Saeed Mahameed Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 11 ++++++++++- include/linux/mlx5/fs.h | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index e912a3d2505e..b0401104afb9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -83,6 +83,11 @@ #define ANCHOR_NUM_LEVELS 1 #define ANCHOR_NUM_PRIOS 1 #define ANCHOR_MIN_LEVEL (BY_PASS_MIN_LEVEL + 1) + +#define OFFLOADS_MAX_FT 1 +#define OFFLOADS_NUM_PRIOS 1 +#define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1) + struct node_caps { size_t arr_sz; long *caps; @@ -98,7 +103,7 @@ static struct init_tree_node { int num_levels; } root_fs = { .type = FS_TYPE_NAMESPACE, - .ar_size = 4, + .ar_size = 5, .children = (struct init_tree_node[]) { ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_REQUIRED_CAPS(FS_CAP(flow_table_properties_nic_receive.flow_modify_en), @@ -107,6 +112,9 @@ static struct init_tree_node { FS_CAP(flow_table_properties_nic_receive.flow_table_modify)), ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, BY_PASS_PRIO_NUM_LEVELS))), + ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {}, + ADD_NS(ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, OFFLOADS_MAX_FT))), + ADD_PRIO(0, KERNEL_MIN_LEVEL, 0, {}, ADD_NS(ADD_MULTIPLE_PRIO(1, 1), ADD_MULTIPLE_PRIO(KERNEL_NIC_NUM_PRIOS, @@ -1369,6 +1377,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, switch (type) { case MLX5_FLOW_NAMESPACE_BYPASS: + case MLX5_FLOW_NAMESPACE_OFFLOADS: case MLX5_FLOW_NAMESPACE_KERNEL: case MLX5_FLOW_NAMESPACE_LEFTOVERS: case MLX5_FLOW_NAMESPACE_ANCHOR: diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 4b7a107d9c19..6ad111938709 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -54,6 +54,7 @@ static inline void build_leftovers_ft_param(int *priority, enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_BYPASS, + 
MLX5_FLOW_NAMESPACE_OFFLOADS, MLX5_FLOW_NAMESPACE_KERNEL, MLX5_FLOW_NAMESPACE_LEFTOVERS, MLX5_FLOW_NAMESPACE_ANCHOR, From c116c6eec6f72aac82ff4228ab1d277f3f9a2460 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 1 Jul 2016 14:50:59 +0300 Subject: [PATCH 06/16] net/mlx5: E-Switch, Add offloads table Belongs to the NIC offloads name-space, and to be used as part of the SRIOV offloads logic to steer packets that hit the e-switch miss rule to the TIR of the relevant VF representor. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 5 +++ .../mellanox/mlx5/core/eswitch_offloads.c | 31 +++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index b7fabd1b97d6..32db37ab867f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -155,6 +155,10 @@ enum { SRIOV_OFFLOADS }; +struct mlx5_esw_offload { + struct mlx5_flow_table *ft_offloads; +}; + struct mlx5_eswitch { struct mlx5_core_dev *dev; struct mlx5_l2_table l2_table; @@ -169,6 +173,7 @@ struct mlx5_eswitch { */ struct mutex state_lock; struct esw_mc_addr *mc_promisc; + struct mlx5_esw_offload offloads; int mode; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 8964f71d106f..e895c6ff0d61 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -212,3 +212,34 @@ void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) mlx5_destroy_flow_table(esw->fdb_table.fdb); } + +static int esw_create_offloads_table(struct mlx5_eswitch *esw) +{ + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft_offloads; + struct mlx5_core_dev *dev = esw->dev; + int err = 0; + + ns = 
mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_OFFLOADS); + if (!ns) { + esw_warn(esw->dev, "Failed to get offloads flow namespace\n"); + return -ENOMEM; + } + + ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0); + if (IS_ERR(ft_offloads)) { + err = PTR_ERR(ft_offloads); + esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err); + return err; + } + + esw->offloads.ft_offloads = ft_offloads; + return 0; +} + +static void esw_destroy_offloads_table(struct mlx5_eswitch *esw) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + + mlx5_destroy_flow_table(offloads->ft_offloads); +} From fed9ce22bf8ae8f417b8f047d2d630542d152ccf Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 1 Jul 2016 14:51:00 +0300 Subject: [PATCH 07/16] net/mlx5: E-Switch, Add API to create vport rx rules Add the API to create vport rx rules of the form packet meta-data :: vport == $VPORT --> $TIR where the TIR is opened by this VF representor. This logic will be used for packets that didn't match any rule in the e-switch datapath and should be received into the host OS through the netdevice that represents the VF they were sent from. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S.
Miller --- .../net/ethernet/mellanox/mlx5/core/eswitch.h | 4 + .../mellanox/mlx5/core/eswitch_offloads.c | 85 +++++++++++++++++++ 2 files changed, 89 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 32db37ab867f..cf959f7b0bea 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -157,6 +157,7 @@ enum { struct mlx5_esw_offload { struct mlx5_flow_table *ft_offloads; + struct mlx5_flow_group *vport_rx_group; }; struct mlx5_eswitch { @@ -201,6 +202,9 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, struct mlx5_flow_rule * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn); +struct mlx5_flow_rule * +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(dev, format, ...) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index e895c6ff0d61..7aad3671a399 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -243,3 +243,88 @@ static void esw_destroy_offloads_table(struct mlx5_eswitch *esw) mlx5_destroy_flow_table(offloads->ft_offloads); } + +static int esw_create_vport_rx_group(struct mlx5_eswitch *esw) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *g; + struct mlx5_priv *priv = &esw->dev->priv; + u32 *flow_group_in; + void *match_criteria, *misc; + int err = 0; + int nvports = priv->sriov.num_vfs + 2; + + flow_group_in = mlx5_vzalloc(inlen); + if (!flow_group_in) + return -ENOMEM; + + /* create vport rx group */ + memset(flow_group_in, 0, inlen); + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, + MLX5_MATCH_MISC_PARAMETERS); + + match_criteria = 
MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria); + misc = MLX5_ADDR_OF(fte_match_param, match_criteria, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, nvports - 1); + + g = mlx5_create_flow_group(esw->offloads.ft_offloads, flow_group_in); + + if (IS_ERR(g)) { + err = PTR_ERR(g); + mlx5_core_warn(esw->dev, "Failed to create vport rx group err %d\n", err); + goto out; + } + + esw->offloads.vport_rx_group = g; +out: + kfree(flow_group_in); + return err; +} + +static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw) +{ + mlx5_destroy_flow_group(esw->offloads.vport_rx_group); +} + +struct mlx5_flow_rule * +mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) +{ + struct mlx5_flow_destination dest; + struct mlx5_flow_rule *flow_rule; + int match_header = MLX5_MATCH_MISC_PARAMETERS; + u32 *match_v, *match_c; + void *misc; + + match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_v || !match_c) { + esw_warn(esw->dev, "Failed to alloc match parameters\n"); + flow_rule = ERR_PTR(-ENOMEM); + goto out; + } + + misc = MLX5_ADDR_OF(fte_match_param, match_v, misc_parameters); + MLX5_SET(fte_match_set_misc, misc, source_port, vport); + + misc = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters); + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); + + dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; + dest.tir_num = tirn; + + flow_rule = mlx5_add_flow_rule(esw->offloads.ft_offloads, match_header, match_c, + match_v, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + 0, &dest); + if (IS_ERR(flow_rule)) { + esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule)); + goto out; + } + +out: + kfree(match_v); + kfree(match_c); + return flow_rule; +} 
From 08f4b5918b2d6b491f0403cc1886f5cdccef89bb Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 1 Jul 2016 14:51:01 +0300 Subject: [PATCH 08/16] net/devlink: Add E-Switch mode control Add the commands to set and show the mode of SRIOV E-Switch, two modes are supported: * legacy: operating in the "old" L2 based mode (DMAC --> VF vport) * switchdev: the E-Switch is referred to as whitebox switch configured using standard tools such as tc, bridge, openvswitch etc. To allow working with the tools, for each VF, a VF representor netdevice is created by the E-Switch manager vendor device driver instance (e.g PF). Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/net/devlink.h | 3 ++ include/uapi/linux/devlink.h | 8 ++++ net/core/devlink.c | 87 ++++++++++++++++++++++++++++++++++++ 3 files changed, 98 insertions(+) diff --git a/include/net/devlink.h b/include/net/devlink.h index 1d45b61cb320..c99ffe8cef3c 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -90,6 +90,9 @@ struct devlink_ops { u16 tc_index, enum devlink_sb_pool_type pool_type, u32 *p_cur, u32 *p_max); + + int (*eswitch_mode_get)(struct devlink *devlink, u16 *p_mode); + int (*eswitch_mode_set)(struct devlink *devlink, u16 mode); }; static inline void *devlink_priv(struct devlink *devlink) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index ba0073b26fa6..915bfa74458c 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -57,6 +57,8 @@ enum devlink_command { DEVLINK_CMD_SB_OCC_SNAPSHOT, DEVLINK_CMD_SB_OCC_MAX_CLEAR, + DEVLINK_CMD_ESWITCH_MODE_GET, + DEVLINK_CMD_ESWITCH_MODE_SET, /* add new commands above here */ __DEVLINK_CMD_MAX, @@ -95,6 +97,11 @@ enum devlink_sb_threshold_type { #define DEVLINK_SB_THRESHOLD_TO_ALPHA_MAX 20 +enum devlink_eswitch_mode { + DEVLINK_ESWITCH_MODE_LEGACY, + DEVLINK_ESWITCH_MODE_SWITCHDEV, +}; + enum devlink_attr { /* don't change the order or add anything between, 
this is ABI! */ DEVLINK_ATTR_UNSPEC, @@ -125,6 +132,7 @@ enum devlink_attr { DEVLINK_ATTR_SB_TC_INDEX, /* u16 */ DEVLINK_ATTR_SB_OCC_CUR, /* u32 */ DEVLINK_ATTR_SB_OCC_MAX, /* u32 */ + DEVLINK_ATTR_ESWITCH_MODE, /* u16 */ /* add new attributes above here, update the policy in devlink.c */ diff --git a/net/core/devlink.c b/net/core/devlink.c index 933e8d4d3968..b2e592a198c0 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1394,6 +1394,78 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb, return -EOPNOTSUPP; } +static int devlink_eswitch_fill(struct sk_buff *msg, struct devlink *devlink, + enum devlink_command cmd, u32 portid, + u32 seq, int flags, u16 mode) +{ + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + + if (nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_eswitch_mode_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + const struct devlink_ops *ops = devlink->ops; + struct sk_buff *msg; + u16 mode; + int err; + + if (!ops || !ops->eswitch_mode_get) + return -EOPNOTSUPP; + + err = ops->eswitch_mode_get(devlink, &mode); + if (err) + return err; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_MODE_GET, + info->snd_portid, info->snd_seq, 0, mode); + + if (err) { + nlmsg_free(msg); + return err; + } + + return genlmsg_reply(msg, info); +} + +static int devlink_nl_cmd_eswitch_mode_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink *devlink = info->user_ptr[0]; + const struct devlink_ops *ops = devlink->ops; + u16 mode; + + if 
(!info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) + return -EINVAL; + + mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]); + + if (ops && ops->eswitch_mode_set) + return ops->eswitch_mode_set(devlink, mode); + return -EOPNOTSUPP; +} + static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, @@ -1407,6 +1479,7 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 }, [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 }, [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 }, + [DEVLINK_ATTR_ESWITCH_MODE] = { .type = NLA_U16 }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -1525,6 +1598,20 @@ static const struct genl_ops devlink_nl_ops[] = { DEVLINK_NL_FLAG_NEED_SB | DEVLINK_NL_FLAG_LOCK_PORTS, }, + { + .cmd = DEVLINK_CMD_ESWITCH_MODE_GET, + .doit = devlink_nl_cmd_eswitch_mode_get_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_ESWITCH_MODE_SET, + .doit = devlink_nl_cmd_eswitch_mode_set_doit, + .policy = devlink_nl_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, }; /** From feae908744d7f78b9dd06afda9de47f997f2d81a Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 1 Jul 2016 14:51:02 +0300 Subject: [PATCH 09/16] net/mlx5: Add devlink interface The devlink interface is initially used to set/get the mode of the SRIOV e-switch. Currently, these are only stubs for get/set, down-stream patch will actually fill them out. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlx5/core/Kconfig | 1 + .../net/ethernet/mellanox/mlx5/core/eswitch.h | 4 +++ .../mellanox/mlx5/core/eswitch_offloads.c | 10 +++++++ .../net/ethernet/mellanox/mlx5/core/main.c | 26 ++++++++++++++++--- 4 files changed, 37 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 1cf722eba607..aae46884bf93 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -4,6 +4,7 @@ config MLX5_CORE tristate "Mellanox Technologies ConnectX-4 and Connect-IB core driver" + depends on MAY_USE_DEVLINK depends on PCI default n ---help--- diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index cf959f7b0bea..7843f981509d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -35,6 +35,7 @@ #include #include +#include #include #define MLX5_MAX_UC_PER_VPORT(dev) \ @@ -205,6 +206,9 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn struct mlx5_flow_rule * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); +int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode); +int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(dev, format, ...) 
\ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7aad3671a399..e1727a9b9bcf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -328,3 +328,13 @@ out: kfree(match_c); return flow_rule; } + +int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode) +{ + return -EOPNOTSUPP; +} + +int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) +{ + return -EOPNOTSUPP; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 1f3b6d6a852e..1fb3c681df97 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -51,6 +51,7 @@ #ifdef CONFIG_RFS_ACCEL #include #endif +#include #include "mlx5_core.h" #include "fs_core.h" #ifdef CONFIG_MLX5_CORE_EN @@ -1315,19 +1316,28 @@ struct mlx5_core_event_handler { void *data); }; +static const struct devlink_ops mlx5_devlink_ops = { +#ifdef CONFIG_MLX5_CORE_EN + .eswitch_mode_set = mlx5_devlink_eswitch_mode_set, + .eswitch_mode_get = mlx5_devlink_eswitch_mode_get, +#endif +}; static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) { struct mlx5_core_dev *dev; + struct devlink *devlink; struct mlx5_priv *priv; int err; - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (!dev) { + devlink = devlink_alloc(&mlx5_devlink_ops, sizeof(*dev)); + if (!devlink) { dev_err(&pdev->dev, "kzalloc failed\n"); return -ENOMEM; } + + dev = devlink_priv(devlink); priv = &dev->priv; priv->pci_dev_data = id->driver_data; @@ -1364,15 +1374,21 @@ static int init_one(struct pci_dev *pdev, goto clean_health; } + err = devlink_register(devlink, &pdev->dev); + if (err) + goto clean_load; + return 0; +clean_load: + mlx5_unload_one(dev, priv); clean_health: mlx5_health_cleanup(dev); close_pci: mlx5_pci_close(dev, priv); clean_dev: 
pci_set_drvdata(pdev, NULL); - kfree(dev); + devlink_free(devlink); return err; } @@ -1380,8 +1396,10 @@ clean_dev: static void remove_one(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + struct devlink *devlink = priv_to_devlink(dev); struct mlx5_priv *priv = &dev->priv; + devlink_unregister(devlink); if (mlx5_unload_one(dev, priv)) { dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n"); mlx5_health_cleanup(dev); @@ -1390,7 +1408,7 @@ static void remove_one(struct pci_dev *pdev) mlx5_health_cleanup(dev); mlx5_pci_close(dev, priv); pci_set_drvdata(pdev, NULL); - kfree(dev); + devlink_free(devlink); } static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, From c930a3ad7453615b6707509e23afa5969095b5b7 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 1 Jul 2016 14:51:03 +0300 Subject: [PATCH 10/16] net/mlx5e: Add devlink based SRIOV mode changes Implement handlers for the devlink commands to get and set the SRIOV E-Switch mode. When turning to the switchdev/offloads mode, we disable the e-switch and enable it again in the new mode, create the NIC offloads table and create VF reps. When turning to legacy mode, we remove the VF reps and the offloads table, and re-initiate the e-switch in its legacy mode. The actual creation/removal of the VF reps is done in downstream patches. Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S.
Miller --- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 12 +- .../mellanox/mlx5/core/eswitch_offloads.c | 121 +++++++++++++++++- 2 files changed, 124 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 1fc4cfd36e53..12f509c8d65d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -81,8 +81,8 @@ enum { MC_ADDR_CHANGE | \ PROMISC_CHANGE) -int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports); -void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw); +int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); +void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) @@ -1561,7 +1561,7 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode) if (mode == SRIOV_LEGACY) err = esw_create_legacy_fdb_table(esw, nvfs + 1); else - err = esw_create_offloads_fdb_table(esw, nvfs + 1); + err = esw_offloads_init(esw, nvfs + 1); if (err) goto abort; @@ -1581,6 +1581,7 @@ abort: void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) { struct esw_mc_addr *mc_promisc; + int nvports; int i; if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || @@ -1591,6 +1592,7 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw->enabled_vports, esw->mode); mc_promisc = esw->mc_promisc; + nvports = esw->enabled_vports; for (i = 0; i < esw->total_vports; i++) esw_disable_vport(esw, i); @@ -1600,8 +1602,8 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) if (esw->mode == SRIOV_LEGACY) esw_destroy_legacy_fdb_table(esw); - else - esw_destroy_offloads_fdb_table(esw); + else if (esw->mode == SRIOV_OFFLOADS) + esw_offloads_cleanup(esw, nvports); esw->mode = SRIOV_NONE; /* VPORT 0 (PF) must be enabled back with non-sriov configuration */ diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index e1727a9b9bcf..312b6f31fd65 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -112,7 +112,7 @@ out: #define MAX_PF_SQ 256 -int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) +static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports) { int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); struct mlx5_core_dev *dev = esw->dev; @@ -200,7 +200,7 @@ ns_err: return err; } -void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) +static void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw) { if (!esw->fdb_table.fdb) return; @@ -329,12 +329,125 @@ out: return flow_rule; } +static int esw_offloads_start(struct mlx5_eswitch *esw) +{ + int err, num_vfs = esw->dev->priv.sriov.num_vfs; + + if (esw->mode != SRIOV_LEGACY) { + esw_warn(esw->dev, "Can't set offloads mode, SRIOV legacy not enabled\n"); + return -EINVAL; + } + + mlx5_eswitch_disable_sriov(esw); + err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_OFFLOADS); + if (err) + esw_warn(esw->dev, "Failed set eswitch to offloads, err %d\n", err); + return err; +} + +int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) +{ + int err; + + err = esw_create_offloads_fdb_table(esw, nvports); + if (err) + return err; + + err = esw_create_offloads_table(esw); + if (err) + goto create_ft_err; + + err = esw_create_vport_rx_group(esw); + if (err) + goto create_fg_err; + + return 0; + +create_fg_err: + esw_destroy_offloads_table(esw); + +create_ft_err: + esw_destroy_offloads_fdb_table(esw); + return err; +} + +static int esw_offloads_stop(struct mlx5_eswitch *esw) +{ + int err, num_vfs = esw->dev->priv.sriov.num_vfs; + + mlx5_eswitch_disable_sriov(esw); + err = mlx5_eswitch_enable_sriov(esw, num_vfs, SRIOV_LEGACY); + if (err) + esw_warn(esw->dev, "Failed set 
eswitch legacy mode. err %d\n", err); + + return err; +} + +void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) +{ + esw_destroy_vport_rx_group(esw); + esw_destroy_offloads_table(esw); + esw_destroy_offloads_fdb_table(esw); +} + +static int mlx5_esw_mode_from_devlink(u16 mode, u16 *mlx5_mode) +{ + switch (mode) { + case DEVLINK_ESWITCH_MODE_LEGACY: + *mlx5_mode = SRIOV_LEGACY; + break; + case DEVLINK_ESWITCH_MODE_SWITCHDEV: + *mlx5_mode = SRIOV_OFFLOADS; + break; + default: + return -EINVAL; + } + + return 0; +} + int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode) { - return -EOPNOTSUPP; + struct mlx5_core_dev *dev; + u16 cur_mlx5_mode, mlx5_mode = 0; + + dev = devlink_priv(devlink); + + if (!MLX5_CAP_GEN(dev, vport_group_manager)) + return -EOPNOTSUPP; + + cur_mlx5_mode = dev->priv.eswitch->mode; + + if (cur_mlx5_mode == SRIOV_NONE) + return -EOPNOTSUPP; + + if (mlx5_esw_mode_from_devlink(mode, &mlx5_mode)) + return -EINVAL; + + if (cur_mlx5_mode == mlx5_mode) + return 0; + + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV) + return esw_offloads_start(dev->priv.eswitch); + else if (mode == DEVLINK_ESWITCH_MODE_LEGACY) + return esw_offloads_stop(dev->priv.eswitch); + else + return -EINVAL; } int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) { - return -EOPNOTSUPP; + struct mlx5_core_dev *dev; + + dev = devlink_priv(devlink); + + if (!MLX5_CAP_GEN(dev, vport_group_manager)) + return -EOPNOTSUPP; + + if (dev->priv.eswitch->mode == SRIOV_NONE) + return -EOPNOTSUPP; + + *mode = dev->priv.eswitch->mode; + + return 0; } From b50d292b4399f4eb11e82d0430aacf62dd5d5365 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Fri, 1 Jul 2016 14:51:04 +0300 Subject: [PATCH 11/16] net/mlx5e: Create NIC global resources only once To allow creating more than one netdev over the same PCI function, we change the driver such that global NIC resources are created once and later be shared amongst all the mlx5e netdevs running over that port. 
Move the CQ UAR, PD (pdn), Transport Domain (tdn), MKey resources from being kept in the mlx5e priv part to a new resources structure (mlx5e_resources) placed under the mlx5_core device. This patch doesn't add any new functionality. Signed-off-by: Hadar Hen Zion Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/Makefile | 6 +- drivers/net/ethernet/mellanox/mlx5/core/en.h | 6 +- .../ethernet/mellanox/mlx5/core/en_common.c | 112 ++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 124 ++++++------------ include/linux/mlx5/driver.h | 13 ++ 5 files changed, 171 insertions(+), 90 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_common.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 96f18264a0ef..9b14dadd9309 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -6,8 +6,8 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ fs_counters.o rl.o mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ - en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \ - en_rx_am.o en_txrx.o en_clock.o vxlan.o en_tc.o \ - en_arfs.o + en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \ + en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \ + en_tc.o en_arfs.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index b97511bf4c7b..3226b92a344d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -570,10 +570,6 @@ struct mlx5e_priv { unsigned long state; struct mutex state_lock; /* Protects Interface state */ - struct mlx5_uar cq_uar; - u32 pdn; - u32 tdn; - struct mlx5_core_mkey mkey; struct mlx5_core_mkey umr_mkey; struct mlx5e_rq drop_rq; @@ -788,5 
+784,7 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, #endif u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev); +int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); +void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c new file mode 100644 index 000000000000..33b373216594 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "en.h" + +/* mlx5e global resources should be placed in this file. + * Global resources are common to all the netdevices created on the same nic. + */ + +static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, + struct mlx5_core_mkey *mkey) +{ + struct mlx5_create_mkey_mbox_in *in; + int err; + + in = mlx5_vzalloc(sizeof(*in)); + if (!in) + return -ENOMEM; + + in->seg.flags = MLX5_PERM_LOCAL_WRITE | + MLX5_PERM_LOCAL_READ | + MLX5_ACCESS_MODE_PA; + in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); + in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + + err = mlx5_core_create_mkey(mdev, mkey, in, sizeof(*in), NULL, NULL, + NULL); + + kvfree(in); + + return err; +} + +int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) +{ + struct mlx5e_resources *res = &mdev->mlx5e_res; + int err; + + err = mlx5_alloc_map_uar(mdev, &res->cq_uar, false); + if (err) { + mlx5_core_err(mdev, "alloc_map uar failed, %d\n", err); + return err; + } + + err = mlx5_core_alloc_pd(mdev, &res->pdn); + if (err) { + mlx5_core_err(mdev, "alloc pd failed, %d\n", err); + goto err_unmap_free_uar; + } + + err = mlx5_core_alloc_transport_domain(mdev, &res->td.tdn); + if (err) { + mlx5_core_err(mdev, "alloc td failed, %d\n", err); + goto err_dealloc_pd; + } + + err = mlx5e_create_mkey(mdev, res->pdn, &res->mkey); + if (err) { + mlx5_core_err(mdev, "create mkey failed, %d\n", err); + goto err_dealloc_transport_domain; + } + + return 0; + +err_dealloc_transport_domain: + mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); +err_dealloc_pd: + mlx5_core_dealloc_pd(mdev, res->pdn); +err_unmap_free_uar: + mlx5_unmap_free_uar(mdev, &res->cq_uar); + + return err; +} + +void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) +{ + struct mlx5e_resources *res = &mdev->mlx5e_res; + + mlx5_core_destroy_mkey(mdev, &res->mkey); + mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); + mlx5_core_dealloc_pd(mdev, res->pdn); + mlx5_unmap_free_uar(mdev, &res->cq_uar); +} diff
--git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a64ce5df5810..9b2e2b211cee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -858,7 +858,7 @@ static int mlx5e_create_cq(struct mlx5e_channel *c, mcq->comp = mlx5e_completion_event; mcq->event = mlx5e_cq_error_event; mcq->irqn = irqn; - mcq->uar = &priv->cq_uar; + mcq->uar = &mdev->mlx5e_res.cq_uar; for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) { struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i); @@ -1136,7 +1136,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->cpu = cpu; c->pdev = &priv->mdev->pdev->dev; c->netdev = priv->netdev; - c->mkey_be = cpu_to_be32(priv->mkey.key); + c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key); c->num_tc = priv->params.num_tc; if (priv->params.rx_am_enabled) @@ -1252,7 +1252,7 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, end_padding_mode, MLX5_WQ_END_PAD_MODE_ALIGN); MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); MLX5_SET(wq, wq, log_wq_sz, priv->params.log_rq_size); - MLX5_SET(wq, wq, pd, priv->pdn); + MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn); MLX5_SET(rqc, rqc, counter_set_id, priv->q_counter); param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); @@ -1277,7 +1277,7 @@ static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, void *wq = MLX5_ADDR_OF(sqc, sqc, wq); MLX5_SET(wq, wq, log_wq_stride, ilog2(MLX5_SEND_WQE_BB)); - MLX5_SET(wq, wq, pd, priv->pdn); + MLX5_SET(wq, wq, pd, priv->mdev->mlx5e_res.pdn); param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); } @@ -1299,7 +1299,7 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, { void *cqc = param->cqc; - MLX5_SET(cqc, cqc, uar_page, priv->cq_uar.index); + MLX5_SET(cqc, cqc, uar_page, priv->mdev->mlx5e_res.cq_uar.index); } static void 
mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, @@ -1920,7 +1920,7 @@ static int mlx5e_create_drop_cq(struct mlx5e_priv *priv, mcq->comp = mlx5e_completion_event; mcq->event = mlx5e_cq_error_event; mcq->irqn = irqn; - mcq->uar = &priv->cq_uar; + mcq->uar = &mdev->mlx5e_res.cq_uar; cq->priv = priv; @@ -1986,7 +1986,7 @@ static int mlx5e_create_tis(struct mlx5e_priv *priv, int tc) memset(in, 0, sizeof(in)); MLX5_SET(tisc, tisc, prio, tc << 1); - MLX5_SET(tisc, tisc, transport_domain, priv->tdn); + MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn); return mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]); } @@ -2029,7 +2029,7 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, { void *hfso = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer); - MLX5_SET(tirc, tirc, transport_domain, priv->tdn); + MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); #define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP) @@ -2136,7 +2136,7 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, u32 rqtn) { - MLX5_SET(tirc, tirc, transport_domain, priv->tdn); + MLX5_SET(tirc, tirc, transport_domain, priv->mdev->mlx5e_res.td.tdn); mlx5e_build_tir_ctx_lro(tirc, priv); @@ -3082,31 +3082,6 @@ static void mlx5e_build_netdev(struct net_device *netdev) mlx5e_set_netdev_dev_addr(netdev); } -static int mlx5e_create_mkey(struct mlx5e_priv *priv, u32 pdn, - struct mlx5_core_mkey *mkey) -{ - struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_create_mkey_mbox_in *in; - int err; - - in = mlx5_vzalloc(sizeof(*in)); - if (!in) - return -ENOMEM; - - in->seg.flags = MLX5_PERM_LOCAL_WRITE | - MLX5_PERM_LOCAL_READ | - MLX5_ACCESS_MODE_PA; - in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); - in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - - err = mlx5_core_create_mkey(mdev, mkey, in, sizeof(*in), NULL, 
NULL, - NULL); - - kvfree(in); - - return err; -} - static void mlx5e_create_q_counter(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@ -3149,7 +3124,7 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv) MLX5_ACCESS_MODE_MTT; mkc->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - mkc->flags_pd = cpu_to_be32(priv->pdn); + mkc->flags_pd = cpu_to_be32(mdev->mlx5e_res.pdn); mkc->len = cpu_to_be64(npages << PAGE_SHIFT); mkc->xlt_oct_size = cpu_to_be32(mlx5e_get_mtt_octw(npages)); mkc->log2_page_size = PAGE_SHIFT; @@ -3169,9 +3144,6 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) int nch = mlx5e_get_max_num_channels(mdev); int err; - if (mlx5e_check_required_hca_cap(mdev)) - return NULL; - netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), nch * MLX5E_MAX_NUM_TC, nch); @@ -3191,34 +3163,10 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) if (!priv->wq) goto err_free_netdev; - err = mlx5_alloc_map_uar(mdev, &priv->cq_uar, false); - if (err) { - mlx5_core_err(mdev, "alloc_map uar failed, %d\n", err); - goto err_destroy_wq; - } - - err = mlx5_core_alloc_pd(mdev, &priv->pdn); - if (err) { - mlx5_core_err(mdev, "alloc pd failed, %d\n", err); - goto err_unmap_free_uar; - } - - err = mlx5_core_alloc_transport_domain(mdev, &priv->tdn); - if (err) { - mlx5_core_err(mdev, "alloc td failed, %d\n", err); - goto err_dealloc_pd; - } - - err = mlx5e_create_mkey(priv, priv->pdn, &priv->mkey); - if (err) { - mlx5_core_err(mdev, "create mkey failed, %d\n", err); - goto err_dealloc_transport_domain; - } - err = mlx5e_create_umr_mkey(priv); if (err) { mlx5_core_err(mdev, "create umr mkey failed, %d\n", err); - goto err_destroy_mkey; + goto err_destroy_wq; } err = mlx5e_create_tises(priv); @@ -3304,18 +3252,6 @@ err_destroy_tises: err_destroy_umr_mkey: mlx5_core_destroy_mkey(mdev, &priv->umr_mkey); -err_destroy_mkey: - mlx5_core_destroy_mkey(mdev, &priv->mkey); - -err_dealloc_transport_domain: - 
mlx5_core_dealloc_transport_domain(mdev, priv->tdn); - -err_dealloc_pd: - mlx5_core_dealloc_pd(mdev, priv->pdn); - -err_unmap_free_uar: - mlx5_unmap_free_uar(mdev, &priv->cq_uar); - err_destroy_wq: destroy_workqueue(priv->wq); @@ -3325,9 +3261,27 @@ err_free_netdev: return NULL; } -static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) +static void *mlx5e_add(struct mlx5_core_dev *mdev) +{ + void *ret; + + if (mlx5e_check_required_hca_cap(mdev)) + return NULL; + + if (mlx5e_create_mdev_resources(mdev)) + return NULL; + + ret = mlx5e_create_netdev(mdev); + if (!ret) { + mlx5e_destroy_mdev_resources(mdev); + return NULL; + } + return ret; +} + +static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, + struct mlx5e_priv *priv) { - struct mlx5e_priv *priv = vpriv; struct net_device *netdev = priv->netdev; set_bit(MLX5E_STATE_DESTROYING, &priv->state); @@ -3351,10 +3305,6 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) mlx5e_close_drop_rq(priv); mlx5e_destroy_tises(priv); mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey); - mlx5_core_destroy_mkey(priv->mdev, &priv->mkey); - mlx5_core_dealloc_transport_domain(priv->mdev, priv->tdn); - mlx5_core_dealloc_pd(priv->mdev, priv->pdn); - mlx5_unmap_free_uar(priv->mdev, &priv->cq_uar); cancel_delayed_work_sync(&priv->update_stats_work); destroy_workqueue(priv->wq); @@ -3362,6 +3312,14 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv) free_netdev(netdev); } +static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) +{ + struct mlx5e_priv *priv = vpriv; + + mlx5e_destroy_netdev(mdev, priv); + mlx5e_destroy_mdev_resources(mdev); +} + static void *mlx5e_get_netdev(void *vpriv) { struct mlx5e_priv *priv = vpriv; @@ -3370,8 +3328,8 @@ static void *mlx5e_get_netdev(void *vpriv) } static struct mlx5_interface mlx5e_interface = { - .add = mlx5e_create_netdev, - .remove = mlx5e_destroy_netdev, + .add = mlx5e_add, + .remove = mlx5e_remove, .event = 
mlx5e_async_event, .protocol = MLX5_INTERFACE_PROTOCOL_ETH, .get_dev = mlx5e_get_netdev, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 46260fdc5305..e22b3456b2ee 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -578,6 +578,18 @@ enum mlx5_pci_status { MLX5_PCI_STATUS_ENABLED, }; +struct mlx5_td { + struct list_head tirs_list; + u32 tdn; +}; + +struct mlx5e_resources { + struct mlx5_uar cq_uar; + u32 pdn; + struct mlx5_td td; + struct mlx5_core_mkey mkey; +}; + struct mlx5_core_dev { struct pci_dev *pdev; /* sync pci state */ @@ -602,6 +614,7 @@ struct mlx5_core_dev { struct mlx5_profile *profile; atomic_t num_qps; u32 issi; + struct mlx5e_resources mlx5e_res; #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rmap; #endif From 724b2aa15126d9e24b36650c5cad9cf468c20785 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Fri, 1 Jul 2016 14:51:05 +0300 Subject: [PATCH 12/16] net/mlx5e: TIRs management refactoring The current refresh tirs self loopback mechanism, refreshes all the tirs belonging to the same mlx5e instance to prevent self loopback by packets sent over any ring of that instance. This mechanism relies on all the tirs/tises of an instance to be created with the same transport domain number (tdn). Change the driver to refresh all the tirs created under the same tdn regardless of which mlx5e netdev instance they belong to. This behaviour is needed for introducing new mlx5e instances which serve to represent SRIOV VFs. The representors and the PF share vport used for E-Switch management, and we want to avoid NIC level HW loopback between them, e.g when sending broadcast packets. To achieve that, both the representors and the PF NIC will share the tdn. This patch doesn't add any new functionality. Signed-off-by: Hadar Hen Zion Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 12 +++- .../net/ethernet/mellanox/mlx5/core/en_arfs.c | 14 ++--- .../ethernet/mellanox/mlx5/core/en_common.c | 48 ++++++++++++++++ .../ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +- .../net/ethernet/mellanox/mlx5/core/en_fs.c | 2 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 56 ++++--------------- 6 files changed, 77 insertions(+), 57 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 3226b92a344d..8dad50caa4c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -552,9 +552,10 @@ struct mlx5e_flow_steering { struct mlx5e_arfs_tables arfs; }; -struct mlx5e_direct_tir { +struct mlx5e_tir { u32 tirn; u32 rqtn; + struct list_head list; }; enum { @@ -576,8 +577,8 @@ struct mlx5e_priv { struct mlx5e_channel **channel; u32 tisn[MLX5E_MAX_NUM_TC]; u32 indir_rqtn; - u32 indir_tirn[MLX5E_NUM_INDIR_TIRS]; - struct mlx5e_direct_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; + struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; + struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; u32 tx_rates[MLX5E_MAX_NUM_SQS]; struct mlx5e_flow_steering fs; @@ -784,7 +785,12 @@ int mlx5e_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb, #endif u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev); +int mlx5e_create_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir, u32 *in, int inlen); +void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir); int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); +int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev); #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c index 3515e78ba68f..10f18d46b8ab 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c @@ -93,14 +93,14 @@ static enum mlx5e_traffic_types arfs_get_tt(enum arfs_type type) static int arfs_disable(struct mlx5e_priv *priv) { struct mlx5_flow_destination dest; - u32 *tirn = priv->indir_tirn; + struct mlx5e_tir *tir = priv->indir_tir; int err = 0; int tt; int i; dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; for (i = 0; i < ARFS_NUM_TYPES; i++) { - dest.tir_num = tirn[i]; + dest.tir_num = tir[i].tirn; tt = arfs_get_tt(i); /* Modify ttc rules destination to bypass the aRFS tables*/ err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt], @@ -176,7 +176,7 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type]; struct mlx5_flow_destination dest; u8 match_criteria_enable = 0; - u32 *tirn = priv->indir_tirn; + struct mlx5e_tir *tir = priv->indir_tir; u32 *match_criteria; u32 *match_value; int err = 0; @@ -192,16 +192,16 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv, dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR; switch (type) { case ARFS_IPV4_TCP: - dest.tir_num = tirn[MLX5E_TT_IPV4_TCP]; + dest.tir_num = tir[MLX5E_TT_IPV4_TCP].tirn; break; case ARFS_IPV4_UDP: - dest.tir_num = tirn[MLX5E_TT_IPV4_UDP]; + dest.tir_num = tir[MLX5E_TT_IPV4_UDP].tirn; break; case ARFS_IPV6_TCP: - dest.tir_num = tirn[MLX5E_TT_IPV6_TCP]; + dest.tir_num = tir[MLX5E_TT_IPV6_TCP].tirn; break; case ARFS_IPV6_UDP: - dest.tir_num = tirn[MLX5E_TT_IPV6_UDP]; + dest.tir_num = tir[MLX5E_TT_IPV6_UDP].tirn; break; default: err = -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 33b373216594..673043ccd76c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -36,6 +36,27 @@ * Global resources are common to all the netdevices crated on the same nic. 
*/ +int mlx5e_create_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir, u32 *in, int inlen) +{ + int err; + + err = mlx5_core_create_tir(mdev, in, inlen, &tir->tirn); + if (err) + return err; + + list_add(&tir->list, &mdev->mlx5e_res.td.tirs_list); + + return 0; +} + +void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, + struct mlx5e_tir *tir) +{ + mlx5_core_destroy_tir(mdev, tir->tirn); + list_del(&tir->list); +} + static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey) { @@ -89,6 +110,8 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev) goto err_dealloc_transport_domain; } + INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list); + return 0; err_dealloc_transport_domain: @@ -110,3 +133,28 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) mlx5_core_dealloc_pd(mdev, res->pdn); mlx5_unmap_free_uar(mdev, &res->cq_uar); } + +int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev) +{ + struct mlx5e_tir *tir; + void *in; + int inlen; + int err; + + inlen = MLX5_ST_SZ_BYTES(modify_tir_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); + + list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) { + err = mlx5_core_modify_tir(mdev, tir->tirn, in, inlen); + if (err) + return err; + } + + kvfree(in); + + return 0; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index b29684d9fcd6..5b88967ec378 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -876,7 +876,7 @@ static void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen) mlx5e_build_tir_ctx_hash(tirc, priv); for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) - mlx5_core_modify_tir(mdev, priv->indir_tirn[i], in, inlen); + mlx5_core_modify_tir(mdev, priv->indir_tir[i].tirn, in, inlen); } static int 
mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index b32740092854..606e69b4babc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -655,7 +655,7 @@ static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv) if (tt == MLX5E_TT_ANY) dest.tir_num = priv->direct_tir[0].tirn; else - dest.tir_num = priv->indir_tirn[tt]; + dest.tir_num = priv->indir_tir[tt].tirn; rules[tt] = mlx5e_generate_ttc_rule(priv, ft, &dest, ttc_rules[tt].etype, ttc_rules[tt].proto); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 9b2e2b211cee..30efa8a9207f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1661,7 +1661,7 @@ static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) mlx5e_build_tir_ctx_lro(tirc, priv); for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - err = mlx5_core_modify_tir(mdev, priv->indir_tirn[tt], in, + err = mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen); if (err) goto free_in; @@ -1680,40 +1680,6 @@ free_in: return err; } -static int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5e_priv *priv) -{ - void *in; - int inlen; - int err; - int i; - - inlen = MLX5_ST_SZ_BYTES(modify_tir_in); - in = mlx5_vzalloc(inlen); - if (!in) - return -ENOMEM; - - MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1); - - for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) { - err = mlx5_core_modify_tir(priv->mdev, priv->indir_tirn[i], in, - inlen); - if (err) - return err; - } - - for (i = 0; i < priv->params.num_channels; i++) { - err = mlx5_core_modify_tir(priv->mdev, - priv->direct_tir[i].tirn, in, - inlen); - if (err) - return err; - } - - kvfree(in); - - return 0; -} - static int mlx5e_set_mtu(struct mlx5e_priv *priv, u16 mtu) { struct 
mlx5_core_dev *mdev = priv->mdev; @@ -1804,7 +1770,7 @@ int mlx5e_open_locked(struct net_device *netdev) goto err_clear_state_opened_flag; } - err = mlx5e_refresh_tirs_self_loopback_enable(priv); + err = mlx5e_refresh_tirs_self_loopback_enable(priv->mdev); if (err) { netdev_err(netdev, "%s: mlx5e_refresh_tirs_self_loopback_enable failed, %d\n", __func__, err); @@ -2148,9 +2114,9 @@ static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, static int mlx5e_create_tirs(struct mlx5e_priv *priv) { int nch = mlx5e_get_max_num_channels(priv->mdev); + struct mlx5e_tir *tir; void *tirc; int inlen; - u32 *tirn; int err; u32 *in; int ix; @@ -2164,10 +2130,10 @@ static int mlx5e_create_tirs(struct mlx5e_priv *priv) /* indirect tirs */ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { memset(in, 0, inlen); - tirn = &priv->indir_tirn[tt]; + tir = &priv->indir_tir[tt]; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); mlx5e_build_indir_tir_ctx(priv, tirc, tt); - err = mlx5_core_create_tir(priv->mdev, in, inlen, tirn); + err = mlx5e_create_tir(priv->mdev, tir, in, inlen); if (err) goto err_destroy_tirs; } @@ -2175,11 +2141,11 @@ static int mlx5e_create_tirs(struct mlx5e_priv *priv) /* direct tirs */ for (ix = 0; ix < nch; ix++) { memset(in, 0, inlen); - tirn = &priv->direct_tir[ix].tirn; + tir = &priv->direct_tir[ix]; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); mlx5e_build_direct_tir_ctx(priv, tirc, priv->direct_tir[ix].rqtn); - err = mlx5_core_create_tir(priv->mdev, in, inlen, tirn); + err = mlx5e_create_tir(priv->mdev, tir, in, inlen); if (err) goto err_destroy_ch_tirs; } @@ -2190,11 +2156,11 @@ static int mlx5e_create_tirs(struct mlx5e_priv *priv) err_destroy_ch_tirs: for (ix--; ix >= 0; ix--) - mlx5_core_destroy_tir(priv->mdev, priv->direct_tir[ix].tirn); + mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[ix]); err_destroy_tirs: for (tt--; tt >= 0; tt--) - mlx5_core_destroy_tir(priv->mdev, priv->indir_tirn[tt]); + mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]); 
kvfree(in); @@ -2207,10 +2173,10 @@ static void mlx5e_destroy_tirs(struct mlx5e_priv *priv) int i; for (i = 0; i < nch; i++) - mlx5_core_destroy_tir(priv->mdev, priv->direct_tir[i].tirn); + mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[i]); for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) - mlx5_core_destroy_tir(priv->mdev, priv->indir_tirn[i]); + mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]); } int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd) From 398f33511e97aad7f259e864a1596fc8ef559dc1 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Fri, 1 Jul 2016 14:51:06 +0300 Subject: [PATCH 13/16] net/mlx5e: Mark enabled RQTs instances explicitly In the current driver implementation two types of receive queue tables (RQTs) are in use - direct and indirect. Change the driver to mark each new created RQT (direct or indirect) as "enabled". This behaviour is needed for introducing new mlx5e instances which serve to represent SRIOV VFs. The VF representors will have only one type of RQTs (direct). An "enabled" flag is added to each RQT to allow better handling and code sharing between the representors and the nic netdevices. This patch doesn't add any new functionality. Signed-off-by: Hadar Hen Zion Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 13 ++++-- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 2 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 45 +++++++++++-------- 3 files changed, 37 insertions(+), 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 8dad50caa4c9..91c6bbeca656 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -552,10 +552,15 @@ struct mlx5e_flow_steering { struct mlx5e_arfs_tables arfs; }; -struct mlx5e_tir { - u32 tirn; +struct mlx5e_rqt { u32 rqtn; - struct list_head list; + bool enabled; +}; + +struct mlx5e_tir { + u32 tirn; + struct mlx5e_rqt rqt; + struct list_head list; }; enum { @@ -576,7 +581,7 @@ struct mlx5e_priv { struct mlx5e_channel **channel; u32 tisn[MLX5E_MAX_NUM_TC]; - u32 indir_rqtn; + struct mlx5e_rqt indir_rqt; struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; u32 tx_rates[MLX5E_MAX_NUM_SQS]; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 5b88967ec378..7e61ffa96732 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -898,7 +898,7 @@ static int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir, mutex_lock(&priv->state_lock); if (indir) { - u32 rqtn = priv->indir_rqtn; + u32 rqtn = priv->indir_rqt.rqtn; memcpy(priv->params.indirection_rqt, indir, sizeof(priv->params.indirection_rqt)); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 30efa8a9207f..7f1f1ec24475 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1486,7 +1486,8 @@ static void mlx5e_fill_direct_rqt_rqn(struct mlx5e_priv *priv, void *rqtc, MLX5_SET(rqtc, 
rqtc, rq_num[0], rqn); } -static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, int ix, u32 *rqtn) +static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, + int ix, struct mlx5e_rqt *rqt) { struct mlx5_core_dev *mdev = priv->mdev; void *rqtc; @@ -1509,34 +1510,37 @@ static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, int ix, u32 *rqtn) else mlx5e_fill_direct_rqt_rqn(priv, rqtc, ix); - err = mlx5_core_create_rqt(mdev, in, inlen, rqtn); + err = mlx5_core_create_rqt(mdev, in, inlen, &rqt->rqtn); + if (!err) + rqt->enabled = true; kvfree(in); return err; } -static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, u32 rqtn) +static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt) { - mlx5_core_destroy_rqt(priv->mdev, rqtn); + rqt->enabled = false; + mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn); } static int mlx5e_create_rqts(struct mlx5e_priv *priv) { int nch = mlx5e_get_max_num_channels(priv->mdev); - u32 *rqtn; + struct mlx5e_rqt *rqt; int err; int ix; /* Indirect RQT */ - rqtn = &priv->indir_rqtn; - err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqtn); + rqt = &priv->indir_rqt; + err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt); if (err) return err; /* Direct RQTs */ for (ix = 0; ix < nch; ix++) { - rqtn = &priv->direct_tir[ix].rqtn; - err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqtn); + rqt = &priv->direct_tir[ix].rqt; + err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqt); if (err) goto err_destroy_rqts; } @@ -1545,9 +1549,9 @@ static int mlx5e_create_rqts(struct mlx5e_priv *priv) err_destroy_rqts: for (ix--; ix >= 0; ix--) - mlx5e_destroy_rqt(priv, priv->direct_tir[ix].rqtn); + mlx5e_destroy_rqt(priv, &priv->direct_tir[ix].rqt); - mlx5e_destroy_rqt(priv, priv->indir_rqtn); + mlx5e_destroy_rqt(priv, &priv->indir_rqt); return err; } @@ -1558,9 +1562,9 @@ static void mlx5e_destroy_rqts(struct mlx5e_priv *priv) int i; for (i = 0; i < nch; i++) - mlx5e_destroy_rqt(priv, priv->direct_tir[i].rqtn); + 
mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); - mlx5e_destroy_rqt(priv, priv->indir_rqtn); + mlx5e_destroy_rqt(priv, &priv->indir_rqt); } int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix) @@ -1598,10 +1602,15 @@ static void mlx5e_redirect_rqts(struct mlx5e_priv *priv) u32 rqtn; int ix; - rqtn = priv->indir_rqtn; - mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0); + if (priv->indir_rqt.enabled) { + rqtn = priv->indir_rqt.rqtn; + mlx5e_redirect_rqt(priv, rqtn, MLX5E_INDIR_RQT_SIZE, 0); + } + for (ix = 0; ix < priv->params.num_channels; ix++) { - rqtn = priv->direct_tir[ix].rqtn; + if (!priv->direct_tir[ix].rqt.enabled) + continue; + rqtn = priv->direct_tir[ix].rqt.rqtn; mlx5e_redirect_rqt(priv, rqtn, 1, ix); } } @@ -2012,7 +2021,7 @@ static void mlx5e_build_indir_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, mlx5e_build_tir_ctx_lro(tirc, priv); MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT); - MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqtn); + MLX5_SET(tirc, tirc, indirect_table, priv->indir_rqt.rqtn); mlx5e_build_tir_ctx_hash(tirc, priv); switch (tt) { @@ -2144,7 +2153,7 @@ static int mlx5e_create_tirs(struct mlx5e_priv *priv) tir = &priv->direct_tir[ix]; tirc = MLX5_ADDR_OF(create_tir_in, in, ctx); mlx5e_build_direct_tir_ctx(priv, tirc, - priv->direct_tir[ix].rqtn); + priv->direct_tir[ix].rqt.rqtn); err = mlx5e_create_tir(priv->mdev, tir, in, inlen); if (err) goto err_destroy_ch_tirs; From 6bfd390ba5466675f6f02f77a3e957bd4e6075ee Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Fri, 1 Jul 2016 14:51:07 +0300 Subject: [PATCH 14/16] net/mlx5e: Add support for multiple profiles To allow support in representor netdevices where we create more than one netdevice per NIC, add profiles to the mlx5e driver. The profiling allows for creation of mlx5e instances with different characteristics. Each profile implements its own behavior using set of function pointers defined in struct mlx5e_profile. 
This is done to allow for avoiding complex per-profile branching in the code. Currently only the profile for the conventional NIC is implemented, which is of use when a netdev is created upon pci probe. This patch doesn't add any new functionality. Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 17 + .../net/ethernet/mellanox/mlx5/core/en_main.c | 341 ++++++++++++------ 2 files changed, 240 insertions(+), 118 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 91c6bbeca656..edfc9beb1d2c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -568,6 +568,22 @@ enum { MLX5E_NIC_PRIO }; +struct mlx5e_profile { + void (*init)(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile); + void (*cleanup)(struct mlx5e_priv *priv); + int (*init_rx)(struct mlx5e_priv *priv); + void (*cleanup_rx)(struct mlx5e_priv *priv); + int (*init_tx)(struct mlx5e_priv *priv); + void (*cleanup_tx)(struct mlx5e_priv *priv); + void (*enable)(struct mlx5e_priv *priv); + void (*disable)(struct mlx5e_priv *priv); + void (*update_stats)(struct mlx5e_priv *priv); + int (*max_nch)(struct mlx5_core_dev *mdev); + int max_tc; +}; + struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_sq **txq_to_sq_map; @@ -601,6 +617,7 @@ struct mlx5e_priv { struct mlx5e_stats stats; struct mlx5e_tstamp tstamp; u16 q_counter; + const struct mlx5e_profile *profile; }; enum mlx5e_link_mode { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 7f1f1ec24475..3e22c5e8dcf6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -233,7 +233,7 @@ static void mlx5e_update_stats_work(struct work_struct *work)
update_stats_work); mutex_lock(&priv->state_lock); if (test_bit(MLX5E_STATE_OPENED, &priv->state)) { - mlx5e_update_stats(priv); + priv->profile->update_stats(priv); queue_delayed_work(priv->wq, dwork, msecs_to_jiffies(MLX5E_UPDATE_STATS_INTERVAL)); } @@ -1036,7 +1036,7 @@ static void mlx5e_build_channeltc_to_txq_map(struct mlx5e_priv *priv, int ix) { int i; - for (i = 0; i < MLX5E_MAX_NUM_TC; i++) + for (i = 0; i < priv->profile->max_tc; i++) priv->channeltc_to_txq_map[ix][i] = ix + i * priv->params.num_channels; } @@ -1524,21 +1524,20 @@ static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt) mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn); } -static int mlx5e_create_rqts(struct mlx5e_priv *priv) +static int mlx5e_create_indirect_rqts(struct mlx5e_priv *priv) +{ + struct mlx5e_rqt *rqt = &priv->indir_rqt; + + return mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt); +} + +static int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) { - int nch = mlx5e_get_max_num_channels(priv->mdev); struct mlx5e_rqt *rqt; int err; int ix; - /* Indirect RQT */ - rqt = &priv->indir_rqt; - err = mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt); - if (err) - return err; - - /* Direct RQTs */ - for (ix = 0; ix < nch; ix++) { + for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) { rqt = &priv->direct_tir[ix].rqt; err = mlx5e_create_rqt(priv, 1 /*size */, ix, rqt); if (err) @@ -1551,22 +1550,9 @@ err_destroy_rqts: for (ix--; ix >= 0; ix--) mlx5e_destroy_rqt(priv, &priv->direct_tir[ix].rqt); - mlx5e_destroy_rqt(priv, &priv->indir_rqt); - return err; } -static void mlx5e_destroy_rqts(struct mlx5e_priv *priv) -{ - int nch = mlx5e_get_max_num_channels(priv->mdev); - int i; - - for (i = 0; i < nch; i++) - mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); - - mlx5e_destroy_rqt(priv, &priv->indir_rqt); -} - int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz, int ix) { struct mlx5_core_dev *mdev = priv->mdev; @@ -1676,7 +1662,7 @@ static int 
mlx5e_modify_tirs_lro(struct mlx5e_priv *priv) goto free_in; } - for (ix = 0; ix < mlx5e_get_max_num_channels(mdev); ix++) { + for (ix = 0; ix < priv->profile->max_nch(priv->mdev); ix++) { err = mlx5_core_modify_tir(mdev, priv->direct_tir[ix].tirn, in, inlen); if (err) @@ -1976,7 +1962,7 @@ static int mlx5e_create_tises(struct mlx5e_priv *priv) int err; int tc; - for (tc = 0; tc < MLX5E_MAX_NUM_TC; tc++) { + for (tc = 0; tc < priv->profile->max_tc; tc++) { err = mlx5e_create_tis(priv, tc); if (err) goto err_close_tises; @@ -1991,11 +1977,11 @@ err_close_tises: return err; } -static void mlx5e_destroy_tises(struct mlx5e_priv *priv) +static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) { int tc; - for (tc = 0; tc < MLX5E_MAX_NUM_TC; tc++) + for (tc = 0; tc < priv->profile->max_tc; tc++) mlx5e_destroy_tis(priv, tc); } @@ -2120,15 +2106,13 @@ static void mlx5e_build_direct_tir_ctx(struct mlx5e_priv *priv, u32 *tirc, MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_INVERTED_XOR8); } -static int mlx5e_create_tirs(struct mlx5e_priv *priv) +static int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv) { - int nch = mlx5e_get_max_num_channels(priv->mdev); struct mlx5e_tir *tir; void *tirc; int inlen; int err; u32 *in; - int ix; int tt; inlen = MLX5_ST_SZ_BYTES(create_tir_in); @@ -2136,7 +2120,6 @@ static int mlx5e_create_tirs(struct mlx5e_priv *priv) if (!in) return -ENOMEM; - /* indirect tirs */ for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { memset(in, 0, inlen); tir = &priv->indir_tir[tt]; @@ -2147,7 +2130,34 @@ static int mlx5e_create_tirs(struct mlx5e_priv *priv) goto err_destroy_tirs; } - /* direct tirs */ + kvfree(in); + + return 0; + +err_destroy_tirs: + for (tt--; tt >= 0; tt--) + mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]); + + kvfree(in); + + return err; +} + +static int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) +{ + int nch = priv->profile->max_nch(priv->mdev); + struct mlx5e_tir *tir; + void *tirc; + int inlen; + int err; + u32 *in; + int 
ix; + + inlen = MLX5_ST_SZ_BYTES(create_tir_in); + in = mlx5_vzalloc(inlen); + if (!in) + return -ENOMEM; + for (ix = 0; ix < nch; ix++) { memset(in, 0, inlen); tir = &priv->direct_tir[ix]; @@ -2167,25 +2177,26 @@ err_destroy_ch_tirs: for (ix--; ix >= 0; ix--) mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[ix]); -err_destroy_tirs: - for (tt--; tt >= 0; tt--) - mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[tt]); - kvfree(in); return err; } -static void mlx5e_destroy_tirs(struct mlx5e_priv *priv) +static void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) { - int nch = mlx5e_get_max_num_channels(priv->mdev); + int i; + + for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) + mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]); +} + +static void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) +{ + int nch = priv->profile->max_nch(priv->mdev); int i; for (i = 0; i < nch; i++) mlx5e_destroy_tir(priv->mdev, &priv->direct_tir[i]); - - for (i = 0; i < MLX5E_NUM_INDIR_TIRS; i++) - mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]); } int mlx5e_modify_rqs_vsd(struct mlx5e_priv *priv, bool vsd) @@ -2867,9 +2878,9 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; } -static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, - struct net_device *netdev, - int num_channels) +static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile) { struct mlx5e_priv *priv = netdev_priv(netdev); u32 link_speed = 0; @@ -2938,7 +2949,7 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, sizeof(priv->params.toeplitz_hash_key)); mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt, - MLX5E_INDIR_RQT_SIZE, num_channels); + MLX5E_INDIR_RQT_SIZE, profile->max_nch(mdev)); priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; @@ -2949,7 +2960,8 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev 
*mdev, priv->mdev = mdev; priv->netdev = netdev; - priv->params.num_channels = num_channels; + priv->params.num_channels = profile->max_nch(mdev); + priv->profile = profile; #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_ets_init(priv); @@ -2974,7 +2986,7 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) } } -static void mlx5e_build_netdev(struct net_device *netdev) +static void mlx5e_build_nic_netdev(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; @@ -3084,7 +3096,7 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv) struct mlx5_mkey_seg *mkc; int inlen = sizeof(*in); u64 npages = - mlx5e_get_max_num_channels(mdev) * MLX5_CHANNEL_MAX_NUM_MTTS; + priv->profile->max_nch(mdev) * MLX5_CHANNEL_MAX_NUM_MTTS; int err; in = mlx5_vzalloc(inlen); @@ -3112,23 +3124,159 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv) return err; } -static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) +static void mlx5e_nic_init(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + mlx5e_build_nic_netdev_priv(mdev, netdev, profile); + mlx5e_build_nic_netdev(netdev); + mlx5e_vxlan_init(priv); +} + +static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) +{ + mlx5e_vxlan_cleanup(priv); +} + +static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int err; + int i; + + err = mlx5e_create_indirect_rqts(priv); + if (err) { + mlx5_core_warn(mdev, "create indirect rqts failed, %d\n", err); + return err; + } + + err = mlx5e_create_direct_rqts(priv); + if (err) { + mlx5_core_warn(mdev, "create direct rqts failed, %d\n", err); + goto err_destroy_indirect_rqts; + } + + err = mlx5e_create_indirect_tirs(priv); + if (err) { + mlx5_core_warn(mdev, "create indirect tirs failed, %d\n", err); + goto err_destroy_direct_rqts; + } + + err = 
mlx5e_create_direct_tirs(priv); + if (err) { + mlx5_core_warn(mdev, "create direct tirs failed, %d\n", err); + goto err_destroy_indirect_tirs; + } + + err = mlx5e_create_flow_steering(priv); + if (err) { + mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); + goto err_destroy_direct_tirs; + } + + err = mlx5e_tc_init(priv); + if (err) + goto err_destroy_flow_steering; + + return 0; + +err_destroy_flow_steering: + mlx5e_destroy_flow_steering(priv); +err_destroy_direct_tirs: + mlx5e_destroy_direct_tirs(priv); +err_destroy_indirect_tirs: + mlx5e_destroy_indirect_tirs(priv); +err_destroy_direct_rqts: + for (i = 0; i < priv->profile->max_nch(mdev); i++) + mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); +err_destroy_indirect_rqts: + mlx5e_destroy_rqt(priv, &priv->indir_rqt); + return err; +} + +static void mlx5e_cleanup_nic_rx(struct mlx5e_priv *priv) +{ + int i; + + mlx5e_tc_cleanup(priv); + mlx5e_destroy_flow_steering(priv); + mlx5e_destroy_direct_tirs(priv); + mlx5e_destroy_indirect_tirs(priv); + for (i = 0; i < priv->profile->max_nch(priv->mdev); i++) + mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + mlx5e_destroy_rqt(priv, &priv->indir_rqt); +} + +static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) +{ + int err; + + err = mlx5e_create_tises(priv); + if (err) { + mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err); + return err; + } + +#ifdef CONFIG_MLX5_CORE_EN_DCB + mlx5e_dcbnl_ieee_setets_core(priv, &priv->params.ets); +#endif + return 0; +} + +static void mlx5e_nic_enable(struct mlx5e_priv *priv) +{ + struct net_device *netdev = priv->netdev; + struct mlx5_core_dev *mdev = priv->mdev; + + if (mlx5e_vxlan_allowed(mdev)) { + rtnl_lock(); + udp_tunnel_get_rx_info(netdev); + rtnl_unlock(); + } + + mlx5e_enable_async_events(priv); + queue_work(priv->wq, &priv->set_rx_mode_work); +} + +static void mlx5e_nic_disable(struct mlx5e_priv *priv) +{ + queue_work(priv->wq, &priv->set_rx_mode_work); + mlx5e_disable_async_events(priv); +} + +static 
const struct mlx5e_profile mlx5e_nic_profile = { + .init = mlx5e_nic_init, + .cleanup = mlx5e_nic_cleanup, + .init_rx = mlx5e_init_nic_rx, + .cleanup_rx = mlx5e_cleanup_nic_rx, + .init_tx = mlx5e_init_nic_tx, + .cleanup_tx = mlx5e_cleanup_nic_tx, + .enable = mlx5e_nic_enable, + .disable = mlx5e_nic_disable, + .update_stats = mlx5e_update_stats, + .max_nch = mlx5e_get_max_num_channels, + .max_tc = MLX5E_MAX_NUM_TC, +}; + +static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile) { struct net_device *netdev; struct mlx5e_priv *priv; - int nch = mlx5e_get_max_num_channels(mdev); + int nch = profile->max_nch(mdev); int err; netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), - nch * MLX5E_MAX_NUM_TC, + nch * profile->max_tc, nch); if (!netdev) { mlx5_core_err(mdev, "alloc_etherdev_mqs() failed\n"); return NULL; } - mlx5e_build_netdev_priv(mdev, netdev, nch); - mlx5e_build_netdev(netdev); + profile->init(mdev, netdev, profile); netif_carrier_off(netdev); @@ -3144,85 +3292,44 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev) goto err_destroy_wq; } - err = mlx5e_create_tises(priv); - if (err) { - mlx5_core_warn(mdev, "create tises failed, %d\n", err); + err = profile->init_tx(priv); + if (err) goto err_destroy_umr_mkey; - } err = mlx5e_open_drop_rq(priv); if (err) { mlx5_core_err(mdev, "open drop rq failed, %d\n", err); - goto err_destroy_tises; + goto err_cleanup_tx; } - err = mlx5e_create_rqts(priv); - if (err) { - mlx5_core_warn(mdev, "create rqts failed, %d\n", err); + err = profile->init_rx(priv); + if (err) goto err_close_drop_rq; - } - - err = mlx5e_create_tirs(priv); - if (err) { - mlx5_core_warn(mdev, "create tirs failed, %d\n", err); - goto err_destroy_rqts; - } - - err = mlx5e_create_flow_steering(priv); - if (err) { - mlx5_core_warn(mdev, "create flow steering failed, %d\n", err); - goto err_destroy_tirs; - } mlx5e_create_q_counter(priv); mlx5e_init_l2_addr(priv); - mlx5e_vxlan_init(priv); - - err = 
mlx5e_tc_init(priv); - if (err) - goto err_dealloc_q_counters; - -#ifdef CONFIG_MLX5_CORE_EN_DCB - mlx5e_dcbnl_ieee_setets_core(priv, &priv->params.ets); -#endif - err = register_netdev(netdev); if (err) { mlx5_core_err(mdev, "register_netdev failed, %d\n", err); - goto err_tc_cleanup; + goto err_dealloc_q_counters; } - if (mlx5e_vxlan_allowed(mdev)) { - rtnl_lock(); - udp_tunnel_get_rx_info(netdev); - rtnl_unlock(); - } - - mlx5e_enable_async_events(priv); - queue_work(priv->wq, &priv->set_rx_mode_work); + if (profile->enable) + profile->enable(priv); return priv; -err_tc_cleanup: - mlx5e_tc_cleanup(priv); - err_dealloc_q_counters: mlx5e_destroy_q_counter(priv); - mlx5e_destroy_flow_steering(priv); - -err_destroy_tirs: - mlx5e_destroy_tirs(priv); - -err_destroy_rqts: - mlx5e_destroy_rqts(priv); + profile->cleanup_rx(priv); err_close_drop_rq: mlx5e_close_drop_rq(priv); -err_destroy_tises: - mlx5e_destroy_tises(priv); +err_cleanup_tx: + profile->cleanup_tx(priv); err_destroy_umr_mkey: mlx5_core_destroy_mkey(mdev, &priv->umr_mkey); @@ -3246,7 +3353,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) if (mlx5e_create_mdev_resources(mdev)) return NULL; - ret = mlx5e_create_netdev(mdev); + ret = mlx5e_create_netdev(mdev, &mlx5e_nic_profile); if (!ret) { mlx5e_destroy_mdev_resources(mdev); return NULL; @@ -3254,15 +3361,15 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) return ret; } -static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, - struct mlx5e_priv *priv) +static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) { + const struct mlx5e_profile *profile = priv->profile; struct net_device *netdev = priv->netdev; set_bit(MLX5E_STATE_DESTROYING, &priv->state); + if (profile->disable) + profile->disable(priv); - queue_work(priv->wq, &priv->set_rx_mode_work); - mlx5e_disable_async_events(priv); flush_workqueue(priv->wq); if (test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) { netif_device_detach(netdev); @@ 
-3271,17 +3378,15 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, unregister_netdev(netdev); } - mlx5e_tc_cleanup(priv); - mlx5e_vxlan_cleanup(priv); mlx5e_destroy_q_counter(priv); - mlx5e_destroy_flow_steering(priv); - mlx5e_destroy_tirs(priv); - mlx5e_destroy_rqts(priv); + profile->cleanup_rx(priv); mlx5e_close_drop_rq(priv); - mlx5e_destroy_tises(priv); + profile->cleanup_tx(priv); mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey); cancel_delayed_work_sync(&priv->update_stats_work); destroy_workqueue(priv->wq); + if (profile->cleanup) + profile->cleanup(priv); if (!test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) free_netdev(netdev); From 127ea380acc9de16c2cbd57ed99475944c9917ec Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Fri, 1 Jul 2016 14:51:08 +0300 Subject: [PATCH 15/16] net/mlx5: Add Representors registration API Introduce E-Switch registration/unregister representors functions. Those functions are called by the mlx5e driver when the PF NIC is created upon pci probe action regardless of the E-Switch mode (NONE, LEGACY or OFFLOADS). Adding basic E-Switch database that will hold the vport representors upon creation. This patch doesn't add any new functionality. Signed-off-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 60 +++++++++++++++++-- .../net/ethernet/mellanox/mlx5/core/eswitch.c | 10 ++++ .../net/ethernet/mellanox/mlx5/core/eswitch.h | 12 ++++ .../mellanox/mlx5/core/eswitch_offloads.c | 19 ++++++ 5 files changed, 97 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index edfc9beb1d2c..081259a4edc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -571,7 +571,7 @@ enum { struct mlx5e_profile { void (*init)(struct mlx5_core_dev *mdev, struct net_device *netdev, - const struct mlx5e_profile *profile); + const struct mlx5e_profile *profile, void *ppriv); void (*cleanup)(struct mlx5e_priv *priv); int (*init_rx)(struct mlx5e_priv *priv); void (*cleanup_rx)(struct mlx5e_priv *priv); @@ -618,6 +618,7 @@ struct mlx5e_priv { struct mlx5e_tstamp tstamp; u16 q_counter; const struct mlx5e_profile *profile; + void *ppriv; }; enum mlx5e_link_mode { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3e22c5e8dcf6..2c9e45893316 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2880,7 +2880,8 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, struct net_device *netdev, - const struct mlx5e_profile *profile) + const struct mlx5e_profile *profile, + void *ppriv) { struct mlx5e_priv *priv = netdev_priv(netdev); u32 link_speed = 0; @@ -2962,6 +2963,7 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->netdev = netdev; priv->params.num_channels = profile->max_nch(mdev); priv->profile = profile; + priv->ppriv = ppriv; #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_ets_init(priv); @@ 
-3126,18 +3128,25 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv) static void mlx5e_nic_init(struct mlx5_core_dev *mdev, struct net_device *netdev, - const struct mlx5e_profile *profile) + const struct mlx5e_profile *profile, + void *ppriv) { struct mlx5e_priv *priv = netdev_priv(netdev); - mlx5e_build_nic_netdev_priv(mdev, netdev, profile); + mlx5e_build_nic_netdev_priv(mdev, netdev, profile, ppriv); mlx5e_build_nic_netdev(netdev); mlx5e_vxlan_init(priv); } static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + mlx5e_vxlan_cleanup(priv); + + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + mlx5_eswitch_unregister_vport_rep(esw, 0); } static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) @@ -3229,6 +3238,8 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) { struct net_device *netdev = priv->netdev; struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_eswitch *esw = mdev->priv.eswitch; + struct mlx5_eswitch_rep rep; if (mlx5e_vxlan_allowed(mdev)) { rtnl_lock(); @@ -3238,6 +3249,12 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5e_enable_async_events(priv); queue_work(priv->wq, &priv->set_rx_mode_work); + + if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + rep.vport = 0; + rep.priv_data = priv; + mlx5_eswitch_register_vport_rep(esw, &rep); + } } static void mlx5e_nic_disable(struct mlx5e_priv *priv) @@ -3261,7 +3278,7 @@ static const struct mlx5e_profile mlx5e_nic_profile = { }; static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, - const struct mlx5e_profile *profile) + const struct mlx5e_profile *profile, void *ppriv) { struct net_device *netdev; struct mlx5e_priv *priv; @@ -3276,7 +3293,7 @@ static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, return NULL; } - profile->init(mdev, netdev, profile); + profile->init(mdev, netdev, profile, ppriv); netif_carrier_off(netdev); @@ -3343,8 +3360,27 @@ err_free_netdev: return NULL; 
} +static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); + int vport; + + if (!MLX5_CAP_GEN(mdev, vport_group_manager)) + return; + + for (vport = 1; vport < total_vfs; vport++) { + struct mlx5_eswitch_rep rep; + + rep.vport = vport; + mlx5_eswitch_register_vport_rep(esw, &rep); + } +} + static void *mlx5e_add(struct mlx5_core_dev *mdev) { + struct mlx5_eswitch *esw = mdev->priv.eswitch; + void *ppriv = NULL; void *ret; if (mlx5e_check_required_hca_cap(mdev)) @@ -3353,7 +3389,12 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) if (mlx5e_create_mdev_resources(mdev)) return NULL; - ret = mlx5e_create_netdev(mdev, &mlx5e_nic_profile); + mlx5e_register_vport_rep(mdev); + + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + ppriv = &esw->offloads.vport_reps[0]; + + ret = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv); if (!ret) { mlx5e_destroy_mdev_resources(mdev); return NULL; @@ -3394,9 +3435,16 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv * static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) { + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); struct mlx5e_priv *priv = vpriv; + int vport; mlx5e_destroy_netdev(mdev, priv); + + for (vport = 1; vport < total_vfs; vport++) + mlx5_eswitch_unregister_vport_rep(esw, vport); + mlx5e_destroy_mdev_resources(mdev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 12f509c8d65d..f0a973557f7b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1663,6 +1663,14 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) goto abort; } + esw->offloads.vport_reps = + kzalloc(total_vports * sizeof(struct mlx5_eswitch_rep), + GFP_KERNEL); + if (!esw->offloads.vport_reps) { + err = -ENOMEM; + goto 
abort; + } + mutex_init(&esw->state_lock); for (vport_num = 0; vport_num < total_vports; vport_num++) { @@ -1687,6 +1695,7 @@ abort: destroy_workqueue(esw->work_queue); kfree(esw->l2_table.bitmap); kfree(esw->vports); + kfree(esw->offloads.vport_reps); kfree(esw); return err; } @@ -1704,6 +1713,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) destroy_workqueue(esw->work_queue); kfree(esw->l2_table.bitmap); kfree(esw->mc_promisc); + kfree(esw->offloads.vport_reps); kfree(esw->vports); kfree(esw); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 7843f981509d..ffe5eaba626d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -156,9 +156,17 @@ enum { SRIOV_OFFLOADS }; + +struct mlx5_eswitch_rep { + u16 vport; + void *priv_data; + bool valid; +}; + struct mlx5_esw_offload { struct mlx5_flow_table *ft_offloads; struct mlx5_flow_group *vport_rx_group; + struct mlx5_eswitch_rep *vport_reps; }; struct mlx5_eswitch { @@ -208,6 +216,10 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn) int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode); int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); +void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); +void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, + int vport); #define MLX5_DEBUG_ESWITCH_MASK BIT(3) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 312b6f31fd65..f84aa794d080 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -451,3 +451,22 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) return 0; } + +void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch 
*esw, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + + memcpy(&offloads->vport_reps[rep->vport], rep, + sizeof(struct mlx5_eswitch_rep)); + + offloads->vport_reps[rep->vport].valid = true; +} + +void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, + int vport) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + + offloads->vport_reps[vport].valid = false; +} From cb67b832921cfa20ad79bafdc51f1745339d0557 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Fri, 1 Jul 2016 14:51:09 +0300 Subject: [PATCH 16/16] net/mlx5e: Introduce SRIOV VF representors Implement the relevant profile functions to create an mlx5e driver instance serving as VF representor. When SRIOV offloads mode is enabled, each VF will have a representor netdevice instance on the host. To do that, we also export a set of shared service functions from en_main.c, such that they can be used by both NIC and representor netdevs. The newly created representor netdevice has a basic set of net_device_ops which are the same ndo functions as the NIC netdevice and an ndo of its own for phys port name. The profiling infrastructure allows sharing code between the NIC and the vport representor even though the representor has only a subset of the NIC functionality. The VF reps and the PF which is used in that mode to represent the uplink, expose switchdev ops. Currently the only op supported is attr get for the port parent ID which here serves to identify net-devices belonging to the same HW E-Switch. Other than that, no offloading is implemented and hence switching functionality is achieved if one sets SW switching rules, e.g. using tc, bridge or ovs. 
Port phys name (ndo_get_phys_port_name) is implemented to allow exporting to user-space the VF vport number and, along with the switchdev port parent id (phys_switch_id), enables a udev-based consistent naming scheme: SUBSYSTEM=="net", ACTION=="add", ATTR{phys_switch_id}=="<phys_switch_id>", \ ATTR{phys_port_name}!="", NAME="$PF_NIC$attr{phys_port_name}" where phys_switch_id is exposed by the PF (and VF reps) and $PF_NIC is the name of the PF netdevice. Signed-off-by: Hadar Hen Zion Signed-off-by: Or Gerlitz Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en.h | 28 ++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 53 ++- .../net/ethernet/mellanox/mlx5/core/en_rep.c | 394 ++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/eswitch.h | 20 +- .../mellanox/mlx5/core/eswitch_offloads.c | 96 ++++- 6 files changed, 574 insertions(+), 19 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 9b14dadd9309..a574deabdda8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -8,6 +8,6 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \ en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \ - en_tc.o en_arfs.o + en_tc.o en_arfs.o en_rep.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 081259a4edc0..00643a116492 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -44,6 +44,7 @@ #include #include #include +#include #include "wq.h" #include "mlx5_core.h" #include 
"en_stats.h" @@ -816,4 +817,31 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev); void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev); int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev); +struct mlx5_eswitch_rep; +int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); +void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); +int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep); +void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); +int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); +void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); +int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); + +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); +void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv); +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv); +int mlx5e_create_tises(struct mlx5e_priv *priv); +void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv); +int mlx5e_close(struct net_device *netdev); +int mlx5e_open(struct net_device *netdev); +void mlx5e_update_stats_work(struct work_struct *work); +void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile, void *ppriv); +void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv); +struct rtnl_link_stats64 * +mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); + #endif /* __MLX5_EN_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2c9e45893316..96ec53a6a595 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -226,7 +226,7 @@ void mlx5e_update_stats(struct mlx5e_priv *priv) mlx5e_update_sw_counters(priv); } -static void 
mlx5e_update_stats_work(struct work_struct *work) +void mlx5e_update_stats_work(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); struct mlx5e_priv *priv = container_of(dwork, struct mlx5e_priv, @@ -1518,7 +1518,7 @@ static int mlx5e_create_rqt(struct mlx5e_priv *priv, int sz, return err; } -static void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt) +void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt) { rqt->enabled = false; mlx5_core_destroy_rqt(priv->mdev, rqt->rqtn); @@ -1531,7 +1531,7 @@ static int mlx5e_create_indirect_rqts(struct mlx5e_priv *priv) return mlx5e_create_rqt(priv, MLX5E_INDIR_RQT_SIZE, 0, rqt); } -static int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) +int mlx5e_create_direct_rqts(struct mlx5e_priv *priv) { struct mlx5e_rqt *rqt; int err; @@ -1743,6 +1743,7 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev) int mlx5e_open_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; int num_txqs; int err; @@ -1778,9 +1779,14 @@ int mlx5e_open_locked(struct net_device *netdev) #ifdef CONFIG_RFS_ACCEL priv->netdev->rx_cpu_rmap = priv->mdev->rmap; #endif + if (priv->profile->update_stats) + queue_delayed_work(priv->wq, &priv->update_stats_work, 0); - queue_delayed_work(priv->wq, &priv->update_stats_work, 0); - + if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + err = mlx5e_add_sqs_fwd_rules(priv); + if (err) + goto err_close_channels; + } return 0; err_close_channels: @@ -1790,7 +1796,7 @@ err_clear_state_opened_flag: return err; } -static int mlx5e_open(struct net_device *netdev) +int mlx5e_open(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); int err; @@ -1805,6 +1811,7 @@ static int mlx5e_open(struct net_device *netdev) int mlx5e_close_locked(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; /* May already 
be CLOSED in case a previous configuration operation * (e.g RX/TX queue size change) that involves close&open failed. @@ -1814,6 +1821,9 @@ int mlx5e_close_locked(struct net_device *netdev) clear_bit(MLX5E_STATE_OPENED, &priv->state); + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + mlx5e_remove_sqs_fwd_rules(priv); + mlx5e_timestamp_cleanup(priv); netif_carrier_off(priv->netdev); mlx5e_redirect_rqts(priv); @@ -1822,7 +1832,7 @@ int mlx5e_close_locked(struct net_device *netdev) return 0; } -static int mlx5e_close(struct net_device *netdev) +int mlx5e_close(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); int err; @@ -1957,7 +1967,7 @@ static void mlx5e_destroy_tis(struct mlx5e_priv *priv, int tc) mlx5_core_destroy_tis(priv->mdev, priv->tisn[tc]); } -static int mlx5e_create_tises(struct mlx5e_priv *priv) +int mlx5e_create_tises(struct mlx5e_priv *priv) { int err; int tc; @@ -1977,7 +1987,7 @@ err_close_tises: return err; } -static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) +void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) { int tc; @@ -2143,7 +2153,7 @@ err_destroy_tirs: return err; } -static int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) +int mlx5e_create_direct_tirs(struct mlx5e_priv *priv) { int nch = priv->profile->max_nch(priv->mdev); struct mlx5e_tir *tir; @@ -2190,7 +2200,7 @@ static void mlx5e_destroy_indirect_tirs(struct mlx5e_priv *priv) mlx5e_destroy_tir(priv->mdev, &priv->indir_tir[i]); } -static void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) +void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv) { int nch = priv->profile->max_nch(priv->mdev); int i; @@ -2270,7 +2280,7 @@ mqprio: return mlx5e_setup_tc(dev, tc->tc); } -static struct rtnl_link_stats64 * +struct rtnl_link_stats64 * mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) { struct mlx5e_priv *priv = netdev_priv(dev); @@ -2988,6 +2998,10 @@ static void mlx5e_set_netdev_dev_addr(struct net_device *netdev) } } +static const 
struct switchdev_ops mlx5e_switchdev_ops = { + .switchdev_port_attr_get = mlx5e_attr_get, +}; + static void mlx5e_build_nic_netdev(struct net_device *netdev) { struct mlx5e_priv *priv = netdev_priv(netdev); @@ -3069,6 +3083,11 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev) netdev->priv_flags |= IFF_UNICAST_FLT; mlx5e_set_netdev_dev_addr(netdev); + +#ifdef CONFIG_NET_SWITCHDEV + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + netdev->switchdev_ops = &mlx5e_switchdev_ops; +#endif } static void mlx5e_create_q_counter(struct mlx5e_priv *priv) @@ -3251,6 +3270,8 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) queue_work(priv->wq, &priv->set_rx_mode_work); if (MLX5_CAP_GEN(mdev, vport_group_manager)) { + rep.load = mlx5e_nic_rep_load; + rep.unload = mlx5e_nic_rep_unload; rep.vport = 0; rep.priv_data = priv; mlx5_eswitch_register_vport_rep(esw, &rep); @@ -3277,8 +3298,8 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .max_tc = MLX5E_MAX_NUM_TC, }; -static void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, - const struct mlx5e_profile *profile, void *ppriv) +void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile, void *ppriv) { struct net_device *netdev; struct mlx5e_priv *priv; @@ -3372,6 +3393,8 @@ static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) for (vport = 1; vport < total_vfs; vport++) { struct mlx5_eswitch_rep rep; + rep.load = mlx5e_vport_rep_load; + rep.unload = mlx5e_vport_rep_unload; rep.vport = vport; mlx5_eswitch_register_vport_rep(esw, &rep); } @@ -3402,7 +3425,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev) return ret; } -static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) +void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) { const struct mlx5e_profile *profile = priv->profile; struct net_device *netdev = priv->netdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c new file mode 100644 index 000000000000..5ef02f02a1d5 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include +#include +#include + +#include "eswitch.h" +#include "en.h" + +static const char mlx5e_rep_driver_name[] = "mlx5e_rep"; + +static void mlx5e_rep_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + strlcpy(drvinfo->driver, mlx5e_rep_driver_name, + sizeof(drvinfo->driver)); + strlcpy(drvinfo->version, UTS_RELEASE, sizeof(drvinfo->version)); +} + +static const struct counter_desc sw_rep_stats_desc[] = { + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) }, +}; + +#define NUM_VPORT_REP_COUNTERS ARRAY_SIZE(sw_rep_stats_desc) + +static void mlx5e_rep_get_strings(struct net_device *dev, + u32 stringset, uint8_t *data) +{ + int i; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < NUM_VPORT_REP_COUNTERS; i++) + strcpy(data + (i * ETH_GSTRING_LEN), + sw_rep_stats_desc[i].format); + break; + } +} + +static void mlx5e_update_sw_rep_counters(struct mlx5e_priv *priv) +{ + struct mlx5e_sw_stats *s = &priv->stats.sw; + struct mlx5e_rq_stats *rq_stats; + struct mlx5e_sq_stats *sq_stats; + int i, j; + + memset(s, 0, sizeof(*s)); + for (i = 0; i < priv->params.num_channels; i++) { + rq_stats = &priv->channel[i]->rq.stats; + + s->rx_packets += rq_stats->packets; + s->rx_bytes += rq_stats->bytes; + + for (j = 0; j < priv->params.num_tc; j++) { + sq_stats = &priv->channel[i]->sq[j].stats; + + s->tx_packets += sq_stats->packets; + s->tx_bytes += sq_stats->bytes; + } + } +} + +static void mlx5e_rep_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int i; + + if (!data) + return; + + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_update_sw_rep_counters(priv); + mutex_unlock(&priv->state_lock); + + for (i = 0; i < 
NUM_VPORT_REP_COUNTERS; i++) + data[i] = MLX5E_READ_CTR64_CPU(&priv->stats.sw, + sw_rep_stats_desc, i); +} + +static int mlx5e_rep_get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return NUM_VPORT_REP_COUNTERS; + default: + return -EOPNOTSUPP; + } +} + +static const struct ethtool_ops mlx5e_rep_ethtool_ops = { + .get_drvinfo = mlx5e_rep_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = mlx5e_rep_get_strings, + .get_sset_count = mlx5e_rep_get_sset_count, + .get_ethtool_stats = mlx5e_rep_get_ethtool_stats, +}; + +int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + u8 mac[ETH_ALEN]; + + if (esw->mode == SRIOV_NONE) + return -EOPNOTSUPP; + + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: + mlx5_query_nic_vport_mac_address(priv->mdev, 0, mac); + attr->u.ppid.id_len = ETH_ALEN; + memcpy(&attr->u.ppid.id, &mac, ETH_ALEN); + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) + +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5e_channel *c; + int n, tc, err, num_sqs = 0; + u16 *sqs; + + sqs = kcalloc(priv->params.num_channels * priv->params.num_tc, sizeof(u16), GFP_KERNEL); + if (!sqs) + return -ENOMEM; + + for (n = 0; n < priv->params.num_channels; n++) { + c = priv->channel[n]; + for (tc = 0; tc < c->num_tc; tc++) + sqs[num_sqs++] = c->sq[tc].sqn; + } + + err = mlx5_eswitch_sqs2vport_start(esw, rep, sqs, num_sqs); + + kfree(sqs); + return err; +} + +int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_priv *priv = rep->priv_data; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + return mlx5e_add_sqs_fwd_rules(priv); + return 0; +} + +void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv) +{ + 
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_eswitch_rep *rep = priv->ppriv; + + mlx5_eswitch_sqs2vport_stop(esw, rep); +} + +void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_priv *priv = rep->priv_data; + + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + mlx5e_remove_sqs_fwd_rules(priv); +} + +static int mlx5e_rep_get_phys_port_name(struct net_device *dev, + char *buf, size_t len) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_eswitch_rep *rep = priv->ppriv; + int ret; + + ret = snprintf(buf, len, "%d", rep->vport - 1); + if (ret >= len) + return -EOPNOTSUPP; + + return 0; +} + +static const struct switchdev_ops mlx5e_rep_switchdev_ops = { + .switchdev_port_attr_get = mlx5e_attr_get, +}; + +static const struct net_device_ops mlx5e_netdev_ops_rep = { + .ndo_open = mlx5e_open, + .ndo_stop = mlx5e_close, + .ndo_start_xmit = mlx5e_xmit, + .ndo_get_phys_port_name = mlx5e_rep_get_phys_port_name, + .ndo_get_stats64 = mlx5e_get_stats, +}; + +static void mlx5e_build_rep_netdev_priv(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE : + MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + + priv->params.log_sq_size = + MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE; + priv->params.rq_wq_type = MLX5_WQ_TYPE_LINKED_LIST; + priv->params.log_rq_size = MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE; + + priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, + BIT(priv->params.log_rq_size)); + + priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode); + + priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); + priv->params.num_tc = 1; + + priv->params.lro_wqe_sz = + MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + + priv->mdev = mdev; + priv->netdev = netdev; + priv->params.num_channels = profile->max_nch(mdev); + priv->profile = profile; + priv->ppriv = ppriv; + + mutex_init(&priv->state_lock); + + INIT_DELAYED_WORK(&priv->update_stats_work, mlx5e_update_stats_work); +} + +static void mlx5e_build_rep_netdev(struct net_device *netdev) +{ + netdev->netdev_ops = &mlx5e_netdev_ops_rep; + + netdev->watchdog_timeo = 15 * HZ; + + netdev->ethtool_ops = &mlx5e_rep_ethtool_ops; + +#ifdef CONFIG_NET_SWITCHDEV + netdev->switchdev_ops = &mlx5e_rep_switchdev_ops; +#endif + + netdev->features |= NETIF_F_VLAN_CHALLENGED; + + eth_hw_addr_random(netdev); +} + +static void mlx5e_init_rep(struct mlx5_core_dev *mdev, + struct net_device *netdev, + const struct mlx5e_profile *profile, + void *ppriv) +{ + mlx5e_build_rep_netdev_priv(mdev, netdev, profile, ppriv); + mlx5e_build_rep_netdev(netdev); +} + +static int mlx5e_init_rep_rx(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_eswitch_rep *rep = priv->ppriv; + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5_flow_rule *flow_rule; + int err; + int i; + + err = mlx5e_create_direct_rqts(priv); + if (err) { + mlx5_core_warn(mdev, "create direct rqts failed, %d\n", err); + return err; + } + + err = mlx5e_create_direct_tirs(priv); + if (err) { + 
mlx5_core_warn(mdev, "create direct tirs failed, %d\n", err); + goto err_destroy_direct_rqts; + } + + flow_rule = mlx5_eswitch_create_vport_rx_rule(esw, + rep->vport, + priv->direct_tir[0].tirn); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + goto err_destroy_direct_tirs; + } + rep->vport_rx_rule = flow_rule; + + return 0; + +err_destroy_direct_tirs: + mlx5e_destroy_direct_tirs(priv); +err_destroy_direct_rqts: + for (i = 0; i < priv->params.num_channels; i++) + mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); + return err; +} + +static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch_rep *rep = priv->ppriv; + int i; + + mlx5_del_flow_rule(rep->vport_rx_rule); + mlx5e_destroy_direct_tirs(priv); + for (i = 0; i < priv->params.num_channels; i++) + mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt); +} + +static int mlx5e_init_rep_tx(struct mlx5e_priv *priv) +{ + int err; + + err = mlx5e_create_tises(priv); + if (err) { + mlx5_core_warn(priv->mdev, "create tises failed, %d\n", err); + return err; + } + return 0; +} + +static int mlx5e_get_rep_max_num_channels(struct mlx5_core_dev *mdev) +{ +#define MLX5E_PORT_REPRESENTOR_NCH 1 + return MLX5E_PORT_REPRESENTOR_NCH; +} + +static struct mlx5e_profile mlx5e_rep_profile = { + .init = mlx5e_init_rep, + .init_rx = mlx5e_init_rep_rx, + .cleanup_rx = mlx5e_cleanup_rep_rx, + .init_tx = mlx5e_init_rep_tx, + .cleanup_tx = mlx5e_cleanup_nic_tx, + .update_stats = mlx5e_update_sw_rep_counters, + .max_nch = mlx5e_get_rep_max_num_channels, + .max_tc = 1, +}; + +int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + rep->priv_data = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep); + if (!rep->priv_data) { + pr_warn("Failed to create representor for vport %d\n", + rep->vport); + return -EINVAL; + } + return 0; +} + +void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5e_priv *priv = rep->priv_data; + + 
mlx5e_destroy_netdev(esw->dev, priv); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index ffe5eaba626d..7b45e6a6efb8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -47,6 +47,8 @@ #define MLX5_L2_ADDR_HASH_SIZE (BIT(BITS_PER_BYTE)) #define MLX5_L2_ADDR_HASH(addr) (addr[5]) +#define FDB_UPLINK_VPORT 0xffff + /* L2 -mac address based- hash helpers */ struct l2addr_node { struct hlist_node hlist; @@ -156,10 +158,20 @@ enum { SRIOV_OFFLOADS }; +struct mlx5_esw_sq { + struct mlx5_flow_rule *send_to_vport_rule; + struct list_head list; +}; struct mlx5_eswitch_rep { + int (*load)(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); + void (*unload)(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); u16 vport; + struct mlx5_flow_rule *vport_rx_rule; void *priv_data; + struct list_head vport_sqs_list; bool valid; }; @@ -208,12 +220,16 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, int vport, struct ifla_vf_stats *vf_stats); -struct mlx5_flow_rule * -mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn); struct mlx5_flow_rule * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); +int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + u16 *sqns_array, int sqns_num); +void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep); + int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode); int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index f84aa794d080..ed8ad988f07a 100644 --- 
a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -38,7 +38,7 @@ #include "mlx5_core.h" #include "eswitch.h" -struct mlx5_flow_rule * +static struct mlx5_flow_rule * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) { struct mlx5_flow_destination dest; @@ -77,6 +77,63 @@ out: return flow_rule; } +void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep) +{ + struct mlx5_esw_sq *esw_sq, *tmp; + + if (esw->mode != SRIOV_OFFLOADS) + return; + + list_for_each_entry_safe(esw_sq, tmp, &rep->vport_sqs_list, list) { + mlx5_del_flow_rule(esw_sq->send_to_vport_rule); + list_del(&esw_sq->list); + kfree(esw_sq); + } +} + +int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, + struct mlx5_eswitch_rep *rep, + u16 *sqns_array, int sqns_num) +{ + struct mlx5_flow_rule *flow_rule; + struct mlx5_esw_sq *esw_sq; + int vport; + int err; + int i; + + if (esw->mode != SRIOV_OFFLOADS) + return 0; + + vport = rep->vport == 0 ? 
+ FDB_UPLINK_VPORT : rep->vport; + + for (i = 0; i < sqns_num; i++) { + esw_sq = kzalloc(sizeof(*esw_sq), GFP_KERNEL); + if (!esw_sq) { + err = -ENOMEM; + goto out_err; + } + + /* Add re-inject rule to the PF/representor sqs */ + flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, + vport, + sqns_array[i]); + if (IS_ERR(flow_rule)) { + err = PTR_ERR(flow_rule); + kfree(esw_sq); + goto out_err; + } + esw_sq->send_to_vport_rule = flow_rule; + list_add(&esw_sq->list, &rep->vport_sqs_list); + } + return 0; + +out_err: + mlx5_eswitch_sqs2vport_stop(esw, rep); + return err; +} + static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw) { struct mlx5_flow_destination dest; @@ -347,6 +404,8 @@ static int esw_offloads_start(struct mlx5_eswitch *esw) int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) { + struct mlx5_eswitch_rep *rep; + int vport; int err; err = esw_create_offloads_fdb_table(esw, nvports); @@ -361,8 +420,26 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int nvports) if (err) goto create_fg_err; + for (vport = 0; vport < nvports; vport++) { + rep = &esw->offloads.vport_reps[vport]; + if (!rep->valid) + continue; + + err = rep->load(esw, rep); + if (err) + goto err_reps; + } return 0; +err_reps: + for (vport--; vport >= 0; vport--) { + rep = &esw->offloads.vport_reps[vport]; + if (!rep->valid) + continue; + rep->unload(esw, rep); + } + esw_destroy_vport_rx_group(esw); + create_fg_err: esw_destroy_offloads_table(esw); @@ -385,6 +462,16 @@ static int esw_offloads_stop(struct mlx5_eswitch *esw) void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports) { + struct mlx5_eswitch_rep *rep; + int vport; + + for (vport = 0; vport < nvports; vport++) { + rep = &esw->offloads.vport_reps[vport]; + if (!rep->valid) + continue; + rep->unload(esw, rep); + } + esw_destroy_vport_rx_group(esw); esw_destroy_offloads_table(esw); esw_destroy_offloads_fdb_table(esw); @@ -460,6 +547,7 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, 
memcpy(&offloads->vport_reps[rep->vport], rep, sizeof(struct mlx5_eswitch_rep)); + INIT_LIST_HEAD(&offloads->vport_reps[rep->vport].vport_sqs_list); offloads->vport_reps[rep->vport].valid = true; } @@ -467,6 +555,12 @@ void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, int vport) { struct mlx5_esw_offload *offloads = &esw->offloads; + struct mlx5_eswitch_rep *rep; + + rep = &offloads->vport_reps[vport]; + + if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport].enabled) + rep->unload(esw, rep); offloads->vport_reps[vport].valid = false; }