From 1466cc5b23d18e7b6b8f1a45443d595393dbcae7 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Thu, 23 Jun 2016 17:02:37 +0300 Subject: [PATCH 01/10] net/mlx5: Rate limit tables support Configuring and managing HW rate limit tables. The HW holds a table of rate limits, each rate is associated with an index in that table. Later a Send Queue uses this index to set the rate limit. Multiple Send Queues can have the same rate limit, which is represented by a single entry in this table. Even though a rate can be shared, each queue is being rate limited independently of others. The SW shadow of this table holds the rate itself, the index in the HW table and the refcount (number of queues) working with this rate. The exported functions are mlx5_rl_add_rate and mlx5_rl_remove_rate. Number of different rates and their values are derived from HW capabilities. Signed-off-by: Yevgeny Petrilin Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- .../net/ethernet/mellanox/mlx5/core/Makefile | 5 +- drivers/net/ethernet/mellanox/mlx5/core/fw.c | 6 + .../net/ethernet/mellanox/mlx5/core/main.c | 10 + drivers/net/ethernet/mellanox/mlx5/core/rl.c | 209 ++++++++++++++++++ include/linux/mlx5/device.h | 4 + include/linux/mlx5/driver.h | 27 +++ 6 files changed, 259 insertions(+), 2 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/rl.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 9ea7b583096a..0c8a7dcea483 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -1,8 +1,9 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ - health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ - mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o fs_counters.o + health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ + mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o 
\ + fs_counters.o rl.o mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \ en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 75c7ae6a5cc4..77fc1aa26114 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -151,6 +151,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, qos)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_QOS); + if (err) + return err; + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index a19b59348dd6..08cae3485960 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1144,6 +1144,13 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) dev_err(&pdev->dev, "Failed to init flow steering\n"); goto err_fs; } + + err = mlx5_init_rl_table(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init rate limiting\n"); + goto err_rl; + } + #ifdef CONFIG_MLX5_CORE_EN err = mlx5_eswitch_init(dev); if (err) { @@ -1183,6 +1190,8 @@ err_sriov: mlx5_eswitch_cleanup(dev->priv.eswitch); #endif err_reg_dev: + mlx5_cleanup_rl_table(dev); +err_rl: mlx5_cleanup_fs(dev); err_fs: mlx5_cleanup_mkey_table(dev); @@ -1253,6 +1262,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_eswitch_cleanup(dev->priv.eswitch); #endif + mlx5_cleanup_rl_table(dev); mlx5_cleanup_fs(dev); mlx5_cleanup_mkey_table(dev); mlx5_cleanup_srq_table(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c new file mode 100644 index 000000000000..c07c28bd3d55 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2013-2016, Mellanox Technologies. All rights reserved. 
+ * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include "mlx5_core.h" + +/* Finds an entry where we can register the given rate + * If the rate already exists, return the entry where it is registered, + * otherwise return the first available entry. 
+ * If the table is full, return NULL + */ +static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table, + u32 rate) +{ + struct mlx5_rl_entry *ret_entry = NULL; + bool empty_found = false; + int i; + + for (i = 0; i < table->max_size; i++) { + if (table->rl_entry[i].rate == rate) + return &table->rl_entry[i]; + if (!empty_found && !table->rl_entry[i].rate) { + empty_found = true; + ret_entry = &table->rl_entry[i]; + } + } + + return ret_entry; +} + +static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev, + u32 rate, u16 index) +{ + u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)]; + u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)]; + + memset(in, 0, sizeof(in)); + memset(out, 0, sizeof(out)); + + MLX5_SET(set_rate_limit_in, in, opcode, + MLX5_CMD_OP_SET_RATE_LIMIT); + MLX5_SET(set_rate_limit_in, in, rate_limit_index, index); + MLX5_SET(set_rate_limit_in, in, rate_limit, rate); + + return mlx5_cmd_exec_check_status(dev, in, sizeof(in), + out, sizeof(out)); +} + +bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + + return (rate <= table->max_rate && rate >= table->min_rate); +} +EXPORT_SYMBOL(mlx5_rl_is_in_range); + +int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + struct mlx5_rl_entry *entry; + int err = 0; + + mutex_lock(&table->rl_lock); + + if (!rate || !mlx5_rl_is_in_range(dev, rate)) { + mlx5_core_err(dev, "Invalid rate: %u, should be %u to %u\n", + rate, table->min_rate, table->max_rate); + err = -EINVAL; + goto out; + } + + entry = find_rl_entry(table, rate); + if (!entry) { + mlx5_core_err(dev, "Max number of %u rates reached\n", + table->max_size); + err = -ENOSPC; + goto out; + } + if (entry->refcount) { + /* rate already configured */ + entry->refcount++; + } else { + /* new rate limit */ + err = mlx5_set_rate_limit_cmd(dev, rate, entry->index); + if (err) { + mlx5_core_err(dev, "Failed configuring rate: 
%u (%d)\n", + rate, err); + goto out; + } + entry->rate = rate; + entry->refcount = 1; + } + *index = entry->index; + +out: + mutex_unlock(&table->rl_lock); + return err; +} +EXPORT_SYMBOL(mlx5_rl_add_rate); + +void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, u32 rate) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + struct mlx5_rl_entry *entry = NULL; + + /* 0 is a reserved value for unlimited rate */ + if (rate == 0) + return; + + mutex_lock(&table->rl_lock); + entry = find_rl_entry(table, rate); + if (!entry || !entry->refcount) { + mlx5_core_warn(dev, "Rate %u is not configured\n", rate); + goto out; + } + + entry->refcount--; + if (!entry->refcount) { + /* need to remove rate */ + mlx5_set_rate_limit_cmd(dev, 0, entry->index); + entry->rate = 0; + } + +out: + mutex_unlock(&table->rl_lock); +} +EXPORT_SYMBOL(mlx5_rl_remove_rate); + +int mlx5_init_rl_table(struct mlx5_core_dev *dev) +{ + struct mlx5_rl_table *table = &dev->priv.rl_table; + int i; + + mutex_init(&table->rl_lock); + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, packet_pacing)) { + table->max_size = 0; + return 0; + } + + /* First entry is reserved for unlimited rate */ + table->max_size = MLX5_CAP_QOS(dev, packet_pacing_rate_table_size) - 1; + table->max_rate = MLX5_CAP_QOS(dev, packet_pacing_max_rate); + table->min_rate = MLX5_CAP_QOS(dev, packet_pacing_min_rate); + + table->rl_entry = kcalloc(table->max_size, sizeof(struct mlx5_rl_entry), + GFP_KERNEL); + if (!table->rl_entry) + return -ENOMEM; + + /* The index represents the index in HW rate limit table + * Index 0 is reserved for unlimited rate + */ + for (i = 0; i < table->max_size; i++) + table->rl_entry[i].index = i + 1; + + /* Index 0 is reserved */ + mlx5_core_info(dev, "Rate limit: %u rates are supported, range: %uMbps to %uMbps\n", + table->max_size, + table->min_rate >> 10, + table->max_rate >> 10); + + return 0; +} + +void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev) +{ + struct mlx5_rl_table *table = 
&dev->priv.rl_table; + int i; + + /* Clear all configured rates */ + for (i = 0; i < table->max_size; i++) + if (table->rl_entry[i].rate) + mlx5_set_rate_limit_cmd(dev, 0, + table->rl_entry[i].index); + + kfree(dev->priv.rl_table.rl_entry); +} diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 73a48479892d..e0a3ed758287 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1330,6 +1330,7 @@ enum mlx5_cap_type { MLX5_CAP_ESWITCH, MLX5_CAP_RESERVED, MLX5_CAP_VECTOR_CALC, + MLX5_CAP_QOS, /* NUM OF CAP Types */ MLX5_CAP_NUM }; @@ -1414,6 +1415,9 @@ enum mlx5_cap_type { MLX5_GET(vector_calc_cap, \ mdev->hca_caps_cur[MLX5_CAP_VECTOR_CALC], cap) +#define MLX5_CAP_QOS(mdev, cap)\ + MLX5_GET(qos_cap, mdev->hca_caps_cur[MLX5_CAP_QOS], cap) + enum { MLX5_CMD_STAT_OK = 0x0, MLX5_CMD_STAT_INT_ERR = 0x1, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 80776d0c52dc..46260fdc5305 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -481,6 +481,21 @@ struct mlx5_fc_stats { struct mlx5_eswitch; +struct mlx5_rl_entry { + u32 rate; + u16 index; + u16 refcount; +}; + +struct mlx5_rl_table { + /* protect rate limit table */ + struct mutex rl_lock; + u16 max_size; + u32 max_rate; + u32 min_rate; + struct mlx5_rl_entry *rl_entry; +}; + struct mlx5_priv { char name[MLX5_MAX_NAME_LEN]; struct mlx5_eq_table eq_table; @@ -544,6 +559,7 @@ struct mlx5_priv { struct mlx5_flow_root_namespace *esw_ingress_root_ns; struct mlx5_fc_stats fc_stats; + struct mlx5_rl_table rl_table; }; enum mlx5_device_state { @@ -861,6 +877,12 @@ int mlx5_query_odp_caps(struct mlx5_core_dev *dev, int mlx5_core_query_ib_ppcnt(struct mlx5_core_dev *dev, u8 port_num, void *out, size_t sz); +int mlx5_init_rl_table(struct mlx5_core_dev *dev); +void mlx5_cleanup_rl_table(struct mlx5_core_dev *dev); +int mlx5_rl_add_rate(struct mlx5_core_dev *dev, u32 rate, u16 *index); +void mlx5_rl_remove_rate(struct mlx5_core_dev *dev, 
u32 rate); +bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate); + static inline int fw_initializing(struct mlx5_core_dev *dev) { return ioread32be(&dev->iseg->initializing) >> 31; @@ -938,6 +960,11 @@ static inline int mlx5_get_gid_table_len(u16 param) return 8 * (1 << param); } +static inline bool mlx5_rl_is_supported(struct mlx5_core_dev *dev) +{ + return !!(dev->priv.rl_table.max_size); +} + enum { MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, }; From 507f0c817f7a28bbf4facb3a8dca72a68bc25248 Mon Sep 17 00:00:00 2001 From: Yevgeny Petrilin Date: Thu, 23 Jun 2016 17:02:38 +0300 Subject: [PATCH 02/10] net/mlx5e: Add TXQ set max rate support Implement set_maxrate ndo. Use the rate index from the hardware table to attach to channel SQ/TXQ. In case of failure to configure new rate, the queue remains with unlimited rate. We save the configuration on priv structure and apply it each time Send Queues are being reinitialized (after open/close) operations. Signed-off-by: Yevgeny Petrilin Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 + .../net/ethernet/mellanox/mlx5/core/en_main.c | 102 +++++++++++++++++- 2 files changed, 102 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index e8a6c3325b39..017e047965a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -88,6 +88,7 @@ #define MLX5E_LOG_INDIR_RQT_SIZE 0x7 #define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE) #define MLX5E_MAX_NUM_CHANNELS (MLX5E_INDIR_RQT_SIZE >> 1) +#define MLX5E_MAX_NUM_SQS (MLX5E_MAX_NUM_CHANNELS * MLX5E_MAX_NUM_TC) #define MLX5E_TX_CQ_POLL_BUDGET 128 #define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ #define MLX5E_SQ_BF_BUDGET 16 @@ -354,6 +355,7 @@ struct mlx5e_sq { struct mlx5e_channel *channel; int tc; struct mlx5e_ico_wqe_info *ico_wqe_info; + u32 rate_limit; } ____cacheline_aligned_in_smp; static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) @@ -530,6 +532,7 @@ struct mlx5e_priv { u32 indir_rqtn; u32 indir_tirn[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_direct_tir direct_tir[MLX5E_MAX_NUM_CHANNELS]; + u32 tx_rates[MLX5E_MAX_NUM_SQS]; struct mlx5e_flow_steering fs; struct mlx5e_vxlan_db vxlan; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 8b7c6f381706..e5a2cefdc0a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -702,7 +702,8 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) return err; } -static int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, int next_state) +static int mlx5e_modify_sq(struct mlx5e_sq *sq, int curr_state, + int next_state, bool update_rl, int rl_index) { struct mlx5e_channel *c = sq->channel; struct mlx5e_priv *priv = c->priv; @@ -722,6 +723,10 @@ static int mlx5e_modify_sq(struct mlx5e_sq *sq, 
int curr_state, int next_state) MLX5_SET(modify_sq_in, in, sq_state, curr_state); MLX5_SET(sqc, sqc, state, next_state); + if (update_rl && next_state == MLX5_SQC_STATE_RDY) { + MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); + MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index); + } err = mlx5_core_modify_sq(mdev, sq->sqn, in, inlen); @@ -737,6 +742,8 @@ static void mlx5e_disable_sq(struct mlx5e_sq *sq) struct mlx5_core_dev *mdev = priv->mdev; mlx5_core_destroy_sq(mdev, sq->sqn); + if (sq->rate_limit) + mlx5_rl_remove_rate(mdev, sq->rate_limit); } static int mlx5e_open_sq(struct mlx5e_channel *c, @@ -754,7 +761,8 @@ static int mlx5e_open_sq(struct mlx5e_channel *c, if (err) goto err_destroy_sq; - err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY); + err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY, + false, 0); if (err) goto err_disable_sq; @@ -793,7 +801,8 @@ static void mlx5e_close_sq(struct mlx5e_sq *sq) if (mlx5e_sq_has_room_for(sq, 1)) mlx5e_send_nop(sq, true); - mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR); + mlx5e_modify_sq(sq, MLX5_SQC_STATE_RDY, MLX5_SQC_STATE_ERR, + false, 0); } while (sq->cc != sq->pc) /* wait till sq is empty */ @@ -1024,6 +1033,79 @@ static void mlx5e_build_channeltc_to_txq_map(struct mlx5e_priv *priv, int ix) ix + i * priv->params.num_channels; } +static int mlx5e_set_sq_maxrate(struct net_device *dev, + struct mlx5e_sq *sq, u32 rate) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + u16 rl_index = 0; + int err; + + if (rate == sq->rate_limit) + /* nothing to do */ + return 0; + + if (sq->rate_limit) + /* remove current rl index to free space to next ones */ + mlx5_rl_remove_rate(mdev, sq->rate_limit); + + sq->rate_limit = 0; + + if (rate) { + err = mlx5_rl_add_rate(mdev, rate, &rl_index); + if (err) { + netdev_err(dev, "Failed configuring rate %u: %d\n", + rate, err); + return err; + } + } + + err = mlx5e_modify_sq(sq, 
MLX5_SQC_STATE_RDY, + MLX5_SQC_STATE_RDY, true, rl_index); + if (err) { + netdev_err(dev, "Failed configuring rate %u: %d\n", + rate, err); + /* remove the rate from the table */ + if (rate) + mlx5_rl_remove_rate(mdev, rate); + return err; + } + + sq->rate_limit = rate; + return 0; +} + +static int mlx5e_set_tx_maxrate(struct net_device *dev, int index, u32 rate) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_sq *sq = priv->txq_to_sq_map[index]; + int err = 0; + + if (!mlx5_rl_is_supported(mdev)) { + netdev_err(dev, "Rate limiting is not supported on this device\n"); + return -EINVAL; + } + + /* rate is given in Mb/sec, HW config is in Kb/sec */ + rate = rate << 10; + + /* Check whether rate in valid range, 0 is always valid */ + if (rate && !mlx5_rl_is_in_range(mdev, rate)) { + netdev_err(dev, "TX rate %u, is not in range\n", rate); + return -ERANGE; + } + + mutex_lock(&priv->state_lock); + if (test_bit(MLX5E_STATE_OPENED, &priv->state)) + err = mlx5e_set_sq_maxrate(dev, sq, rate); + if (!err) + priv->tx_rates[index] = rate; + mutex_unlock(&priv->state_lock); + + return err; +} + static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_channel_param *cparam, struct mlx5e_channel **cp) @@ -1031,7 +1113,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct net_device *netdev = priv->netdev; int cpu = mlx5e_get_cpu(priv, ix); struct mlx5e_channel *c; + struct mlx5e_sq *sq; int err; + int i; c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); if (!c) @@ -1073,6 +1157,16 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, if (err) goto err_close_icosq; + for (i = 0; i < priv->params.num_tc; i++) { + u32 txq_ix = priv->channeltc_to_txq_map[ix][i]; + + if (priv->tx_rates[txq_ix]) { + sq = priv->txq_to_sq_map[txq_ix]; + mlx5e_set_sq_maxrate(priv->netdev, sq, + priv->tx_rates[txq_ix]); + } + } + err = mlx5e_open_rq(c, &cparam->rq, &c->rq); if (err) goto 
err_close_sqs; @@ -2611,6 +2705,7 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_set_features = mlx5e_set_features, .ndo_change_mtu = mlx5e_change_mtu, .ndo_do_ioctl = mlx5e_ioctl, + .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif @@ -2632,6 +2727,7 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { .ndo_do_ioctl = mlx5e_ioctl, .ndo_udp_tunnel_add = mlx5e_add_vxlan_port, .ndo_udp_tunnel_del = mlx5e_del_vxlan_port, + .ndo_set_tx_maxrate = mlx5e_set_tx_maxrate, .ndo_features_check = mlx5e_features_check, #ifdef CONFIG_RFS_ACCEL .ndo_rx_flow_steer = mlx5e_rx_flow_steer, From 4e59e288813901815b39c82fc00d4e9fe78ce16b Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 23 Jun 2016 17:02:39 +0300 Subject: [PATCH 03/10] net/mlx5e: Introduce net device priv flags infrastructure Introduce an infrastructure for getting/setting private net device flags. Currently a 'nop' priv flag is added, following patches will override the flag with actual feature specific flags. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 17 ++++++ .../ethernet/mellanox/mlx5/core/en_ethtool.c | 59 +++++++++++++++++++ 2 files changed, 76 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 017e047965a6..02fa4daef59d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -144,6 +144,22 @@ struct mlx5e_umr_wqe { struct mlx5_wqe_data_seg data; }; +static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = { + "nop", +}; + +enum mlx5e_priv_flag { + MLX5E_PFLAG_NOP = (1 << 0), +}; + +#define MLX5E_SET_PRIV_FLAG(priv, pflag, enable) \ + do { \ + if (enable) \ + priv->pflags |= pflag; \ + else \ + priv->pflags &= ~pflag; \ + } while (0) + #ifdef CONFIG_MLX5_CORE_EN_DCB #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */ #define MLX5E_MIN_BW_ALLOC 1 /* Min percentage of BW allocation */ @@ -543,6 +559,7 @@ struct mlx5e_priv { struct work_struct set_rx_mode_work; struct delayed_work update_stats_work; + u32 pflags; struct mlx5_core_dev *mdev; struct net_device *netdev; struct mlx5e_stats stats; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index fc7dcc03b1de..f8bbc2b44fb3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -198,6 +198,8 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset) MLX5E_NUM_RQ_STATS(priv) + MLX5E_NUM_SQ_STATS(priv) + MLX5E_NUM_PFC_COUNTERS(priv); + case ETH_SS_PRIV_FLAGS: + return ARRAY_SIZE(mlx5e_priv_flags); /* fallthrough */ default: return -EOPNOTSUPP; @@ -272,9 +274,12 @@ static void mlx5e_get_strings(struct net_device *dev, uint32_t stringset, uint8_t *data) { struct mlx5e_priv *priv = netdev_priv(dev); + int i; switch (stringset) { case ETH_SS_PRIV_FLAGS: + for (i = 0; i < ARRAY_SIZE(mlx5e_priv_flags); i++) + 
strcpy(data + i * ETH_GSTRING_LEN, mlx5e_priv_flags[i]); break; case ETH_SS_TEST: @@ -1272,6 +1277,58 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev, return 0; } +typedef int (*mlx5e_pflag_handler)(struct net_device *netdev, bool enable); + +static int set_pflag_nop(struct net_device *netdev, bool enable) +{ + return 0; +} + +static int mlx5e_handle_pflag(struct net_device *netdev, + u32 wanted_flags, + enum mlx5e_priv_flag flag, + mlx5e_pflag_handler pflag_handler) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + bool enable = !!(wanted_flags & flag); + u32 changes = wanted_flags ^ priv->pflags; + int err; + + if (!(changes & flag)) + return 0; + + err = pflag_handler(netdev, enable); + if (err) { + netdev_err(netdev, "%s private flag 0x%x failed err %d\n", + enable ? "Enable" : "Disable", flag, err); + return err; + } + + MLX5E_SET_PRIV_FLAG(priv, flag, enable); + return 0; +} + +static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + int err; + + mutex_lock(&priv->state_lock); + + err = mlx5e_handle_pflag(netdev, pflags, MLX5E_PFLAG_NOP, + set_pflag_nop); + + mutex_unlock(&priv->state_lock); + return err ? 
-EINVAL : 0; +} + +static u32 mlx5e_get_priv_flags(struct net_device *netdev) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + + return priv->pflags; +} + const struct ethtool_ops mlx5e_ethtool_ops = { .get_drvinfo = mlx5e_get_drvinfo, .get_link = ethtool_op_get_link, @@ -1301,4 +1358,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .set_wol = mlx5e_set_wol, .get_module_info = mlx5e_get_module_info, .get_module_eeprom = mlx5e_get_module_eeprom, + .get_priv_flags = mlx5e_get_priv_flags, + .set_priv_flags = mlx5e_set_priv_flags }; From 9908aa292971ee3320ea13a71d75f90a52929892 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 23 Jun 2016 17:02:40 +0300 Subject: [PATCH 04/10] net/mlx5e: CQE based moderation In this mode the moderation timer will restart upon new completion (CQE) generation rather than upon interrupt generation. The outcome is that for bursty traffic the period timer will never expire and thus only the moderation frames counter will dictate interrupt generation, thus the interrupt rate will be relative to the incoming packets size. If the burst ceases for "moderation period" time then an interrupt will be issued immediately. CQE based moderation is off by default and can be controlled via ethtool set_priv_flags. Performance tested on ConnectX4-Lx 50G. Less packet loss in netperf UDP and TCP tests, with no bw degradation, for both single and multi streams, with message sizes of 64, 1024, 1472 and 32768 byte. Signed-off-by: Tariq Toukan Signed-off-by: Achiad Shochat Signed-off-by: Saeed Mahameed Signed-off-by: Gal Pressman Signed-off-by: Gil Rockah Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 20 ++++--- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 54 ++++++++++++++----- .../net/ethernet/mellanox/mlx5/core/en_main.c | 54 +++++++++++++------ 3 files changed, 95 insertions(+), 33 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 02fa4daef59d..36f625d3c736 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -79,6 +79,7 @@ #define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024) #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 +#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20 @@ -145,11 +146,11 @@ struct mlx5e_umr_wqe { }; static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = { - "nop", + "rx_cqe_moder", }; enum mlx5e_priv_flag { - MLX5E_PFLAG_NOP = (1 << 0), + MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0), }; #define MLX5E_SET_PRIV_FLAG(priv, pflag, enable) \ @@ -165,6 +166,11 @@ enum mlx5e_priv_flag { #define MLX5E_MIN_BW_ALLOC 1 /* Min percentage of BW allocation */ #endif +struct mlx5e_cq_moder { + u16 usec; + u16 pkts; +}; + struct mlx5e_params { u8 log_sq_size; u8 rq_wq_type; @@ -173,12 +179,11 @@ struct mlx5e_params { u8 log_rq_size; u16 num_channels; u8 num_tc; + u8 rx_cq_period_mode; bool rx_cqe_compress_admin; bool rx_cqe_compress; - u16 rx_cq_moderation_usec; - u16 rx_cq_moderation_pkts; - u16 tx_cq_moderation_usec; - u16 tx_cq_moderation_pkts; + struct mlx5e_cq_moder rx_cq_moderation; + struct mlx5e_cq_moder tx_cq_moderation; u16 min_rx_wqes; bool lro_en; u32 lro_wqe_sz; @@ -667,6 +672,9 @@ void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, int num_channels); int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); +void 
mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, + u8 cq_period_mode); + static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq, struct mlx5_wqe_ctrl_seg *ctrl, int bf_sz) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index f8bbc2b44fb3..4f433d39f693 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -524,10 +524,10 @@ static int mlx5e_get_coalesce(struct net_device *netdev, if (!MLX5_CAP_GEN(priv->mdev, cq_moderation)) return -ENOTSUPP; - coal->rx_coalesce_usecs = priv->params.rx_cq_moderation_usec; - coal->rx_max_coalesced_frames = priv->params.rx_cq_moderation_pkts; - coal->tx_coalesce_usecs = priv->params.tx_cq_moderation_usec; - coal->tx_max_coalesced_frames = priv->params.tx_cq_moderation_pkts; + coal->rx_coalesce_usecs = priv->params.rx_cq_moderation.usec; + coal->rx_max_coalesced_frames = priv->params.rx_cq_moderation.pkts; + coal->tx_coalesce_usecs = priv->params.tx_cq_moderation.usec; + coal->tx_max_coalesced_frames = priv->params.tx_cq_moderation.pkts; return 0; } @@ -545,10 +545,11 @@ static int mlx5e_set_coalesce(struct net_device *netdev, return -ENOTSUPP; mutex_lock(&priv->state_lock); - priv->params.tx_cq_moderation_usec = coal->tx_coalesce_usecs; - priv->params.tx_cq_moderation_pkts = coal->tx_max_coalesced_frames; - priv->params.rx_cq_moderation_usec = coal->rx_coalesce_usecs; - priv->params.rx_cq_moderation_pkts = coal->rx_max_coalesced_frames; + + priv->params.tx_cq_moderation.usec = coal->tx_coalesce_usecs; + priv->params.tx_cq_moderation.pkts = coal->tx_max_coalesced_frames; + priv->params.rx_cq_moderation.usec = coal->rx_coalesce_usecs; + priv->params.rx_cq_moderation.pkts = coal->rx_max_coalesced_frames; if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) goto out; @@ -1279,9 +1280,37 @@ static int mlx5e_get_module_eeprom(struct net_device *netdev, typedef int 
(*mlx5e_pflag_handler)(struct net_device *netdev, bool enable); -static int set_pflag_nop(struct net_device *netdev, bool enable) +static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable) { - return 0; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_core_dev *mdev = priv->mdev; + bool rx_mode_changed; + u8 rx_cq_period_mode; + int err = 0; + bool reset; + + rx_cq_period_mode = enable ? + MLX5_CQ_PERIOD_MODE_START_FROM_CQE : + MLX5_CQ_PERIOD_MODE_START_FROM_EQE; + rx_mode_changed = rx_cq_period_mode != priv->params.rx_cq_period_mode; + + if (rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE && + !MLX5_CAP_GEN(mdev, cq_period_start_from_cqe)) + return -ENOTSUPP; + + if (!rx_mode_changed) + return 0; + + reset = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (reset) + mlx5e_close_locked(netdev); + + mlx5e_set_rx_cq_mode_params(&priv->params, rx_cq_period_mode); + + if (reset) + err = mlx5e_open_locked(netdev); + + return err; } static int mlx5e_handle_pflag(struct net_device *netdev, @@ -1315,8 +1344,9 @@ static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags) mutex_lock(&priv->state_lock); - err = mlx5e_handle_pflag(netdev, pflags, MLX5E_PFLAG_NOP, - set_pflag_nop); + err = mlx5e_handle_pflag(netdev, pflags, + MLX5E_PFLAG_RX_CQE_BASED_MODER, + set_pflag_rx_cqe_based_moder); mutex_unlock(&priv->state_lock); return err ? 
-EINVAL : 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index e5a2cefdc0a3..13e7a45650f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -55,6 +55,7 @@ struct mlx5e_cq_param { u32 cqc[MLX5_ST_SZ_DW(cqc)]; struct mlx5_wq_param wq; u16 eq_ix; + u8 cq_period_mode; }; struct mlx5e_channel_param { @@ -896,6 +897,7 @@ static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used); + MLX5_SET(cqc, cqc, cq_period_mode, param->cq_period_mode); MLX5_SET(cqc, cqc, c_eqn, eqn); MLX5_SET(cqc, cqc, uar_page, mcq->uar->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - @@ -925,8 +927,7 @@ static void mlx5e_disable_cq(struct mlx5e_cq *cq) static int mlx5e_open_cq(struct mlx5e_channel *c, struct mlx5e_cq_param *param, struct mlx5e_cq *cq, - u16 moderation_usecs, - u16 moderation_frames) + struct mlx5e_cq_moder moderation) { int err; struct mlx5e_priv *priv = c->priv; @@ -942,8 +943,8 @@ static int mlx5e_open_cq(struct mlx5e_channel *c, if (MLX5_CAP_GEN(mdev, cq_moderation)) mlx5_core_modify_cq_moderation(mdev, &cq->mcq, - moderation_usecs, - moderation_frames); + moderation.usec, + moderation.pkts); return 0; err_destroy_cq: @@ -972,8 +973,7 @@ static int mlx5e_open_tx_cqs(struct mlx5e_channel *c, for (tc = 0; tc < c->num_tc; tc++) { err = mlx5e_open_cq(c, &cparam->tx_cq, &c->sq[tc].cq, - priv->params.tx_cq_moderation_usec, - priv->params.tx_cq_moderation_pkts); + priv->params.tx_cq_moderation); if (err) goto err_close_tx_cqs; } @@ -1110,6 +1110,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_channel_param *cparam, struct mlx5e_channel **cp) { + struct mlx5e_cq_moder icosq_cq_moder = {0, 0}; struct net_device *netdev = priv->netdev; int cpu = mlx5e_get_cpu(priv, ix); struct mlx5e_channel *c; @@ -1133,7 +1134,7 @@ 
static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); - err = mlx5e_open_cq(c, &cparam->icosq_cq, &c->icosq.cq, 0, 0); + err = mlx5e_open_cq(c, &cparam->icosq_cq, &c->icosq.cq, icosq_cq_moder); if (err) goto err_napi_del; @@ -1142,8 +1143,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, goto err_close_icosq_cq; err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq, - priv->params.rx_cq_moderation_usec, - priv->params.rx_cq_moderation_pkts); + priv->params.rx_cq_moderation); if (err) goto err_close_tx_cqs; @@ -1308,6 +1308,8 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv, } mlx5e_build_common_cq_param(priv, param); + + param->cq_period_mode = priv->params.rx_cq_period_mode; } static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, @@ -1318,6 +1320,8 @@ static void mlx5e_build_tx_cq_param(struct mlx5e_priv *priv, MLX5_SET(cqc, cqc, log_cq_size, priv->params.log_sq_size); mlx5e_build_common_cq_param(priv, param); + + param->cq_period_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE; } static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, @@ -1329,6 +1333,8 @@ static void mlx5e_build_ico_cq_param(struct mlx5e_priv *priv, MLX5_SET(cqc, cqc, log_cq_size, log_wq_size); mlx5e_build_common_cq_param(priv, param); + + param->cq_period_mode = MLX5_CQ_PERIOD_MODE_START_FROM_EQE; } static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, @@ -2856,6 +2862,20 @@ static bool cqe_compress_heuristic(u32 link_speed, u32 pci_bw) (pci_bw < 40000) && (pci_bw < link_speed)); } +void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params, u8 cq_period_mode) +{ + params->rx_cq_period_mode = cq_period_mode; + + params->rx_cq_moderation.pkts = + MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; + params->rx_cq_moderation.usec = + MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; + + if (cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) + params->rx_cq_moderation.usec = + 
MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE; +} + static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, struct net_device *netdev, int num_channels) @@ -2908,13 +2928,13 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, BIT(priv->params.log_rq_size)); - priv->params.rx_cq_moderation_usec = - MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC; - priv->params.rx_cq_moderation_pkts = - MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS; - priv->params.tx_cq_moderation_usec = + + mlx5e_set_rx_cq_mode_params(&priv->params, + MLX5_CQ_PERIOD_MODE_START_FROM_EQE); + + priv->params.tx_cq_moderation.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; - priv->params.tx_cq_moderation_pkts = + priv->params.tx_cq_moderation.pkts = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS; priv->params.tx_max_inline = mlx5e_get_max_inline_cap(mdev); priv->params.num_tc = 1; @@ -2929,6 +2949,10 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, priv->params.lro_wqe_sz = MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + /* Initialize pflags */ + MLX5E_SET_PRIV_FLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER, + priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); + priv->mdev = mdev; priv->netdev = netdev; priv->params.num_channels = num_channels; From cb3c7fd4f8393e0c42cbb13367b60454ae4e05f7 Mon Sep 17 00:00:00 2001 From: Gil Rockah Date: Thu, 23 Jun 2016 17:02:41 +0300 Subject: [PATCH 05/10] net/mlx5e: Support adaptive RX coalescing Striving for high message rate and low interrupt rate. Usage: ethtool -C <devname> adaptive-rx on/off Signed-off-by: Gil Rockah Signed-off-by: Achiad Shochat Signed-off-by: Saeed Mahameed CC: Arnd Bergmann Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlx5/core/Makefile | 3 +- drivers/net/ethernet/mellanox/mlx5/core/en.h | 33 ++ .../ethernet/mellanox/mlx5/core/en_ethtool.c | 18 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 30 +- .../ethernet/mellanox/mlx5/core/en_rx_am.c | 335 ++++++++++++++++++ .../net/ethernet/mellanox/mlx5/core/en_txrx.c | 5 + 6 files changed, 416 insertions(+), 8 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 0c8a7dcea483..c4f450f1c658 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -7,6 +7,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \ en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \ - en_txrx.o en_clock.o vxlan.o en_tc.o en_arfs.o + en_rx_am.o en_txrx.o en_clock.o vxlan.o en_tc.o \ + en_arfs.o mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 36f625d3c736..aa36a3a65e72 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -195,6 +195,7 @@ struct mlx5e_params { #ifdef CONFIG_MLX5_CORE_EN_DCB struct ieee_ets ets; #endif + bool rx_am_enabled; }; struct mlx5e_tstamp { @@ -213,6 +214,7 @@ struct mlx5e_tstamp { enum { MLX5E_RQ_STATE_POST_WQES_ENABLE, MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, + MLX5E_RQ_STATE_AM, }; struct mlx5e_cq { @@ -220,6 +222,7 @@ struct mlx5e_cq { struct mlx5_cqwq wq; /* data path - accessed per napi poll */ + u16 event_ctr; struct napi_struct *napi; struct mlx5_core_cq mcq; struct mlx5e_channel *channel; @@ -247,6 +250,30 @@ struct mlx5e_dma_info { dma_addr_t addr; }; +struct mlx5e_rx_am_stats { + int ppms; /* packets per msec */ + int epms; /* events per msec */ +}; + 
+struct mlx5e_rx_am_sample { + ktime_t time; + unsigned int pkt_ctr; + u16 event_ctr; +}; + +struct mlx5e_rx_am { /* Adaptive Moderation */ + u8 state; + struct mlx5e_rx_am_stats prev_stats; + struct mlx5e_rx_am_sample start_sample; + struct work_struct work; + u8 profile_ix; + u8 mode; + u8 tune_state; + u8 steps_right; + u8 steps_left; + u8 tired; +}; + struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; @@ -267,6 +294,8 @@ struct mlx5e_rq { unsigned long state; int ix; + struct mlx5e_rx_am am; /* Adaptive Moderation */ + /* control */ struct mlx5_wq_ctrl wq_ctrl; u8 wq_type; @@ -637,6 +666,10 @@ void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi); struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); +void mlx5e_rx_am(struct mlx5e_rq *rq); +void mlx5e_rx_am_work(struct work_struct *work); +struct mlx5e_cq_moder mlx5e_am_get_def_profile(u8 rx_cq_period_mode); + void mlx5e_update_stats(struct mlx5e_priv *priv); int mlx5e_create_flow_steering(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 4f433d39f693..c4be3941957e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -528,6 +528,7 @@ static int mlx5e_get_coalesce(struct net_device *netdev, coal->rx_max_coalesced_frames = priv->params.rx_cq_moderation.pkts; coal->tx_coalesce_usecs = priv->params.tx_cq_moderation.usec; coal->tx_max_coalesced_frames = priv->params.tx_cq_moderation.pkts; + coal->use_adaptive_rx_coalesce = priv->params.rx_am_enabled; return 0; } @@ -538,6 +539,10 @@ static int mlx5e_set_coalesce(struct net_device *netdev, struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_channel *c; + bool restart = + !!coal->use_adaptive_rx_coalesce != priv->params.rx_am_enabled; + bool was_opened; + int err = 0; int tc; int i; @@ -546,12 +551,18 @@ 
static int mlx5e_set_coalesce(struct net_device *netdev, mutex_lock(&priv->state_lock); + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + if (was_opened && restart) { + mlx5e_close_locked(netdev); + priv->params.rx_am_enabled = !!coal->use_adaptive_rx_coalesce; + } + priv->params.tx_cq_moderation.usec = coal->tx_coalesce_usecs; priv->params.tx_cq_moderation.pkts = coal->tx_max_coalesced_frames; priv->params.rx_cq_moderation.usec = coal->rx_coalesce_usecs; priv->params.rx_cq_moderation.pkts = coal->rx_max_coalesced_frames; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) + if (!was_opened || restart) goto out; for (i = 0; i < priv->params.num_channels; ++i) { @@ -570,8 +581,11 @@ static int mlx5e_set_coalesce(struct net_device *netdev, } out: + if (was_opened && restart) + err = mlx5e_open_locked(netdev); + mutex_unlock(&priv->state_lock); - return 0; + return err; } static u32 ptys2ethtool_supported_link(u32 eth_proto_cap) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 13e7a45650f0..39c06861c6b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -40,8 +40,9 @@ #include "vxlan.h" struct mlx5e_rq_param { - u32 rqc[MLX5_ST_SZ_DW(rqc)]; - struct mlx5_wq_param wq; + u32 rqc[MLX5_ST_SZ_DW(rqc)]; + struct mlx5_wq_param wq; + bool am_enabled; }; struct mlx5e_sq_param { @@ -337,6 +338,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, wqe->data.byte_count = cpu_to_be32(byte_count); } + INIT_WORK(&rq->am.work, mlx5e_rx_am_work); + rq->am.mode = priv->params.rx_cq_period_mode; + rq->wq_type = priv->params.rq_wq_type; rq->pdev = c->pdev; rq->netdev = c->netdev; @@ -509,6 +513,9 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (err) goto err_disable_rq; + if (param->am_enabled) + set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); + set_bit(MLX5E_RQ_STATE_POST_WQES_ENABLE, &rq->state); sq->ico_wqe_info[pi].opcode = 
MLX5_OPCODE_NOP; @@ -537,6 +544,8 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq) /* avoid destroying rq before mlx5e_poll_rx_cq() is done with it */ napi_synchronize(&rq->channel->napi); + cancel_work_sync(&rq->am.work); + mlx5e_disable_rq(rq); mlx5e_destroy_rq(rq); } @@ -1112,6 +1121,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, { struct mlx5e_cq_moder icosq_cq_moder = {0, 0}; struct net_device *netdev = priv->netdev; + struct mlx5e_cq_moder rx_cq_profile; int cpu = mlx5e_get_cpu(priv, ix); struct mlx5e_channel *c; struct mlx5e_sq *sq; @@ -1130,6 +1140,11 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->mkey_be = cpu_to_be32(priv->mkey.key); c->num_tc = priv->params.num_tc; + if (priv->params.rx_am_enabled) + rx_cq_profile = mlx5e_am_get_def_profile(priv->params.rx_cq_period_mode); + else + rx_cq_profile = priv->params.rx_cq_moderation; + mlx5e_build_channeltc_to_txq_map(priv, ix); netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); @@ -1143,7 +1158,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, goto err_close_icosq_cq; err = mlx5e_open_cq(c, &cparam->rx_cq, &c->rq.cq, - priv->params.rx_cq_moderation); + rx_cq_profile); if (err) goto err_close_tx_cqs; @@ -1243,6 +1258,8 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, param->wq.buf_numa_node = dev_to_node(&priv->mdev->pdev->dev); param->wq.linear = 1; + + param->am_enabled = priv->params.rx_am_enabled; } static void mlx5e_build_drop_rq_param(struct mlx5e_rq_param *param) @@ -2883,6 +2900,9 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv = netdev_priv(netdev); u32 link_speed = 0; u32 pci_bw = 0; + u8 cq_period_mode = MLX5_CAP_GEN(mdev, cq_period_start_from_cqe) ? 
+ MLX5_CQ_PERIOD_MODE_START_FROM_CQE : + MLX5_CQ_PERIOD_MODE_START_FROM_EQE; priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; @@ -2929,8 +2949,8 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev, priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, BIT(priv->params.log_rq_size)); - mlx5e_set_rx_cq_mode_params(&priv->params, - MLX5_CQ_PERIOD_MODE_START_FROM_EQE); + priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); + mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode); priv->params.tx_cq_moderation.usec = MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c new file mode 100644 index 000000000000..1fffe48a93cc --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx_am.c @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "en.h" + +/* Adaptive moderation profiles */ +#define MLX5E_AM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE 256 +#define MLX5E_RX_AM_DEF_PROFILE_CQE 1 +#define MLX5E_RX_AM_DEF_PROFILE_EQE 1 +#define MLX5E_PARAMS_AM_NUM_PROFILES 5 + +/* All profiles sizes must be MLX5E_PARAMS_AM_NUM_PROFILES */ +#define MLX5_AM_EQE_PROFILES { \ + {1, MLX5E_AM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {8, MLX5E_AM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {64, MLX5E_AM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {128, MLX5E_AM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ + {256, MLX5E_AM_DEFAULT_RX_CQ_MODERATION_PKTS_FROM_EQE}, \ +} + +#define MLX5_AM_CQE_PROFILES { \ + {2, 256}, \ + {8, 128}, \ + {16, 64}, \ + {32, 64}, \ + {64, 64} \ +} + +static const struct mlx5e_cq_moder +profile[MLX5_CQ_PERIOD_NUM_MODES][MLX5E_PARAMS_AM_NUM_PROFILES] = { + MLX5_AM_EQE_PROFILES, + MLX5_AM_CQE_PROFILES, +}; + +static inline struct mlx5e_cq_moder mlx5e_am_get_profile(u8 cq_period_mode, int ix) +{ + return profile[cq_period_mode][ix]; +} + +struct mlx5e_cq_moder mlx5e_am_get_def_profile(u8 rx_cq_period_mode) +{ + int default_profile_ix; + + if (rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE) + default_profile_ix = MLX5E_RX_AM_DEF_PROFILE_CQE; + else /* MLX5_CQ_PERIOD_MODE_START_FROM_EQE */ + default_profile_ix = MLX5E_RX_AM_DEF_PROFILE_EQE; + + return profile[rx_cq_period_mode][default_profile_ix]; +} + +/* Adaptive moderation logic */ +enum { + MLX5E_AM_START_MEASURE, + 
MLX5E_AM_MEASURE_IN_PROGRESS, + MLX5E_AM_APPLY_NEW_PROFILE, +}; + +enum { + MLX5E_AM_PARKING_ON_TOP, + MLX5E_AM_PARKING_TIRED, + MLX5E_AM_GOING_RIGHT, + MLX5E_AM_GOING_LEFT, +}; + +enum { + MLX5E_AM_STATS_WORSE, + MLX5E_AM_STATS_SAME, + MLX5E_AM_STATS_BETTER, +}; + +enum { + MLX5E_AM_STEPPED, + MLX5E_AM_TOO_TIRED, + MLX5E_AM_ON_EDGE, +}; + +static bool mlx5e_am_on_top(struct mlx5e_rx_am *am) +{ + switch (am->tune_state) { + case MLX5E_AM_PARKING_ON_TOP: + case MLX5E_AM_PARKING_TIRED: + WARN_ONCE(true, "mlx5e_am_on_top: PARKING\n"); + return true; + case MLX5E_AM_GOING_RIGHT: + return (am->steps_left > 1) && (am->steps_right == 1); + default: /* MLX5E_AM_GOING_LEFT */ + return (am->steps_right > 1) && (am->steps_left == 1); + } +} + +static void mlx5e_am_turn(struct mlx5e_rx_am *am) +{ + switch (am->tune_state) { + case MLX5E_AM_PARKING_ON_TOP: + case MLX5E_AM_PARKING_TIRED: + WARN_ONCE(true, "mlx5e_am_turn: PARKING\n"); + break; + case MLX5E_AM_GOING_RIGHT: + am->tune_state = MLX5E_AM_GOING_LEFT; + am->steps_left = 0; + break; + case MLX5E_AM_GOING_LEFT: + am->tune_state = MLX5E_AM_GOING_RIGHT; + am->steps_right = 0; + break; + } +} + +static int mlx5e_am_step(struct mlx5e_rx_am *am) +{ + if (am->tired == (MLX5E_PARAMS_AM_NUM_PROFILES * 2)) + return MLX5E_AM_TOO_TIRED; + + switch (am->tune_state) { + case MLX5E_AM_PARKING_ON_TOP: + case MLX5E_AM_PARKING_TIRED: + WARN_ONCE(true, "mlx5e_am_step: PARKING\n"); + break; + case MLX5E_AM_GOING_RIGHT: + if (am->profile_ix == (MLX5E_PARAMS_AM_NUM_PROFILES - 1)) + return MLX5E_AM_ON_EDGE; + am->profile_ix++; + am->steps_right++; + break; + case MLX5E_AM_GOING_LEFT: + if (am->profile_ix == 0) + return MLX5E_AM_ON_EDGE; + am->profile_ix--; + am->steps_left++; + break; + } + + am->tired++; + return MLX5E_AM_STEPPED; +} + +static void mlx5e_am_park_on_top(struct mlx5e_rx_am *am) +{ + am->steps_right = 0; + am->steps_left = 0; + am->tired = 0; + am->tune_state = MLX5E_AM_PARKING_ON_TOP; +} + +static void 
mlx5e_am_park_tired(struct mlx5e_rx_am *am) +{ + am->steps_right = 0; + am->steps_left = 0; + am->tune_state = MLX5E_AM_PARKING_TIRED; +} + +static void mlx5e_am_exit_parking(struct mlx5e_rx_am *am) +{ + am->tune_state = am->profile_ix ? MLX5E_AM_GOING_LEFT : + MLX5E_AM_GOING_RIGHT; + mlx5e_am_step(am); +} + +static int mlx5e_am_stats_compare(struct mlx5e_rx_am_stats *curr, + struct mlx5e_rx_am_stats *prev) +{ + int diff; + + if (!prev->ppms) + return curr->ppms ? MLX5E_AM_STATS_BETTER : + MLX5E_AM_STATS_SAME; + + diff = curr->ppms - prev->ppms; + if (((100 * abs(diff)) / prev->ppms) > 10) /* more than 10% diff */ + return (diff > 0) ? MLX5E_AM_STATS_BETTER : + MLX5E_AM_STATS_WORSE; + + if (!prev->epms) + return curr->epms ? MLX5E_AM_STATS_WORSE : + MLX5E_AM_STATS_SAME; + + diff = curr->epms - prev->epms; + if (((100 * abs(diff)) / prev->epms) > 10) /* more than 10% diff */ + return (diff < 0) ? MLX5E_AM_STATS_BETTER : + MLX5E_AM_STATS_WORSE; + + return MLX5E_AM_STATS_SAME; +} + +static bool mlx5e_am_decision(struct mlx5e_rx_am_stats *curr_stats, + struct mlx5e_rx_am *am) +{ + int prev_state = am->tune_state; + int prev_ix = am->profile_ix; + int stats_res; + int step_res; + + switch (am->tune_state) { + case MLX5E_AM_PARKING_ON_TOP: + stats_res = mlx5e_am_stats_compare(curr_stats, &am->prev_stats); + if (stats_res != MLX5E_AM_STATS_SAME) + mlx5e_am_exit_parking(am); + break; + + case MLX5E_AM_PARKING_TIRED: + am->tired--; + if (!am->tired) + mlx5e_am_exit_parking(am); + break; + + case MLX5E_AM_GOING_RIGHT: + case MLX5E_AM_GOING_LEFT: + stats_res = mlx5e_am_stats_compare(curr_stats, &am->prev_stats); + if (stats_res != MLX5E_AM_STATS_BETTER) + mlx5e_am_turn(am); + + if (mlx5e_am_on_top(am)) { + mlx5e_am_park_on_top(am); + break; + } + + step_res = mlx5e_am_step(am); + switch (step_res) { + case MLX5E_AM_ON_EDGE: + mlx5e_am_park_on_top(am); + break; + case MLX5E_AM_TOO_TIRED: + mlx5e_am_park_tired(am); + break; + } + + break; + } + + if ((prev_state != 
MLX5E_AM_PARKING_ON_TOP) || + (am->tune_state != MLX5E_AM_PARKING_ON_TOP)) + am->prev_stats = *curr_stats; + + return am->profile_ix != prev_ix; +} + +static void mlx5e_am_sample(struct mlx5e_rq *rq, + struct mlx5e_rx_am_sample *s) +{ + s->time = ktime_get(); + s->pkt_ctr = rq->stats.packets; + s->event_ctr = rq->cq.event_ctr; +} + +#define MLX5E_AM_NEVENTS 64 + +static void mlx5e_am_calc_stats(struct mlx5e_rx_am_sample *start, + struct mlx5e_rx_am_sample *end, + struct mlx5e_rx_am_stats *curr_stats) +{ + /* u32 holds up to 71 minutes, should be enough */ + u32 delta_us = ktime_us_delta(end->time, start->time); + unsigned int npkts = end->pkt_ctr - start->pkt_ctr; + + if (!delta_us) { + WARN_ONCE(true, "mlx5e_am_calc_stats: delta_us=0\n"); + return; + } + + curr_stats->ppms = (npkts * USEC_PER_MSEC) / delta_us; + curr_stats->epms = (MLX5E_AM_NEVENTS * USEC_PER_MSEC) / delta_us; +} + +void mlx5e_rx_am_work(struct work_struct *work) +{ + struct mlx5e_rx_am *am = container_of(work, struct mlx5e_rx_am, + work); + struct mlx5e_rq *rq = container_of(am, struct mlx5e_rq, am); + struct mlx5e_cq_moder cur_profile = profile[am->mode][am->profile_ix]; + + mlx5_core_modify_cq_moderation(rq->priv->mdev, &rq->cq.mcq, + cur_profile.usec, cur_profile.pkts); + + am->state = MLX5E_AM_START_MEASURE; +} + +void mlx5e_rx_am(struct mlx5e_rq *rq) +{ + struct mlx5e_rx_am *am = &rq->am; + struct mlx5e_rx_am_sample end_sample; + struct mlx5e_rx_am_stats curr_stats; + u16 nevents; + + switch (am->state) { + case MLX5E_AM_MEASURE_IN_PROGRESS: + nevents = rq->cq.event_ctr - am->start_sample.event_ctr; + if (nevents < MLX5E_AM_NEVENTS) + break; + mlx5e_am_sample(rq, &end_sample); + mlx5e_am_calc_stats(&am->start_sample, &end_sample, + &curr_stats); + if (mlx5e_am_decision(&curr_stats, am)) { + am->state = MLX5E_AM_APPLY_NEW_PROFILE; + schedule_work(&am->work); + break; + } + /* fall through */ + case MLX5E_AM_START_MEASURE: + mlx5e_am_sample(rq, &am->start_sample); + am->state = 
MLX5E_AM_MEASURE_IN_PROGRESS; + break; + case MLX5E_AM_APPLY_NEW_PROFILE: + break; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index c38781fa567d..64ae2e800daa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -136,6 +136,10 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) mlx5e_cq_arm(&c->sq[i].cq); + + if (test_bit(MLX5E_RQ_STATE_AM, &c->rq.state)) + mlx5e_rx_am(&c->rq); + mlx5e_cq_arm(&c->rq.cq); mlx5e_cq_arm(&c->icosq.cq); @@ -146,6 +150,7 @@ void mlx5e_completion_event(struct mlx5_core_cq *mcq) { struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); + cq->event_ctr++; set_bit(MLX5E_CHANNEL_NAPI_SCHED, &cq->channel->flags); napi_schedule(cq->napi); } From 667daedaecd15b89d0ded7af49519f28d6ea2cf4 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 23 Jun 2016 17:02:42 +0300 Subject: [PATCH 06/10] net/mlx5e: Toggle link only after modifying port parameters Add a dedicated function to toggle port link. It should be called only after setting a port register. Toggle will set the port link down and bring it back up if its admin status was up. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 9 +-------- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 7 +------ drivers/net/ethernet/mellanox/mlx5/core/port.c | 12 ++++++++++++ include/linux/mlx5/port.h | 1 + 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index b2db180ae2a5..e6883132b555 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -191,7 +191,6 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev, { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; - enum mlx5_port_status ps; u8 curr_pfc_en; int ret; @@ -200,14 +199,8 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev, if (pfc->pfc_en == curr_pfc_en) return 0; - mlx5_query_port_admin_status(mdev, &ps); - if (ps == MLX5_PORT_UP) - mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN); - ret = mlx5_set_port_pfc(mdev, pfc->pfc_en, pfc->pfc_en); - - if (ps == MLX5_PORT_UP) - mlx5_set_port_admin_status(mdev, MLX5_PORT_UP); + mlx5_toggle_port_link(mdev); return ret; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index c4be3941957e..d0d3dcfb4de9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -795,7 +795,6 @@ static int mlx5e_set_settings(struct net_device *netdev, u32 link_modes; u32 speed; u32 eth_proto_cap, eth_proto_admin; - enum mlx5_port_status ps; int err; speed = ethtool_cmd_speed(cmd); @@ -829,12 +828,8 @@ static int mlx5e_set_settings(struct net_device *netdev, if (link_modes == eth_proto_admin) goto out; - mlx5_query_port_admin_status(mdev, &ps); - if (ps == MLX5_PORT_UP) - mlx5_set_port_admin_status(mdev, MLX5_PORT_DOWN); mlx5_set_port_proto(mdev, link_modes, MLX5_PTYS_EN); - if (ps 
== MLX5_PORT_UP) - mlx5_set_port_admin_status(mdev, MLX5_PORT_UP); + mlx5_toggle_port_link(mdev); out: return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 3e35611b19c3..1562e7310f5b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -222,6 +222,18 @@ int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, } EXPORT_SYMBOL_GPL(mlx5_set_port_proto); +/* This function should be used after setting a port register only */ +void mlx5_toggle_port_link(struct mlx5_core_dev *dev) +{ + enum mlx5_port_status ps; + + mlx5_query_port_admin_status(dev, &ps); + mlx5_set_port_admin_status(dev, MLX5_PORT_DOWN); + if (ps == MLX5_PORT_UP) + mlx5_set_port_admin_status(dev, MLX5_PORT_UP); +} +EXPORT_SYMBOL_GPL(mlx5_toggle_port_link); + int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status) { diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 9851862c0ec5..4adfac15f0e9 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -67,6 +67,7 @@ int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, u8 local_port); int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, int proto_mask); +void mlx5_toggle_port_link(struct mlx5_core_dev *dev); int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, From 89da45b8b5b2187734a11038b8593714f964ffd1 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 23 Jun 2016 17:02:43 +0300 Subject: [PATCH 07/10] ethtool: Add 50G baseSR2 link mode Add ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT bit. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Cc: Ben Hutchings Cc: David Decotigny Acked-By: David Decotigny Signed-off-by: David S. 
Miller --- include/uapi/linux/ethtool.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 5f030b46cff4..b8f38e84d93a 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1362,6 +1362,7 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT = 37, ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT = 38, ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT = 39, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT = 40, /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_* @@ -1370,7 +1371,7 @@ enum ethtool_link_mode_bit_indices { */ __ETHTOOL_LINK_MODE_LAST - = ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT, + = ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT, }; #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \ From 4a50e35b04c9008558a73ed4e349b3b483ef6739 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 23 Jun 2016 17:02:44 +0300 Subject: [PATCH 08/10] net/mlx5e: Add missing 50G baseSR2 link mode Add MLX5E_50GBASE_SR2 as ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Cc: Ben Hutchings Cc: David Decotigny Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index aa36a3a65e72..b8732e6d2b6b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -616,6 +616,7 @@ enum mlx5e_link_mode { MLX5E_10GBASE_ER = 14, MLX5E_40GBASE_SR4 = 15, MLX5E_40GBASE_LR4 = 16, + MLX5E_50GBASE_SR2 = 18, MLX5E_100GBASE_CR4 = 20, MLX5E_100GBASE_SR4 = 21, MLX5E_100GBASE_KR4 = 22, From 665bc53969d79af9cfd080e25b91b0415a2b5eec Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 23 Jun 2016 17:02:45 +0300 Subject: [PATCH 09/10] net/mlx5e: Use new ethtool get/set link ksettings API Use new get/set link ksettings and remove get/set settings legacy callbacks. This allows us to use bitmasks longer than 32 bit for supported and advertised link modes and use modes that were previously not supported. Signed-off-by: Gal Pressman CC: Ben Hutchings CC: David Decotigny Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 + .../ethernet/mellanox/mlx5/core/en_ethtool.c | 306 ++++++++---------- .../net/ethernet/mellanox/mlx5/core/en_main.c | 1 + 3 files changed, 143 insertions(+), 167 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index b8732e6d2b6b..da885c0dfebe 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -634,6 +634,9 @@ enum mlx5e_link_mode { #define MLX5E_PROT_MASK(link_mode) (1 << link_mode) + +void mlx5e_build_ptys2ethtool_map(void); + void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw); u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index d0d3dcfb4de9..4c560e003d56 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -48,123 +48,85 @@ static void mlx5e_get_drvinfo(struct net_device *dev, sizeof(drvinfo->bus_info)); } -static const struct { - u32 supported; - u32 advertised; +struct ptys2ethtool_config { + __ETHTOOL_DECLARE_LINK_MODE_MASK(supported); + __ETHTOOL_DECLARE_LINK_MODE_MASK(advertised); u32 speed; -} ptys2ethtool_table[MLX5E_LINK_MODES_NUMBER] = { - [MLX5E_1000BASE_CX_SGMII] = { - .supported = SUPPORTED_1000baseKX_Full, - .advertised = ADVERTISED_1000baseKX_Full, - .speed = 1000, - }, - [MLX5E_1000BASE_KX] = { - .supported = SUPPORTED_1000baseKX_Full, - .advertised = ADVERTISED_1000baseKX_Full, - .speed = 1000, - }, - [MLX5E_10GBASE_CX4] = { - .supported = SUPPORTED_10000baseKX4_Full, - .advertised = ADVERTISED_10000baseKX4_Full, - .speed = 10000, - }, - [MLX5E_10GBASE_KX4] = { - .supported = SUPPORTED_10000baseKX4_Full, - .advertised = ADVERTISED_10000baseKX4_Full, - .speed = 10000, - }, - 
[MLX5E_10GBASE_KR] = { - .supported = SUPPORTED_10000baseKR_Full, - .advertised = ADVERTISED_10000baseKR_Full, - .speed = 10000, - }, - [MLX5E_20GBASE_KR2] = { - .supported = SUPPORTED_20000baseKR2_Full, - .advertised = ADVERTISED_20000baseKR2_Full, - .speed = 20000, - }, - [MLX5E_40GBASE_CR4] = { - .supported = SUPPORTED_40000baseCR4_Full, - .advertised = ADVERTISED_40000baseCR4_Full, - .speed = 40000, - }, - [MLX5E_40GBASE_KR4] = { - .supported = SUPPORTED_40000baseKR4_Full, - .advertised = ADVERTISED_40000baseKR4_Full, - .speed = 40000, - }, - [MLX5E_56GBASE_R4] = { - .supported = SUPPORTED_56000baseKR4_Full, - .advertised = ADVERTISED_56000baseKR4_Full, - .speed = 56000, - }, - [MLX5E_10GBASE_CR] = { - .supported = SUPPORTED_10000baseKR_Full, - .advertised = ADVERTISED_10000baseKR_Full, - .speed = 10000, - }, - [MLX5E_10GBASE_SR] = { - .supported = SUPPORTED_10000baseKR_Full, - .advertised = ADVERTISED_10000baseKR_Full, - .speed = 10000, - }, - [MLX5E_10GBASE_ER] = { - .supported = SUPPORTED_10000baseKR_Full, - .advertised = ADVERTISED_10000baseKR_Full, - .speed = 10000, - }, - [MLX5E_40GBASE_SR4] = { - .supported = SUPPORTED_40000baseSR4_Full, - .advertised = ADVERTISED_40000baseSR4_Full, - .speed = 40000, - }, - [MLX5E_40GBASE_LR4] = { - .supported = SUPPORTED_40000baseLR4_Full, - .advertised = ADVERTISED_40000baseLR4_Full, - .speed = 40000, - }, - [MLX5E_100GBASE_CR4] = { - .speed = 100000, - }, - [MLX5E_100GBASE_SR4] = { - .speed = 100000, - }, - [MLX5E_100GBASE_KR4] = { - .speed = 100000, - }, - [MLX5E_100GBASE_LR4] = { - .speed = 100000, - }, - [MLX5E_100BASE_TX] = { - .speed = 100, - }, - [MLX5E_1000BASE_T] = { - .supported = SUPPORTED_1000baseT_Full, - .advertised = ADVERTISED_1000baseT_Full, - .speed = 1000, - }, - [MLX5E_10GBASE_T] = { - .supported = SUPPORTED_10000baseT_Full, - .advertised = ADVERTISED_10000baseT_Full, - .speed = 1000, - }, - [MLX5E_25GBASE_CR] = { - .speed = 25000, - }, - [MLX5E_25GBASE_KR] = { - .speed = 25000, - }, - 
[MLX5E_25GBASE_SR] = { - .speed = 25000, - }, - [MLX5E_50GBASE_CR2] = { - .speed = 50000, - }, - [MLX5E_50GBASE_KR2] = { - .speed = 50000, - }, }; +static struct ptys2ethtool_config ptys2ethtool_table[MLX5E_LINK_MODES_NUMBER]; + +#define MLX5_BUILD_PTYS2ETHTOOL_CONFIG(reg_, speed_, ...) \ + ({ \ + struct ptys2ethtool_config *cfg; \ + const unsigned int modes[] = { __VA_ARGS__ }; \ + unsigned int i; \ + cfg = &ptys2ethtool_table[reg_]; \ + cfg->speed = speed_; \ + bitmap_zero(cfg->supported, \ + __ETHTOOL_LINK_MODE_MASK_NBITS); \ + bitmap_zero(cfg->advertised, \ + __ETHTOOL_LINK_MODE_MASK_NBITS); \ + for (i = 0 ; i < ARRAY_SIZE(modes) ; ++i) { \ + __set_bit(modes[i], cfg->supported); \ + __set_bit(modes[i], cfg->advertised); \ + } \ + }) + +void mlx5e_build_ptys2ethtool_map(void) +{ + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_CX_SGMII, SPEED_1000, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_KX, SPEED_1000, + ETHTOOL_LINK_MODE_1000baseKX_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CX4, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KX4, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_KR, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_20GBASE_KR2, SPEED_20000, + ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_CR4, SPEED_40000, + ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_KR4, SPEED_40000, + ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_56GBASE_R4, SPEED_56000, + ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_CR, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_SR, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + 
MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_ER, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_SR4, SPEED_40000, + ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_40GBASE_LR4, SPEED_40000, + ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_SR2, SPEED_50000, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_CR4, SPEED_100000, + ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_SR4, SPEED_100000, + ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_KR4, SPEED_100000, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4, SPEED_100000, + ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T, SPEED_10000, + ETHTOOL_LINK_MODE_10000baseT_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR, SPEED_25000, + ETHTOOL_LINK_MODE_25000baseCR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_KR, SPEED_25000, + ETHTOOL_LINK_MODE_25000baseKR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_SR, SPEED_25000, + ETHTOOL_LINK_MODE_25000baseSR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_CR2, SPEED_50000, + ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_50GBASE_KR2, SPEED_50000, + ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT); +} + static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@ -588,31 +550,30 @@ out: return err; } -static u32 ptys2ethtool_supported_link(u32 eth_proto_cap) +static void ptys2ethtool_supported_link(unsigned long *supported_modes, + u32 eth_proto_cap) { - int i; - u32 supported_modes = 0; + int proto; - for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { - if (eth_proto_cap & MLX5E_PROT_MASK(i)) - 
supported_modes |= ptys2ethtool_table[i].supported; - } - return supported_modes; + for_each_set_bit(proto, (unsigned long *)&eth_proto_cap, MLX5E_LINK_MODES_NUMBER) + bitmap_or(supported_modes, supported_modes, + ptys2ethtool_table[proto].supported, + __ETHTOOL_LINK_MODE_MASK_NBITS); } -static u32 ptys2ethtool_adver_link(u32 eth_proto_cap) +static void ptys2ethtool_adver_link(unsigned long *advertising_modes, + u32 eth_proto_cap) { - int i; - u32 advertising_modes = 0; + int proto; - for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { - if (eth_proto_cap & MLX5E_PROT_MASK(i)) - advertising_modes |= ptys2ethtool_table[i].advertised; - } - return advertising_modes; + for_each_set_bit(proto, (unsigned long *)&eth_proto_cap, MLX5E_LINK_MODES_NUMBER) + bitmap_or(advertising_modes, advertising_modes, + ptys2ethtool_table[proto].advertised, + __ETHTOOL_LINK_MODE_MASK_NBITS); } -static u32 ptys2ethtool_supported_port(u32 eth_proto_cap) +static void ptys2ethtool_supported_port(struct ethtool_link_ksettings *link_ksettings, + u32 eth_proto_cap) { if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_10GBASE_CR) | MLX5E_PROT_MASK(MLX5E_10GBASE_SR) @@ -620,7 +581,7 @@ static u32 ptys2ethtool_supported_port(u32 eth_proto_cap) | MLX5E_PROT_MASK(MLX5E_40GBASE_SR4) | MLX5E_PROT_MASK(MLX5E_100GBASE_SR4) | MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII))) { - return SUPPORTED_FIBRE; + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, FIBRE); } if (eth_proto_cap & (MLX5E_PROT_MASK(MLX5E_100GBASE_KR4) @@ -628,9 +589,8 @@ static u32 ptys2ethtool_supported_port(u32 eth_proto_cap) | MLX5E_PROT_MASK(MLX5E_10GBASE_KR) | MLX5E_PROT_MASK(MLX5E_10GBASE_KX4) | MLX5E_PROT_MASK(MLX5E_1000BASE_KX))) { - return SUPPORTED_Backplane; + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Backplane); } - return 0; } int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) @@ -654,7 +614,7 @@ int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed) static void 
get_speed_duplex(struct net_device *netdev, u32 eth_proto_oper, - struct ethtool_cmd *cmd) + struct ethtool_link_ksettings *link_ksettings) { int i; u32 speed = SPEED_UNKNOWN; @@ -671,23 +631,32 @@ static void get_speed_duplex(struct net_device *netdev, } } out: - ethtool_cmd_speed_set(cmd, speed); - cmd->duplex = duplex; + link_ksettings->base.speed = speed; + link_ksettings->base.duplex = duplex; } -static void get_supported(u32 eth_proto_cap, u32 *supported) +static void get_supported(u32 eth_proto_cap, + struct ethtool_link_ksettings *link_ksettings) { - *supported |= ptys2ethtool_supported_port(eth_proto_cap); - *supported |= ptys2ethtool_supported_link(eth_proto_cap); - *supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; + unsigned long *supported = link_ksettings->link_modes.supported; + + ptys2ethtool_supported_port(link_ksettings, eth_proto_cap); + ptys2ethtool_supported_link(supported, eth_proto_cap); + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Pause); + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, Asym_Pause); } static void get_advertising(u32 eth_proto_cap, u8 tx_pause, - u8 rx_pause, u32 *advertising) + u8 rx_pause, + struct ethtool_link_ksettings *link_ksettings) { - *advertising |= ptys2ethtool_adver_link(eth_proto_cap); - *advertising |= tx_pause ? ADVERTISED_Pause : 0; - *advertising |= (tx_pause ^ rx_pause) ? 
ADVERTISED_Asym_Pause : 0; + unsigned long *advertising = link_ksettings->link_modes.advertising; + + ptys2ethtool_adver_link(advertising, eth_proto_cap); + if (tx_pause) + ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Pause); + if (tx_pause ^ rx_pause) + ethtool_link_ksettings_add_link_mode(link_ksettings, advertising, Asym_Pause); } static u8 get_connector_port(u32 eth_proto) @@ -715,13 +684,16 @@ static u8 get_connector_port(u32 eth_proto) return PORT_OTHER; } -static void get_lp_advertising(u32 eth_proto_lp, u32 *lp_advertising) +static void get_lp_advertising(u32 eth_proto_lp, + struct ethtool_link_ksettings *link_ksettings) { - *lp_advertising = ptys2ethtool_adver_link(eth_proto_lp); + unsigned long *lp_advertising = link_ksettings->link_modes.lp_advertising; + + ptys2ethtool_adver_link(lp_advertising, eth_proto_lp); } -static int mlx5e_get_settings(struct net_device *netdev, - struct ethtool_cmd *cmd) +static int mlx5e_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; @@ -745,30 +717,30 @@ static int mlx5e_get_settings(struct net_device *netdev, eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); - cmd->supported = 0; - cmd->advertising = 0; + ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); + ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); - get_supported(eth_proto_cap, &cmd->supported); - get_advertising(eth_proto_admin, 0, 0, &cmd->advertising); - get_speed_duplex(netdev, eth_proto_oper, cmd); + get_supported(eth_proto_cap, link_ksettings); + get_advertising(eth_proto_admin, 0, 0, link_ksettings); + get_speed_duplex(netdev, eth_proto_oper, link_ksettings); eth_proto_oper = eth_proto_oper ? 
eth_proto_oper : eth_proto_cap; - cmd->port = get_connector_port(eth_proto_oper); - get_lp_advertising(eth_proto_lp, &cmd->lp_advertising); - - cmd->transceiver = XCVR_INTERNAL; + link_ksettings->base.port = get_connector_port(eth_proto_oper); + get_lp_advertising(eth_proto_lp, link_ksettings); err_query_ptys: return err; } -static u32 mlx5e_ethtool2ptys_adver_link(u32 link_modes) +static u32 mlx5e_ethtool2ptys_adver_link(const unsigned long *link_modes) { u32 i, ptys_modes = 0; for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i) { - if (ptys2ethtool_table[i].advertised & link_modes) + if (bitmap_intersects(ptys2ethtool_table[i].advertised, + link_modes, + __ETHTOOL_LINK_MODE_MASK_NBITS)) ptys_modes |= MLX5E_PROT_MASK(i); } @@ -787,8 +759,8 @@ static u32 mlx5e_ethtool2ptys_speed_link(u32 speed) return speed_links; } -static int mlx5e_set_settings(struct net_device *netdev, - struct ethtool_cmd *cmd) +static int mlx5e_set_link_ksettings(struct net_device *netdev, + const struct ethtool_link_ksettings *link_ksettings) { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; @@ -797,10 +769,10 @@ static int mlx5e_set_settings(struct net_device *netdev, u32 eth_proto_cap, eth_proto_admin; int err; - speed = ethtool_cmd_speed(cmd); + speed = link_ksettings->base.speed; - link_modes = cmd->autoneg == AUTONEG_ENABLE ? - mlx5e_ethtool2ptys_adver_link(cmd->advertising) : + link_modes = link_ksettings->base.autoneg == AUTONEG_ENABLE ? 
+ mlx5e_ethtool2ptys_adver_link(link_ksettings->link_modes.advertising) : mlx5e_ethtool2ptys_speed_link(speed); err = mlx5_query_port_proto_cap(mdev, &eth_proto_cap, MLX5_PTYS_EN); @@ -1380,8 +1352,8 @@ const struct ethtool_ops mlx5e_ethtool_ops = { .set_channels = mlx5e_set_channels, .get_coalesce = mlx5e_get_coalesce, .set_coalesce = mlx5e_set_coalesce, - .get_settings = mlx5e_get_settings, - .set_settings = mlx5e_set_settings, + .get_link_ksettings = mlx5e_get_link_ksettings, + .set_link_ksettings = mlx5e_set_link_ksettings, .get_rxfh_key_size = mlx5e_get_rxfh_key_size, .get_rxfh_indir_size = mlx5e_get_rxfh_indir_size, .get_rxfh = mlx5e_get_rxfh, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 39c06861c6b3..02a0f1796f7b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3380,6 +3380,7 @@ static struct mlx5_interface mlx5e_interface = { void mlx5e_init(void) { + mlx5e_build_ptys2ethtool_map(); mlx5_register_interface(&mlx5e_interface); } From 52244d960755936fa9c8ce54d583d0ed46f24fb6 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 23 Jun 2016 17:02:46 +0300 Subject: [PATCH 10/10] net/mlx5e: Report correct auto negotiation and allow toggling Previous to this patch auto negotiation was reported off although it was on by default in hardware. This patch reports the correct information to ethtool and allows the user to toggle it on/off. Added another parameter to set port proto function in order to pass the auto negotiation field to the hardware. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- .../ethernet/mellanox/mlx5/core/en_ethtool.c | 44 +++++++++++++++---- .../net/ethernet/mellanox/mlx5/core/port.c | 36 +++++++++++++-- include/linux/mlx5/port.h | 15 ++++++- 3 files changed, 81 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 4c560e003d56..39a4d961a58e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -702,6 +702,8 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, u32 eth_proto_admin; u32 eth_proto_lp; u32 eth_proto_oper; + u8 an_disable_admin; + u8 an_status; int err; err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); @@ -712,10 +714,12 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, goto err_query_ptys; } - eth_proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); - eth_proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); - eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); - eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); + eth_proto_cap = MLX5_GET(ptys_reg, out, eth_proto_capability); + eth_proto_admin = MLX5_GET(ptys_reg, out, eth_proto_admin); + eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); + eth_proto_lp = MLX5_GET(ptys_reg, out, eth_proto_lp_advertise); + an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); + an_status = MLX5_GET(ptys_reg, out, an_status); ethtool_link_ksettings_zero_link_mode(link_ksettings, supported); ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising); @@ -729,6 +733,18 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, link_ksettings->base.port = get_connector_port(eth_proto_oper); get_lp_advertising(eth_proto_lp, link_ksettings); + if (an_status == MLX5_AN_COMPLETE) + ethtool_link_ksettings_add_link_mode(link_ksettings, + lp_advertising, Autoneg); + + link_ksettings->base.autoneg = 
an_disable_admin ? AUTONEG_DISABLE : + AUTONEG_ENABLE; + ethtool_link_ksettings_add_link_mode(link_ksettings, supported, + Autoneg); + if (!an_disable_admin) + ethtool_link_ksettings_add_link_mode(link_ksettings, + advertising, Autoneg); + err_query_ptys: return err; } @@ -764,9 +780,14 @@ static int mlx5e_set_link_ksettings(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - u32 link_modes; - u32 speed; u32 eth_proto_cap, eth_proto_admin; + bool an_changes = false; + u8 an_disable_admin; + u8 an_disable_cap; + bool an_disable; + u32 link_modes; + u8 an_status; + u32 speed; int err; speed = link_ksettings->base.speed; @@ -797,10 +818,17 @@ static int mlx5e_set_link_ksettings(struct net_device *netdev, goto out; } - if (link_modes == eth_proto_admin) + mlx5_query_port_autoneg(mdev, MLX5_PTYS_EN, &an_status, + &an_disable_cap, &an_disable_admin); + + an_disable = link_ksettings->base.autoneg == AUTONEG_DISABLE; + an_changes = ((!an_disable && an_disable_admin) || + (an_disable && !an_disable_admin)); + + if (!an_changes && link_modes == eth_proto_admin) goto out; - mlx5_set_port_proto(mdev, link_modes, MLX5_PTYS_EN); + mlx5_set_port_ptys(mdev, an_disable, link_modes, MLX5_PTYS_EN); mlx5_toggle_port_link(mdev); out: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 1562e7310f5b..752c08127138 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -202,15 +202,24 @@ int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_query_port_proto_oper); -int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, - int proto_mask) +int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin, int proto_mask) { - u32 in[MLX5_ST_SZ_DW(ptys_reg)]; u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + u32 in[MLX5_ST_SZ_DW(ptys_reg)]; + u8 
an_disable_admin; + u8 an_disable_cap; + u8 an_status; + + mlx5_query_port_autoneg(dev, proto_mask, &an_status, + &an_disable_cap, &an_disable_admin); + if (!an_disable_cap && an_disable) + return -EPERM; memset(in, 0, sizeof(in)); MLX5_SET(ptys_reg, in, local_port, 1); + MLX5_SET(ptys_reg, in, an_disable_admin, an_disable); MLX5_SET(ptys_reg, in, proto_mask, proto_mask); if (proto_mask == MLX5_PTYS_EN) MLX5_SET(ptys_reg, in, eth_proto_admin, proto_admin); @@ -220,7 +229,7 @@ int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PTYS, 0, 1); } -EXPORT_SYMBOL_GPL(mlx5_set_port_proto); +EXPORT_SYMBOL_GPL(mlx5_set_port_ptys); /* This function should be used after setting a port register only */ void mlx5_toggle_port_link(struct mlx5_core_dev *dev) @@ -530,6 +539,25 @@ int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx) } EXPORT_SYMBOL_GPL(mlx5_query_port_pfc); +void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask, + u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + + *an_status = 0; + *an_disable_cap = 0; + *an_disable_admin = 0; + + if (mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, 1)) + return; + + *an_status = MLX5_GET(ptys_reg, out, an_status); + *an_disable_cap = MLX5_GET(ptys_reg, out, an_disable_cap); + *an_disable_admin = MLX5_GET(ptys_reg, out, an_disable_admin); +} +EXPORT_SYMBOL_GPL(mlx5_query_port_autoneg); + int mlx5_max_tc(struct mlx5_core_dev *mdev) { u8 num_tc = MLX5_CAP_GEN(mdev, max_tc) ? 
: 8; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index 4adfac15f0e9..e3012cc64b8a 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -47,6 +47,14 @@ enum mlx5_module_id { MLX5_MODULE_ID_QSFP28 = 0x11, }; +enum mlx5_an_status { + MLX5_AN_UNAVAILABLE = 0, + MLX5_AN_COMPLETE = 1, + MLX5_AN_FAILED = 2, + MLX5_AN_LINK_UP = 3, + MLX5_AN_LINK_DOWN = 4, +}; + #define MLX5_EEPROM_MAX_BYTES 32 #define MLX5_EEPROM_IDENTIFIER_BYTE_MASK 0x000000ff #define MLX5_I2C_ADDR_LOW 0x50 @@ -65,14 +73,17 @@ int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, u8 *proto_oper, int proto_mask, u8 local_port); -int mlx5_set_port_proto(struct mlx5_core_dev *dev, u32 proto_admin, - int proto_mask); +int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable, + u32 proto_admin, int proto_mask); void mlx5_toggle_port_link(struct mlx5_core_dev *dev); int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status); int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status *status); int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration); +void mlx5_query_port_autoneg(struct mlx5_core_dev *dev, int proto_mask, + u8 *an_status, + u8 *an_disable_cap, u8 *an_disable_admin); int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port); void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port);