mlx5-fixes-2022-11-21

-----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEEGhZs6bAKwk/OTgTpSD+KveBX+j4FAmN8MIsACgkQSD+KveBX
 +j6HLAf+JYTnln3GG5Keifdzfnl2kuA8CzdqvAbK4K+0xcYzAMqyu3/gx+THFh6+
 SkF6ZBmn9vxDEilZXA9D9g3BmxoHZARP0LOCDa+CWpzYY1IoGVL2JMMUqJ4TRscC
 dp0KnGX8FzsMwIxfqQaULZsjd2v90xrBPiyGfNY7ydprKvyLbpPbSvbkVoa/APo6
 vXxXTIoK7cepLF/nLwBqLhDrmt/asGcOhcVFXDLKMCYG/TvPelzRRmFvgh/yW3Zn
 p6d8DO+2Jd3oPujS93WgLEVzGvfO8Bkwx2b4WVvFo+GoZ7g9EeZoQqDhihVnq6i8
 uX4JhYqb0Dp41TyJJ/OrpFeM9ko+6g==
 =SMAf
 -----END PGP SIGNATURE-----

Merge tag 'mlx5-fixes-2022-11-21' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux

Saeed Mahameed says:

====================
mlx5 fixes 2022-11-21

This series provides bug fixes to mlx5 driver.

* tag 'mlx5-fixes-2022-11-21' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux:
  net/mlx5e: Fix possible race condition in macsec extended packet number update routine
  net/mlx5e: Fix MACsec update SecY
  net/mlx5e: Fix MACsec SA initialization routine
  net/mlx5e: Remove leftovers from old XSK queues enumeration
  net/mlx5e: Offload rule only when all encaps are valid
  net/mlx5e: Fix missing alignment in size of MTT/KLM entries
  net/mlx5: Fix sync reset event handler error flow
  net/mlx5: E-Switch, Set correctly vport destination
  net/mlx5: Lag, avoid lockdep warnings
  net/mlx5: Fix handling of entry refcount when command is not issued to FW
  net/mlx5: cmdif, Print info on any firmware cmd failure to tracepoint
  net/mlx5: SF: Fix probing active SFs during driver probe phase
  net/mlx5: Fix FW tracer timestamp calculation
  net/mlx5: Do not query pci info while pci disabled
====================

Link: https://lore.kernel.org/r/20221122022559.89459-1-saeed@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
Jakub Kicinski 2022-11-22 20:20:58 -08:00
commit 178a4ff119
18 changed files with 285 additions and 122 deletions

View File

@ -45,6 +45,8 @@
#include "mlx5_core.h"
#include "lib/eq.h"
#include "lib/tout.h"
#define CREATE_TRACE_POINTS
#include "diag/cmd_tracepoint.h"
enum {
CMD_IF_REV = 5,
@ -785,27 +787,14 @@ EXPORT_SYMBOL(mlx5_cmd_out_err);
static void cmd_status_print(struct mlx5_core_dev *dev, void *in, void *out)
{
u16 opcode, op_mod;
u32 syndrome;
u8 status;
u16 uid;
int err;
syndrome = MLX5_GET(mbox_out, out, syndrome);
status = MLX5_GET(mbox_out, out, status);
opcode = MLX5_GET(mbox_in, in, opcode);
op_mod = MLX5_GET(mbox_in, in, op_mod);
uid = MLX5_GET(mbox_in, in, uid);
err = cmd_status_to_err(status);
if (!uid && opcode != MLX5_CMD_OP_DESTROY_MKEY)
mlx5_cmd_out_err(dev, opcode, op_mod, out);
else
mlx5_core_dbg(dev,
"%s(0x%x) op_mod(0x%x) uid(%d) failed, status %s(0x%x), syndrome (0x%x), err(%d)\n",
mlx5_command_str(opcode), opcode, op_mod, uid,
cmd_status_str(status), status, syndrome, err);
}
int mlx5_cmd_check(struct mlx5_core_dev *dev, int err, void *in, void *out)
@ -1016,6 +1005,7 @@ static void cmd_work_handler(struct work_struct *work)
cmd_ent_get(ent);
set_bit(MLX5_CMD_ENT_STATE_PENDING_COMP, &ent->state);
cmd_ent_get(ent); /* for the _real_ FW event on completion */
/* Skip sending command to fw if internal error */
if (mlx5_cmd_is_down(dev) || !opcode_allowed(&dev->cmd, ent->op)) {
ent->ret = -ENXIO;
@ -1023,7 +1013,6 @@ static void cmd_work_handler(struct work_struct *work)
return;
}
cmd_ent_get(ent); /* for the _real_ FW event on completion */
/* ring doorbell after the descriptor is valid */
mlx5_core_dbg(dev, "writing 0x%x to command doorbell\n", 1 << ent->idx);
wmb();
@ -1672,8 +1661,8 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
cmd_ent_put(ent); /* timeout work was canceled */
if (!forced || /* Real FW completion */
pci_channel_offline(dev->pdev) || /* FW is inaccessible */
dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
mlx5_cmd_is_down(dev) || /* No real FW completion is expected */
!opcode_allowed(cmd, ent->op))
cmd_ent_put(ent);
ent->ts2 = ktime_get_ns();
@ -1892,6 +1881,16 @@ out_in:
return err;
}
/* Report a failed command through the mlx5_cmd tracepoint.
 *
 * @dev:    device the command was issued on (not used by this helper)
 * @opcode: command opcode
 * @op_mod: command op_mod
 * @out:    command outbox; status and syndrome are read from it
 *
 * The tracepoint receives the command name, the firmware status (as string
 * and as raw value), the syndrome and the errno that the status maps to.
 */
static void mlx5_cmd_err_trace(struct mlx5_core_dev *dev, u16 opcode, u16 op_mod, void *out)
{
	u8 fw_status = MLX5_GET(mbox_out, out, status);
	u32 fw_syndrome = MLX5_GET(mbox_out, out, syndrome);
	const char *cmd_name = mlx5_command_str(opcode);
	const char *status_name = cmd_status_str(fw_status);
	int err = cmd_status_to_err(fw_status);

	trace_mlx5_cmd(cmd_name, opcode, op_mod, status_name, fw_status,
		       fw_syndrome, err);
}
static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
u32 syndrome, int err)
{
@ -1914,7 +1913,7 @@ static void cmd_status_log(struct mlx5_core_dev *dev, u16 opcode, u8 status,
}
/* preserve -EREMOTEIO for outbox.status != OK, otherwise return err as is */
static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *out)
static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, u16 op_mod, void *out)
{
u32 syndrome = MLX5_GET(mbox_out, out, syndrome);
u8 status = MLX5_GET(mbox_out, out, status);
@ -1922,8 +1921,10 @@ static int cmd_status_err(struct mlx5_core_dev *dev, int err, u16 opcode, void *
if (err == -EREMOTEIO) /* -EREMOTEIO is preserved */
err = -EIO;
if (!err && status != MLX5_CMD_STAT_OK)
if (!err && status != MLX5_CMD_STAT_OK) {
err = -EREMOTEIO;
mlx5_cmd_err_trace(dev, opcode, op_mod, out);
}
cmd_status_log(dev, opcode, status, syndrome, err);
return err;
@ -1951,9 +1952,9 @@ int mlx5_cmd_do(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int
{
int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, false);
u16 opcode = MLX5_GET(mbox_in, in, opcode);
u16 op_mod = MLX5_GET(mbox_in, in, op_mod);
err = cmd_status_err(dev, err, opcode, out);
return err;
return cmd_status_err(dev, err, opcode, op_mod, out);
}
EXPORT_SYMBOL(mlx5_cmd_do);
@ -1997,8 +1998,9 @@ int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
{
int err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL, true);
u16 opcode = MLX5_GET(mbox_in, in, opcode);
u16 op_mod = MLX5_GET(mbox_in, in, op_mod);
err = cmd_status_err(dev, err, opcode, out);
err = cmd_status_err(dev, err, opcode, op_mod, out);
return mlx5_cmd_check(dev, err, in, out);
}
EXPORT_SYMBOL(mlx5_cmd_exec_polling);
@ -2034,7 +2036,7 @@ static void mlx5_cmd_exec_cb_handler(int status, void *_work)
struct mlx5_async_ctx *ctx;
ctx = work->ctx;
status = cmd_status_err(ctx->dev, status, work->opcode, work->out);
status = cmd_status_err(ctx->dev, status, work->opcode, work->op_mod, work->out);
work->user_callback(status, work);
if (atomic_dec_and_test(&ctx->num_inflight))
complete(&ctx->inflight_done);
@ -2049,6 +2051,7 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
work->ctx = ctx;
work->user_callback = callback;
work->opcode = MLX5_GET(mbox_in, in, opcode);
work->op_mod = MLX5_GET(mbox_in, in, op_mod);
work->out = out;
if (WARN_ON(!atomic_inc_not_zero(&ctx->num_inflight)))
return -EIO;

View File

@ -0,0 +1,45 @@
/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM mlx5
#if !defined(_MLX5_CMD_TP_H_) || defined(TRACE_HEADER_MULTI_READ)
#define _MLX5_CMD_TP_H_
#include <linux/tracepoint.h>
#include <linux/trace_seq.h>
/* Tracepoint "mlx5_cmd" (TRACE_SYSTEM mlx5).
 *
 * Arguments: command name and opcode, op_mod, status name and raw status,
 * syndrome, and the resulting errno. The two strings are stored in the event
 * record via __string()/__assign_str(); the numeric arguments are stored
 * as-is. The printed line has the form:
 *   <cmd>(0x<opcode>) op_mod(0x<op_mod>) failed, status <str>(0x<status>),
 *   syndrome (0x<syndrome>), err(<err>)
 */
TRACE_EVENT(mlx5_cmd,
TP_PROTO(const char *command_str, u16 opcode, u16 op_mod,
const char *status_str, u8 status, u32 syndrome, int err),
TP_ARGS(command_str, opcode, op_mod, status_str, status, syndrome, err),
TP_STRUCT__entry(__string(command_str, command_str)
__field(u16, opcode)
__field(u16, op_mod)
__string(status_str, status_str)
__field(u8, status)
__field(u32, syndrome)
__field(int, err)
),
TP_fast_assign(__assign_str(command_str, command_str);
__entry->opcode = opcode;
__entry->op_mod = op_mod;
__assign_str(status_str, status_str);
__entry->status = status;
__entry->syndrome = syndrome;
__entry->err = err;
),
TP_printk("%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x), err(%d)",
__get_str(command_str), __entry->opcode, __entry->op_mod,
__get_str(status_str), __entry->status, __entry->syndrome,
__entry->err)
);
#endif /* _MLX5_CMD_TP_H_ */
#undef TRACE_INCLUDE_PATH
#define TRACE_INCLUDE_PATH ./diag
#undef TRACE_INCLUDE_FILE
#define TRACE_INCLUDE_FILE cmd_tracepoint
#include <trace/define_trace.h>

View File

@ -638,7 +638,7 @@ static void mlx5_tracer_handle_timestamp_trace(struct mlx5_fw_tracer *tracer,
trace_timestamp = (timestamp_event.timestamp & MASK_52_7) |
(str_frmt->timestamp & MASK_6_0);
else
trace_timestamp = ((timestamp_event.timestamp & MASK_52_7) - 1) |
trace_timestamp = ((timestamp_event.timestamp - 1) & MASK_52_7) |
(str_frmt->timestamp & MASK_6_0);
mlx5_tracer_print_trace(str_frmt, dev, trace_timestamp);

View File

@ -224,15 +224,16 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
list_for_each_entry(flow, flow_list, tmp_list) {
if (!mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, SLOW))
continue;
spec = &flow->attr->parse_attr->spec;
/* update from encap rule to slow path rule */
rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
attr = mlx5e_tc_get_encap_attr(flow);
esw_attr = attr->esw_attr;
/* mark the flow's encap dest as non-valid */
esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
/* update from encap rule to slow path rule */
spec = &flow->attr->parse_attr->spec;
rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
if (IS_ERR(rule)) {
err = PTR_ERR(rule);
@ -251,6 +252,7 @@ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
/* we know that the encap is valid */
e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
e->pkt_reformat = NULL;
}
static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
@ -762,8 +764,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
int out_index,
struct netlink_ext_ack *extack,
struct net_device **encap_dev,
bool *encap_valid)
struct net_device **encap_dev)
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5e_tc_flow_parse_attr *parse_attr;
@ -878,9 +879,8 @@ attach_flow:
if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
*encap_valid = true;
} else {
*encap_valid = false;
flow_flag_set(flow, SLOW);
}
mutex_unlock(&esw->offloads.encap_tbl_lock);

View File

@ -17,8 +17,7 @@ int mlx5e_attach_encap(struct mlx5e_priv *priv,
struct net_device *mirred_dev,
int out_index,
struct netlink_ext_ack *extack,
struct net_device **encap_dev,
bool *encap_valid);
struct net_device **encap_dev);
int mlx5e_attach_decap(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,

View File

@ -368,15 +368,15 @@ static int mlx5e_macsec_init_sa(struct macsec_context *ctx,
obj_attrs.aso_pdn = macsec->aso.pdn;
obj_attrs.epn_state = sa->epn_state;
if (is_tx) {
obj_attrs.ssci = cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci);
key = &ctx->sa.tx_sa->key;
} else {
obj_attrs.ssci = cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci);
key = &ctx->sa.rx_sa->key;
key = (is_tx) ? &ctx->sa.tx_sa->key : &ctx->sa.rx_sa->key;
if (sa->epn_state.epn_enabled) {
obj_attrs.ssci = (is_tx) ? cpu_to_be32((__force u32)ctx->sa.tx_sa->ssci) :
cpu_to_be32((__force u32)ctx->sa.rx_sa->ssci);
memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt));
}
memcpy(&obj_attrs.salt, &key->salt, sizeof(key->salt));
obj_attrs.replay_window = ctx->secy->replay_window;
obj_attrs.replay_protect = ctx->secy->replay_protect;
@ -1155,7 +1155,7 @@ static int macsec_upd_secy_hw_address(struct macsec_context *ctx,
continue;
if (rx_sa->active) {
err = mlx5e_macsec_init_sa(ctx, rx_sa, false, false);
err = mlx5e_macsec_init_sa(ctx, rx_sa, true, false);
if (err)
goto out;
}
@ -1536,6 +1536,8 @@ static void macsec_async_event(struct work_struct *work)
async_work = container_of(work, struct mlx5e_macsec_async_work, work);
macsec = async_work->macsec;
mutex_lock(&macsec->lock);
mdev = async_work->mdev;
obj_id = async_work->obj_id;
macsec_sa = get_macsec_tx_sa_from_obj_id(macsec, obj_id);
@ -1557,6 +1559,7 @@ static void macsec_async_event(struct work_struct *work)
out_async_work:
kfree(async_work);
mutex_unlock(&macsec->lock);
}
static int macsec_obj_change_event(struct notifier_block *nb, unsigned long event, void *data)

View File

@ -35,7 +35,6 @@
#include "en.h"
#include "en/port.h"
#include "en/params.h"
#include "en/xsk/pool.h"
#include "en/ptp.h"
#include "lib/clock.h"
#include "en/fs_ethtool.h"
@ -412,15 +411,8 @@ void mlx5e_ethtool_get_channels(struct mlx5e_priv *priv,
struct ethtool_channels *ch)
{
mutex_lock(&priv->state_lock);
ch->max_combined = priv->max_nch;
ch->combined_count = priv->channels.params.num_channels;
if (priv->xsk.refcnt) {
/* The upper half are XSK queues. */
ch->max_combined *= 2;
ch->combined_count *= 2;
}
mutex_unlock(&priv->state_lock);
}
@ -454,16 +446,6 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
mutex_lock(&priv->state_lock);
/* Don't allow changing the number of channels if there is an active
* XSK, because the numeration of the XSK and regular RQs will change.
*/
if (priv->xsk.refcnt) {
err = -EINVAL;
netdev_err(priv->netdev, "%s: AF_XDP is active, cannot change the number of channels\n",
__func__);
goto out;
}
/* Don't allow changing the number of channels if HTB offload is active,
* because the numeration of the QoS SQs will change, while per-queue
* qdiscs are attached.

View File

@ -206,10 +206,11 @@ static void mlx5e_disable_blocking_events(struct mlx5e_priv *priv)
static u16 mlx5e_mpwrq_umr_octowords(u32 entries, enum mlx5e_mpwrq_umr_mode umr_mode)
{
u8 umr_entry_size = mlx5e_mpwrq_umr_entry_size(umr_mode);
u32 sz;
WARN_ON_ONCE(entries * umr_entry_size % MLX5_OCTWORD);
sz = ALIGN(entries * umr_entry_size, MLX5_UMR_MTT_ALIGNMENT);
return entries * umr_entry_size / MLX5_OCTWORD;
return sz / MLX5_OCTWORD;
}
static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,

View File

@ -1634,7 +1634,6 @@ set_encap_dests(struct mlx5e_priv *priv,
struct mlx5e_tc_flow *flow,
struct mlx5_flow_attr *attr,
struct netlink_ext_ack *extack,
bool *encap_valid,
bool *vf_tun)
{
struct mlx5e_tc_flow_parse_attr *parse_attr;
@ -1651,7 +1650,6 @@ set_encap_dests(struct mlx5e_priv *priv,
parse_attr = attr->parse_attr;
esw_attr = attr->esw_attr;
*vf_tun = false;
*encap_valid = true;
for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
struct net_device *out_dev;
@ -1668,7 +1666,7 @@ set_encap_dests(struct mlx5e_priv *priv,
goto out;
}
err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
extack, &encap_dev, encap_valid);
extack, &encap_dev);
dev_put(out_dev);
if (err)
goto out;
@ -1732,8 +1730,8 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
struct mlx5e_tc_flow_parse_attr *parse_attr;
struct mlx5_flow_attr *attr = flow->attr;
struct mlx5_esw_flow_attr *esw_attr;
bool vf_tun, encap_valid;
u32 max_prio, max_chain;
bool vf_tun;
int err = 0;
parse_attr = attr->parse_attr;
@ -1823,7 +1821,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
esw_attr->int_port = int_port;
}
err = set_encap_dests(priv, flow, attr, extack, &encap_valid, &vf_tun);
err = set_encap_dests(priv, flow, attr, extack, &vf_tun);
if (err)
goto err_out;
@ -1853,7 +1851,7 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
* (1) there's no error
* (2) there's an encap action and we don't have valid neigh
*/
if (!encap_valid || flow_flag_test(flow, SLOW))
if (flow_flag_test(flow, SLOW))
flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
else
flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
@ -3759,7 +3757,7 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack)
struct mlx5e_post_act *post_act = get_post_action(flow->priv);
struct mlx5_flow_attr *attr, *next_attr = NULL;
struct mlx5e_post_act_handle *handle;
bool vf_tun, encap_valid = true;
bool vf_tun;
int err;
/* This is going in reverse order as needed.
@ -3781,13 +3779,10 @@ alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack)
if (list_is_last(&attr->list, &flow->attrs))
break;
err = set_encap_dests(flow->priv, flow, attr, extack, &encap_valid, &vf_tun);
err = set_encap_dests(flow->priv, flow, attr, extack, &vf_tun);
if (err)
goto out_free;
if (!encap_valid)
flow_flag_set(flow, SLOW);
err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack);
if (err)
goto out_free;

View File

@ -433,7 +433,7 @@ esw_setup_vport_dest(struct mlx5_flow_destination *dest, struct mlx5_flow_act *f
mlx5_lag_mpesw_is_activated(esw->dev))
dest[dest_idx].type = MLX5_FLOW_DESTINATION_TYPE_UPLINK;
}
if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP) {
if (esw_attr->dests[attr_idx].flags & MLX5_ESW_DEST_ENCAP_VALID) {
if (pkt_reformat) {
flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
flow_act->pkt_reformat = esw_attr->dests[attr_idx].pkt_reformat;

View File

@ -9,7 +9,8 @@ enum {
MLX5_FW_RESET_FLAGS_RESET_REQUESTED,
MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST,
MLX5_FW_RESET_FLAGS_PENDING_COMP,
MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS
MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS,
MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED
};
struct mlx5_fw_reset {
@ -406,7 +407,7 @@ static void mlx5_sync_reset_now_event(struct work_struct *work)
err = mlx5_pci_link_toggle(dev);
if (err) {
mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err);
goto done;
set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags);
}
mlx5_enter_error_state(dev, true);
@ -482,6 +483,10 @@ int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev)
goto out;
}
err = fw_reset->ret;
if (test_and_clear_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags)) {
mlx5_unload_one_devl_locked(dev);
mlx5_load_one_devl_locked(dev, false);
}
out:
clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
return err;

View File

@ -228,9 +228,8 @@ static void mlx5_ldev_free(struct kref *ref)
if (ldev->nb.notifier_call)
unregister_netdevice_notifier_net(&init_net, &ldev->nb);
mlx5_lag_mp_cleanup(ldev);
mlx5_lag_mpesw_cleanup(ldev);
cancel_work_sync(&ldev->mpesw_work);
destroy_workqueue(ldev->wq);
mlx5_lag_mpesw_cleanup(ldev);
mutex_destroy(&ldev->lock);
kfree(ldev);
}

View File

@ -50,6 +50,19 @@ struct lag_tracker {
enum netdev_lag_hash hash_type;
};
/* Operation carried by a struct mlx5_mpesw_work_st request. */
enum mpesw_op {
MLX5_MPESW_OP_ENABLE, /* handled by add_mpesw_rule() in the work handler */
MLX5_MPESW_OP_DISABLE, /* handled by del_mpesw_rule() in the work handler */
};
/* One MPESW enable/disable request, run by mlx5_mpesw_work() on the LAG
 * workqueue while the submitter waits on @comp.
 */
struct mlx5_mpesw_work_st {
struct work_struct work; /* work item; the handler recovers the request via container_of() */
struct mlx5_lag *lag; /* LAG instance the operation acts on */
enum mpesw_op op; /* which operation the handler should perform */
struct completion comp; /* completed by the handler once the operation is done */
int result; /* written by the handler for MLX5_MPESW_OP_ENABLE only */
};
/* LAG data of a ConnectX card.
* It serves both its phys functions.
*/
@ -66,7 +79,6 @@ struct mlx5_lag {
struct lag_tracker tracker;
struct workqueue_struct *wq;
struct delayed_work bond_work;
struct work_struct mpesw_work;
struct notifier_block nb;
struct lag_mp lag_mp;
struct mlx5_lag_port_sel port_sel;

View File

@ -7,63 +7,95 @@
#include "eswitch.h"
#include "lib/mlx5.h"
void mlx5_mpesw_work(struct work_struct *work)
static int add_mpesw_rule(struct mlx5_lag *ldev)
{
struct mlx5_lag *ldev = container_of(work, struct mlx5_lag, mpesw_work);
struct mlx5_core_dev *dev = ldev->pf[MLX5_LAG_P1].dev;
int err;
mutex_lock(&ldev->lock);
mlx5_disable_lag(ldev);
mutex_unlock(&ldev->lock);
if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1)
return 0;
if (ldev->mode != MLX5_LAG_MODE_NONE) {
err = -EINVAL;
goto out_err;
}
err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false);
if (err) {
mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err);
goto out_err;
}
return 0;
out_err:
atomic_dec(&ldev->lag_mpesw.mpesw_rule_count);
return err;
}
static void mlx5_lag_disable_mpesw(struct mlx5_core_dev *dev)
static void del_mpesw_rule(struct mlx5_lag *ldev)
{
struct mlx5_lag *ldev = dev->priv.lag;
if (!queue_work(ldev->wq, &ldev->mpesw_work))
mlx5_core_warn(dev, "failed to queue work\n");
}
void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev = dev->priv.lag;
if (!ldev)
return;
mutex_lock(&ldev->lock);
if (!atomic_dec_return(&ldev->lag_mpesw.mpesw_rule_count) &&
ldev->mode == MLX5_LAG_MODE_MPESW)
mlx5_lag_disable_mpesw(dev);
mutex_unlock(&ldev->lock);
mlx5_disable_lag(ldev);
}
int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev)
/* Workqueue handler for a single MPESW request.
 *
 * Performs the requested operation under ldev->lock and then signals the
 * request's completion. For MLX5_MPESW_OP_ENABLE the return value of
 * add_mpesw_rule() is stored in the request; MLX5_MPESW_OP_DISABLE stores
 * nothing. Any other op value is ignored.
 */
static void mlx5_mpesw_work(struct work_struct *work)
{
	struct mlx5_mpesw_work_st *req = container_of(work, struct mlx5_mpesw_work_st, work);
	struct mlx5_lag *ldev = req->lag;

	mutex_lock(&ldev->lock);
	switch (req->op) {
	case MLX5_MPESW_OP_ENABLE:
		req->result = add_mpesw_rule(ldev);
		break;
	case MLX5_MPESW_OP_DISABLE:
		del_mpesw_rule(ldev);
		break;
	default:
		break;
	}
	mutex_unlock(&ldev->lock);

	/* Wake the submitter only after the lock has been dropped. */
	complete(&req->comp);
}
static int mlx5_lag_mpesw_queue_work(struct mlx5_core_dev *dev,
enum mpesw_op op)
{
struct mlx5_lag *ldev = dev->priv.lag;
struct mlx5_mpesw_work_st *work;
int err = 0;
if (!ldev)
return 0;
mutex_lock(&ldev->lock);
if (atomic_add_return(1, &ldev->lag_mpesw.mpesw_rule_count) != 1)
goto out;
work = kzalloc(sizeof(*work), GFP_KERNEL);
if (!work)
return -ENOMEM;
if (ldev->mode != MLX5_LAG_MODE_NONE) {
INIT_WORK(&work->work, mlx5_mpesw_work);
init_completion(&work->comp);
work->op = op;
work->lag = ldev;
if (!queue_work(ldev->wq, &work->work)) {
mlx5_core_warn(dev, "failed to queue mpesw work\n");
err = -EINVAL;
goto out;
}
err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false);
if (err)
mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err);
wait_for_completion(&work->comp);
err = work->result;
out:
mutex_unlock(&ldev->lock);
kfree(work);
return err;
}
/* Submit an MLX5_MPESW_OP_DISABLE request for @dev through
 * mlx5_lag_mpesw_queue_work(). The helper's return value is discarded, so
 * a failure to submit the request is not reported to the caller.
 */
void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev)
{
mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_DISABLE);
}
/* Submit an MLX5_MPESW_OP_ENABLE request for @dev through
 * mlx5_lag_mpesw_queue_work().
 *
 * Return: whatever mlx5_lag_mpesw_queue_work() returns.
 */
int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev)
{
return mlx5_lag_mpesw_queue_work(dev, MLX5_MPESW_OP_ENABLE);
}
int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev)
{
struct mlx5_lag *ldev = mdev->priv.lag;
@ -71,12 +103,9 @@ int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev)
if (!netif_is_bond_master(out_dev) || !ldev)
return 0;
mutex_lock(&ldev->lock);
if (ldev->mode == MLX5_LAG_MODE_MPESW) {
mutex_unlock(&ldev->lock);
if (ldev->mode == MLX5_LAG_MODE_MPESW)
return -EOPNOTSUPP;
}
mutex_unlock(&ldev->lock);
return 0;
}
@ -90,11 +119,10 @@ bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev)
void mlx5_lag_mpesw_init(struct mlx5_lag *ldev)
{
INIT_WORK(&ldev->mpesw_work, mlx5_mpesw_work);
atomic_set(&ldev->lag_mpesw.mpesw_rule_count, 0);
}
void mlx5_lag_mpesw_cleanup(struct mlx5_lag *ldev)
{
cancel_delayed_work_sync(&ldev->bond_work);
WARN_ON(atomic_read(&ldev->lag_mpesw.mpesw_rule_count));
}

View File

@ -12,7 +12,6 @@ struct lag_mpesw {
atomic_t mpesw_rule_count;
};
void mlx5_mpesw_work(struct work_struct *work);
int mlx5_lag_do_mirred(struct mlx5_core_dev *mdev, struct net_device *out_dev);
bool mlx5_lag_mpesw_is_activated(struct mlx5_core_dev *dev);
#if IS_ENABLED(CONFIG_MLX5_ESWITCH)

View File

@ -1798,7 +1798,8 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
res = state == pci_channel_io_perm_failure ?
PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
mlx5_pci_trace(dev, "Exit, result = %d, %s\n", res, result2str(res));
mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, result = %d, %s\n",
__func__, dev->state, dev->pci_status, res, result2str(res));
return res;
}
@ -1837,7 +1838,8 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
struct mlx5_core_dev *dev = pci_get_drvdata(pdev);
int err;
mlx5_pci_trace(dev, "Enter\n");
mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Enter\n",
__func__, dev->state, dev->pci_status);
err = mlx5_pci_enable_device(dev);
if (err) {
@ -1859,7 +1861,8 @@ static pci_ers_result_t mlx5_pci_slot_reset(struct pci_dev *pdev)
res = PCI_ERS_RESULT_RECOVERED;
out:
mlx5_pci_trace(dev, "Exit, err = %d, result = %d, %s\n", err, res, result2str(res));
mlx5_core_info(dev, "%s Device state = %d pci_status: %d. Exit, err = %d, result = %d, %s\n",
__func__, dev->state, dev->pci_status, err, res, result2str(res));
return res;
}

View File

@ -18,6 +18,10 @@ struct mlx5_sf_dev_table {
phys_addr_t base_address;
u64 sf_bar_length;
struct notifier_block nb;
struct mutex table_lock; /* Serializes sf life cycle and vhca state change handler */
struct workqueue_struct *active_wq;
struct work_struct work;
u8 stop_active_wq:1;
struct mlx5_core_dev *dev;
};
@ -168,6 +172,7 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_
return 0;
sf_index = event->function_id - base_id;
mutex_lock(&table->table_lock);
sf_dev = xa_load(&table->devices, sf_index);
switch (event->new_vhca_state) {
case MLX5_VHCA_STATE_INVALID:
@ -191,6 +196,7 @@ mlx5_sf_dev_state_change_handler(struct notifier_block *nb, unsigned long event_
default:
break;
}
mutex_unlock(&table->table_lock);
return 0;
}
@ -215,6 +221,78 @@ static int mlx5_sf_dev_vhca_arm_all(struct mlx5_sf_dev_table *table)
return 0;
}
/* Work handler that probes SFs which are already in the active vhca state.
 *
 * Walks the function ids starting at the sf_base_id capability, one per
 * possible SF. Each vhca state is queried, and an SF device is added for every
 * active function that has no entry in table->devices yet. The walk stops as
 * soon as table->stop_active_wq is observed set.
 */
static void mlx5_sf_dev_add_active_work(struct work_struct *work)
{
	struct mlx5_sf_dev_table *table = container_of(work, struct mlx5_sf_dev_table, work);
	u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {};
	struct mlx5_core_dev *dev = table->dev;
	u16 nr_functions = mlx5_sf_max_functions(dev);
	u16 function_id = MLX5_CAP_GEN(dev, sf_base_id);
	u16 sw_func_id;
	u8 vhca_state;
	int sf_index;

	for (sf_index = 0; sf_index < nr_functions; sf_index++, function_id++) {
		if (table->stop_active_wq)
			return;

		/* A failure of one specific vhca doesn't mean the others
		 * will fail as well, so just move on to the next one.
		 */
		if (mlx5_cmd_query_vhca_state(dev, function_id, out, sizeof(out)))
			continue;

		vhca_state = MLX5_GET(query_vhca_state_out, out,
				      vhca_state_context.vhca_state);
		if (vhca_state != MLX5_VHCA_STATE_ACTIVE)
			continue;

		sw_func_id = MLX5_GET(query_vhca_state_out, out,
				      vhca_state_context.sw_function_id);

		mutex_lock(&table->table_lock);
		/* Don't probe a device which is already probed */
		if (!xa_load(&table->devices, sf_index))
			mlx5_sf_dev_add(dev, sf_index, function_id, sw_func_id);
		/* There is a race where the SF becomes inactive after the
		 * query above, e.g. the query reports the SF as active and
		 * the eswitch manager then sets it to inactive.
		 * This case cannot be managed in SW, since the probing of the
		 * SF is on one system, and the inactivation is on a different
		 * system.
		 * If the inactivation happens after the SF performed
		 * init_hca(), the SF will fully probe and then be removed.
		 * If it happens before init_hca(), the SF probe will fail.
		 */
		mutex_unlock(&table->table_lock);
	}
}
/* In case SFs are generated externally, probe the SFs that are already
 * active by queueing mlx5_sf_dev_add_active_work() on a dedicated workqueue.
 *
 * Return: 0 when the work was queued or when the device is an eswitch
 * manager (nothing to do), -ENOMEM if the workqueue could not be created.
 */
static int mlx5_sf_dev_queue_active_work(struct mlx5_sf_dev_table *table)
{
	/* the table is local */
	if (MLX5_CAP_GEN(table->dev, eswitch_manager))
		return 0;

	/* Use a workqueue to probe active SFs, which are in large
	 * quantity and may take up to minutes to probe.
	 */
	table->active_wq = create_singlethread_workqueue("mlx5_active_sf");
	if (table->active_wq) {
		INIT_WORK(&table->work, &mlx5_sf_dev_add_active_work);
		queue_work(table->active_wq, &table->work);
		return 0;
	}

	return -ENOMEM;
}
/* Tear down the active-SF probe workqueue, if one was created.
 *
 * The stop flag is raised first so that the probe loop, which checks
 * table->stop_active_wq on every iteration, returns early. The workqueue is
 * destroyed afterwards.
 */
static void mlx5_sf_dev_destroy_active_work(struct mlx5_sf_dev_table *table)
{
	if (!table->active_wq)
		return;

	table->stop_active_wq = true;
	destroy_workqueue(table->active_wq);
}
void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev)
{
struct mlx5_sf_dev_table *table;
@ -240,11 +318,17 @@ void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev)
table->base_address = pci_resource_start(dev->pdev, 2);
table->max_sfs = max_sfs;
xa_init(&table->devices);
mutex_init(&table->table_lock);
dev->priv.sf_dev_table = table;
err = mlx5_vhca_event_notifier_register(dev, &table->nb);
if (err)
goto vhca_err;
err = mlx5_sf_dev_queue_active_work(table);
if (err)
goto add_active_err;
err = mlx5_sf_dev_vhca_arm_all(table);
if (err)
goto arm_err;
@ -252,6 +336,8 @@ void mlx5_sf_dev_table_create(struct mlx5_core_dev *dev)
return;
arm_err:
mlx5_sf_dev_destroy_active_work(table);
add_active_err:
mlx5_vhca_event_notifier_unregister(dev, &table->nb);
vhca_err:
table->max_sfs = 0;
@ -279,7 +365,9 @@ void mlx5_sf_dev_table_destroy(struct mlx5_core_dev *dev)
if (!table)
return;
mlx5_sf_dev_destroy_active_work(table);
mlx5_vhca_event_notifier_unregister(dev, &table->nb);
mutex_destroy(&table->table_lock);
/* Now that event handler is not running, it is safe to destroy
* the sf device without race.

View File

@ -981,6 +981,7 @@ struct mlx5_async_work {
struct mlx5_async_ctx *ctx;
mlx5_async_cbk_t user_callback;
u16 opcode; /* cmd opcode */
u16 op_mod; /* cmd op_mod */
void *out; /* pointer to the cmd output buffer */
};