net/mlx5: Fix sync reset event handler error flow
When sync reset now event handling fails on mlx5_pci_link_toggle() then no reset was done. However, since mlx5_cmd_fast_teardown_hca() was already done, the firmware function is closed and the driver is left without firmware functionality. Fix it by setting device error state and reopen the firmware resources. Reopening is done by the thread that was called for devlink reload fw_activate as it already holds the devlink lock. Fixes: 5ec697446f46 ("net/mlx5: Add support for devlink reload action fw activate") Signed-off-by: Moshe Shemesh <moshe@nvidia.com> Reviewed-by: Aya Levin <ayal@nvidia.com> Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
This commit is contained in:
parent
6d942e4044
commit
e1ad07b922
@ -9,7 +9,8 @@ enum {
|
||||
MLX5_FW_RESET_FLAGS_RESET_REQUESTED,
|
||||
MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST,
|
||||
MLX5_FW_RESET_FLAGS_PENDING_COMP,
|
||||
MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS
|
||||
MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS,
|
||||
MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED
|
||||
};
|
||||
|
||||
struct mlx5_fw_reset {
|
||||
@ -406,7 +407,7 @@ static void mlx5_sync_reset_now_event(struct work_struct *work)
|
||||
err = mlx5_pci_link_toggle(dev);
|
||||
if (err) {
|
||||
mlx5_core_warn(dev, "mlx5_pci_link_toggle failed, no reset done, err %d\n", err);
|
||||
goto done;
|
||||
set_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags);
|
||||
}
|
||||
|
||||
mlx5_enter_error_state(dev, true);
|
||||
@ -482,6 +483,10 @@ int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev)
|
||||
goto out;
|
||||
}
|
||||
err = fw_reset->ret;
|
||||
if (test_and_clear_bit(MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, &fw_reset->reset_flags)) {
|
||||
mlx5_unload_one_devl_locked(dev);
|
||||
mlx5_load_one_devl_locked(dev, false);
|
||||
}
|
||||
out:
|
||||
clear_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags);
|
||||
return err;
|
||||
|
Loading…
x
Reference in New Issue
Block a user