diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 106513f772c3..6f290319b617 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1378,8 +1378,10 @@ static void mlx4_en_tx_timeout(struct net_device *dev, unsigned int txqueue) tx_ring->cons, tx_ring->prod); priv->port_stats.tx_timeout++; - en_dbg(DRV, priv, "Scheduling watchdog\n"); - queue_work(mdev->workqueue, &priv->watchdog_task); + if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state)) { + en_dbg(DRV, priv, "Scheduling port restart\n"); + queue_work(mdev->workqueue, &priv->restart_task); + } } @@ -1733,6 +1735,7 @@ int mlx4_en_start_port(struct net_device *dev) mlx4_en_deactivate_cq(priv, cq); goto tx_err; } + clear_bit(MLX4_EN_TX_RING_STATE_RECOVERING, &tx_ring->state); if (t != TX_XDP) { tx_ring->tx_queue = netdev_get_tx_queue(dev, i); tx_ring->recycle_ring = NULL; @@ -1829,6 +1832,7 @@ int mlx4_en_start_port(struct net_device *dev) local_bh_enable(); } + clear_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state); netif_tx_start_all_queues(dev); netif_device_attach(dev); @@ -1999,7 +2003,7 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) static void mlx4_en_restart(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, - watchdog_task); + restart_task); struct mlx4_en_dev *mdev = priv->mdev; struct net_device *dev = priv->dev; @@ -2377,7 +2381,7 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) if (netif_running(dev)) { mutex_lock(&mdev->state_lock); if (!mdev->device_up) { - /* NIC is probably restarting - let watchdog task reset + /* NIC is probably restarting - let restart task reset * the port */ en_dbg(DRV, priv, "Change MTU called with card down!?\n"); } else { @@ -2386,7 +2390,9 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) if (err) { en_err(priv, "Failed restarting port:%d\n", priv->port); - queue_work(mdev->workqueue, &priv->watchdog_task); + if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, + &priv->state)) + queue_work(mdev->workqueue, &priv->restart_task); } } mutex_unlock(&mdev->state_lock); @@ -2792,7 +2798,8 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) if (err) { en_err(priv, "Failed starting port %d for XDP change\n", priv->port); - queue_work(mdev->workqueue, &priv->watchdog_task); + if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state)) + queue_work(mdev->workqueue, &priv->restart_task); } } @@ -3165,7 +3172,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev); spin_lock_init(&priv->stats_lock); INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode); - INIT_WORK(&priv->watchdog_task, mlx4_en_restart); + INIT_WORK(&priv->restart_task, mlx4_en_restart); INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 3ddb7268e415..59b097cda327 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -392,6 +392,35 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) return cnt; } +static void mlx4_en_handle_err_cqe(struct mlx4_en_priv *priv, struct mlx4_err_cqe *err_cqe, + u16 cqe_index, struct mlx4_en_tx_ring *ring) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_tx_info *tx_info; + struct mlx4_en_tx_desc *tx_desc; + u16 wqe_index; + int desc_size; + + en_err(priv, "CQE error - cqn 0x%x, ci 0x%x, vendor syndrome: 0x%x syndrome: 0x%x\n", + ring->sp_cqn, cqe_index, err_cqe->vendor_err_syndrome, err_cqe->syndrome); + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, err_cqe, sizeof(*err_cqe), + false); + + wqe_index = be16_to_cpu(err_cqe->wqe_index) & ring->size_mask; + tx_info = &ring->tx_info[wqe_index]; + desc_size = tx_info->nr_txbb << LOG_TXBB_SIZE; + en_err(priv, "Related WQE - qpn 0x%x, wqe index 0x%x, wqe size 0x%x\n", ring->qpn, + wqe_index, desc_size); + tx_desc = ring->buf + (wqe_index << LOG_TXBB_SIZE); + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, tx_desc, desc_size, false); + + if (test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state)) + return; + + en_err(priv, "Scheduling port restart\n"); + queue_work(mdev->workqueue, &priv->restart_task); +} + int mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int napi_budget) { @@ -438,13 +467,10 @@ int mlx4_en_process_tx_cq(struct net_device *dev, dma_rmb(); if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == - MLX4_CQE_OPCODE_ERROR)) { - struct mlx4_err_cqe *cqe_err = (struct mlx4_err_cqe *)cqe; - - en_err(priv, "CQE error - vendor syndrome: 0x%x syndrome: 0x%x\n", - cqe_err->vendor_err_syndrome, - cqe_err->syndrome); - } + MLX4_CQE_OPCODE_ERROR)) + if (!test_and_set_bit(MLX4_EN_TX_RING_STATE_RECOVERING, &ring->state)) + mlx4_en_handle_err_cqe(priv, (struct mlx4_err_cqe *)cqe, index, + ring); /* Skip over last polled CQE */ new_index = be16_to_cpu(cqe->wqe_index) & size_mask; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index a46efe37cfa9..30378e4c90b5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -271,6 +271,10 @@ struct mlx4_en_page_cache { } buf[MLX4_EN_CACHE_SIZE]; }; +enum { + MLX4_EN_TX_RING_STATE_RECOVERING, +}; + struct mlx4_en_priv; struct mlx4_en_tx_ring { @@ -317,6 +321,7 @@ struct mlx4_en_tx_ring { * Only queue_stopped might be used if BQL is not properly working. */ unsigned long queue_stopped; + unsigned long state; struct mlx4_hwq_resources sp_wqres; struct mlx4_qp sp_qp; struct mlx4_qp_context sp_context; @@ -530,6 +535,10 @@ struct mlx4_en_stats_bitmap { struct mutex mutex; /* for mutual access to stats bitmap */ }; +enum { + MLX4_EN_STATE_FLAG_RESTARTING, +}; + struct mlx4_en_priv { struct mlx4_en_dev *mdev; struct mlx4_en_port_profile *prof; @@ -595,7 +604,7 @@ struct mlx4_en_priv { struct mlx4_en_cq *rx_cq[MAX_RX_RINGS]; struct mlx4_qp drop_qp; struct work_struct rx_mode_task; - struct work_struct watchdog_task; + struct work_struct restart_task; struct work_struct linkstate_task; struct delayed_work stats_task; struct delayed_work service_task; @@ -641,6 +650,7 @@ struct mlx4_en_priv { u32 pflags; u8 rss_key[MLX4_EN_RSS_KEY_SIZE]; u8 rss_hash_fn; + unsigned long state; }; enum mlx4_en_wol {