RDMA/hns: Add the detection for CMDQ status in the device initialization process
[ Upstream commit e8ea058edc2b225a68b307057a65599625daaebf ]

CMDQ may fail during HNS ROCEE initialization. The following is the log
when the execution fails:

  hns3 0000:bd:00.2: In reset process RoCE client reinit.
  hns3 0000:bd:00.2: CMDQ move tail from 840 to 839
  hns3 0000:bd:00.2 hns_2: failed to set gid, ret = -11!
  hns3 0000:bd:00.2: CMDQ move tail from 840 to 839
  <...>
  hns3 0000:bd:00.2: CMDQ move tail from 840 to 839
  hns3 0000:bd:00.2: CMDQ move tail from 840 to 0
  hns3 0000:bd:00.2: [cmd]token 14e mailbox 20 timeout.
  hns3 0000:bd:00.2 hns_2: set HEM step 0 failed!
  hns3 0000:bd:00.2 hns_2: set HEM address to HW failed!
  hns3 0000:bd:00.2 hns_2: failed to alloc mtpt, ret = -16.
  infiniband hns_2: Couldn't create ib_mad PD
  infiniband hns_2: Couldn't open port 1
  hns3 0000:bd:00.2: Reset done, RoCE client reinit finished.

However, even if ib_mad client registration failed, ib_register_device()
still returns success to the driver.

In the device initialization process, CMDQ execution fails because HW/FW
is abnormal. Therefore, if CMDQ fails, the initialization function should
set CMDQ to a fatal error state and return a failure to the caller.

Fixes: 9a4435375cd1 ("IB/hns: Add driver files for hns RoCE driver")
Link: https://lore.kernel.org/r/20220429093104.26687-1-liangwenpeng@huawei.com
Signed-off-by: Yangyang Li <liyangyang20@huawei.com>
Signed-off-by: Wenpeng Liang <liangwenpeng@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
parent
4237eb9bdf
commit
e42ceeca70
@ -559,6 +559,11 @@ struct hns_roce_cmd_context {
|
||||
u16 busy;
|
||||
};
|
||||
|
||||
/*
 * Health state of the command queue, stored in hns_roce_cmdq.state.
 */
enum hns_roce_cmdq_state {
	/* Commands may be issued as usual. */
	HNS_ROCE_CMDQ_STATE_NORMAL = 0,
	/*
	 * The command queue is considered unusable; senders that check
	 * this state bail out early instead of touching the hardware.
	 */
	HNS_ROCE_CMDQ_STATE_FATAL_ERR = 1,
};
|
||||
|
||||
struct hns_roce_cmdq {
|
||||
struct dma_pool *pool;
|
||||
struct semaphore poll_sem;
|
||||
@ -578,6 +583,7 @@ struct hns_roce_cmdq {
|
||||
* close device, switch into poll mode(non event mode)
|
||||
*/
|
||||
u8 use_events;
|
||||
enum hns_roce_cmdq_state state;
|
||||
};
|
||||
|
||||
struct hns_roce_cmd_mailbox {
|
||||
|
@ -1273,6 +1273,16 @@ static int hns_roce_cmq_csq_done(struct hns_roce_dev *hr_dev)
|
||||
return tail == priv->cmq.csq.head;
|
||||
}
|
||||
|
||||
static void update_cmdq_status(struct hns_roce_dev *hr_dev)
|
||||
{
|
||||
struct hns_roce_v2_priv *priv = hr_dev->priv;
|
||||
struct hnae3_handle *handle = priv->handle;
|
||||
|
||||
if (handle->rinfo.reset_state == HNS_ROCE_STATE_RST_INIT ||
|
||||
handle->rinfo.instance_state == HNS_ROCE_STATE_INIT)
|
||||
hr_dev->cmd.state = HNS_ROCE_CMDQ_STATE_FATAL_ERR;
|
||||
}
|
||||
|
||||
static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
|
||||
struct hns_roce_cmq_desc *desc, int num)
|
||||
{
|
||||
@ -1326,6 +1336,8 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
|
||||
csq->head, tail);
|
||||
csq->head = tail;
|
||||
|
||||
update_cmdq_status(hr_dev);
|
||||
|
||||
ret = -EAGAIN;
|
||||
}
|
||||
|
||||
@ -1340,6 +1352,9 @@ static int hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
|
||||
bool busy;
|
||||
int ret;
|
||||
|
||||
if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
|
||||
return -EIO;
|
||||
|
||||
if (!v2_chk_mbox_is_avail(hr_dev, &busy))
|
||||
return busy ? -EBUSY : 0;
|
||||
|
||||
@ -1536,6 +1551,9 @@ static void hns_roce_function_clear(struct hns_roce_dev *hr_dev)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
|
||||
return;
|
||||
|
||||
for (i = hr_dev->func_num - 1; i >= 0; i--) {
|
||||
__hns_roce_function_clear(hr_dev, i);
|
||||
if (i != 0)
|
||||
@ -2818,6 +2836,9 @@ static int v2_wait_mbox_complete(struct hns_roce_dev *hr_dev, u32 timeout,
|
||||
mb_st = (struct hns_roce_mbox_status *)desc.data;
|
||||
end = msecs_to_jiffies(timeout) + jiffies;
|
||||
while (v2_chk_mbox_is_avail(hr_dev, &busy)) {
|
||||
if (hr_dev->cmd.state == HNS_ROCE_CMDQ_STATE_FATAL_ERR)
|
||||
return -EIO;
|
||||
|
||||
status = 0;
|
||||
hns_roce_cmq_setup_basic_desc(&desc, HNS_ROCE_OPC_QUERY_MB_ST,
|
||||
true);
|
||||
|
Loading…
x
Reference in New Issue
Block a user