nvme: fix regression when disconnect a recovering ctrl
[ Upstream commit f7f70f4aa09dc43d7455c060143e86a017c30548 ]

We encountered a problem where the disconnect command hangs. After analyzing the log and stack, we found that the triggering process is as follows:

CPU0                                CPU1
                                    nvme_rdma_error_recovery_work
                                      nvme_rdma_teardown_io_queues
nvme_do_delete_ctrl                     nvme_stop_queues
  nvme_remove_namespaces
  --clear ctrl->namespaces
                                        nvme_start_queues
                                        --no ns in ctrl->namespaces
    nvme_ns_remove                      return(because ctrl is deleting)
      blk_freeze_queue
        blk_mq_freeze_queue_wait
        --wait for ns to unquiesce to clean inflight IO, hang forever

This problem was not found in older kernels because we would flush the err work in nvme_stop_ctrl before nvme_remove_namespaces. It does not seem to have been modified for functional reasons, so the patch can be reverted to solve the problem.

Revert commit 794a4cb3d2f7 ("nvme: remove the .stop_ctrl callout")

Signed-off-by: Ruozhu Li <liruozhu@huawei.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
parent
1e4427aa2f
commit
7a2294c5f2
@ -4385,6 +4385,8 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl)
|
||||
nvme_stop_failfast_work(ctrl);
|
||||
flush_work(&ctrl->async_event_work);
|
||||
cancel_work_sync(&ctrl->fw_act_work);
|
||||
if (ctrl->ops->stop_ctrl)
|
||||
ctrl->ops->stop_ctrl(ctrl);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
|
||||
|
||||
|
@ -495,6 +495,7 @@ struct nvme_ctrl_ops {
|
||||
void (*free_ctrl)(struct nvme_ctrl *ctrl);
|
||||
void (*submit_async_event)(struct nvme_ctrl *ctrl);
|
||||
void (*delete_ctrl)(struct nvme_ctrl *ctrl);
|
||||
void (*stop_ctrl)(struct nvme_ctrl *ctrl);
|
||||
int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
|
||||
};
|
||||
|
||||
|
@ -1049,6 +1049,14 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * .stop_ctrl callback for the RDMA transport (installed in
 * nvme_rdma_ctrl_ops and invoked from nvme_stop_ctrl() when set).
 *
 * Synchronously cancels the controller's error-recovery work and its
 * delayed reconnect work.  Per the commit description, doing this from
 * nvme_stop_ctrl() ensures the err work is dealt with before
 * nvme_remove_namespaces() runs, so a recovery in progress cannot leave
 * queues quiesced while the delete path waits for them to drain.
 */
static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl)
|
||||
{
|
||||
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
|
||||
|
||||
cancel_work_sync(&ctrl->err_work);
|
||||
cancel_delayed_work_sync(&ctrl->reconnect_work);
|
||||
}
|
||||
|
||||
static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
|
||||
{
|
||||
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
|
||||
@ -2230,9 +2238,6 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = {
|
||||
|
||||
static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
|
||||
{
|
||||
cancel_work_sync(&ctrl->err_work);
|
||||
cancel_delayed_work_sync(&ctrl->reconnect_work);
|
||||
|
||||
nvme_rdma_teardown_io_queues(ctrl, shutdown);
|
||||
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
|
||||
if (shutdown)
|
||||
@ -2282,6 +2287,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
|
||||
.submit_async_event = nvme_rdma_submit_async_event,
|
||||
.delete_ctrl = nvme_rdma_delete_ctrl,
|
||||
.get_address = nvmf_get_address,
|
||||
.stop_ctrl = nvme_rdma_stop_ctrl,
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -2163,9 +2163,6 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
|
||||
|
||||
static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
|
||||
{
|
||||
cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work);
|
||||
cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
|
||||
|
||||
nvme_tcp_teardown_io_queues(ctrl, shutdown);
|
||||
blk_mq_quiesce_queue(ctrl->admin_q);
|
||||
if (shutdown)
|
||||
@ -2205,6 +2202,12 @@ out_fail:
|
||||
nvme_tcp_reconnect_or_remove(ctrl);
|
||||
}
|
||||
|
||||
/*
 * .stop_ctrl callback for the TCP transport (installed in
 * nvme_tcp_ctrl_ops and invoked from nvme_stop_ctrl() when set).
 *
 * Synchronously cancels the controller's error-recovery work and its
 * delayed connect work.  Per the commit description, doing this from
 * nvme_stop_ctrl() ensures the err work is dealt with before
 * nvme_remove_namespaces() runs, so a recovery in progress cannot leave
 * queues quiesced while the delete path waits for them to drain.
 */
static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl)
|
||||
{
|
||||
cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work);
|
||||
cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
|
||||
}
|
||||
|
||||
static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
|
||||
{
|
||||
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
|
||||
@ -2528,6 +2531,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
|
||||
.submit_async_event = nvme_tcp_submit_async_event,
|
||||
.delete_ctrl = nvme_tcp_delete_ctrl,
|
||||
.get_address = nvmf_get_address,
|
||||
.stop_ctrl = nvme_tcp_stop_ctrl,
|
||||
};
|
||||
|
||||
static bool
|
||||
|
Loading…
x
Reference in New Issue
Block a user