nvme-tcp: stop auth work after tearing down queues in error recovery

[ Upstream commit 1f1a4f89562d3b33b6ca4fc8a4f3bd4cd35ab4ea ] When starting error recovery there might be an authentication work running, and it involves I/O commands. Given the controller is tearing down, there is no chance for the I/O to complete other than timing out, which may unnecessarily take a full I/O timeout. So first tear down the queues, fail/cancel all inflight I/O (including potentially authentication) and only then stop authentication. This ensures that failover is not stalled due to blocked authentication I/O. Signed-off-by: Sagi Grimberg <sagi@grimberg.me> Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com> Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Sasha Levin <sashal@kernel.org>
2022-11-13 13:24:23 +02:00 · 2022-11-13 13:24:23 +02:00 · e94e1ea596
commit e94e1ea596
parent 513e4b876e
1 changed files with 1 additions and 1 deletions
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2128,7 +2128,6 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
 				struct nvme_tcp_ctrl, err_work);
 	struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;

-	nvme_auth_stop(ctrl);
 	nvme_stop_keep_alive(ctrl);
 	flush_work(&ctrl->async_event_work);
 	nvme_tcp_teardown_io_queues(ctrl, false);
@@ -2136,6 +2135,7 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work)
 	nvme_start_queues(ctrl);
 	nvme_tcp_teardown_admin_queue(ctrl, false);
 	nvme_start_admin_queue(ctrl);
+	nvme_auth_stop(ctrl);

 	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
 		/* state change failure is ok if we started ctrl delete */