nvme-rdma: Support ctrl_loss_tmo
Before scheduling a reconnect attempt, check nr_reconnects against max_reconnects. If the attempts are not exhausted (or max_reconnects is not -1), schedule a reconnect attempt; otherwise, schedule ctrl removal. Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Sagi Grimberg <sagi@grimberg.me> Signed-off-by: Jens Axboe <axboe@fb.com>
This commit is contained in:
parent
42a45274c2
commit
fd8563ced8
@ -706,6 +706,26 @@ free_ctrl:
|
||||
kfree(ctrl);
|
||||
}
|
||||
|
||||
static void nvme_rdma_reconnect_or_remove(struct nvme_rdma_ctrl *ctrl)
|
||||
{
|
||||
/* If we are resetting/deleting then do nothing */
|
||||
if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) {
|
||||
WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
|
||||
ctrl->ctrl.state == NVME_CTRL_LIVE);
|
||||
return;
|
||||
}
|
||||
|
||||
if (nvmf_should_reconnect(&ctrl->ctrl)) {
|
||||
dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
|
||||
ctrl->ctrl.opts->reconnect_delay);
|
||||
queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work,
|
||||
ctrl->ctrl.opts->reconnect_delay * HZ);
|
||||
} else {
|
||||
dev_info(ctrl->ctrl.device, "Removing controller...\n");
|
||||
queue_work(nvme_rdma_wq, &ctrl->delete_work);
|
||||
}
|
||||
}
|
||||
|
||||
static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
|
||||
{
|
||||
struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
|
||||
@ -713,6 +733,8 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
|
||||
bool changed;
|
||||
int ret;
|
||||
|
||||
++ctrl->ctrl.opts->nr_reconnects;
|
||||
|
||||
if (ctrl->queue_count > 1) {
|
||||
nvme_rdma_free_io_queues(ctrl);
|
||||
|
||||
@ -757,6 +779,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
|
||||
|
||||
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
|
||||
WARN_ON_ONCE(!changed);
|
||||
ctrl->ctrl.opts->nr_reconnects = 0;
|
||||
|
||||
if (ctrl->queue_count > 1) {
|
||||
nvme_start_queues(&ctrl->ctrl);
|
||||
@ -771,13 +794,9 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
|
||||
stop_admin_q:
|
||||
blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
|
||||
requeue:
|
||||
/* Make sure we are not resetting/deleting */
|
||||
if (ctrl->ctrl.state == NVME_CTRL_RECONNECTING) {
|
||||
dev_info(ctrl->ctrl.device,
|
||||
"Failed reconnect attempt, requeueing...\n");
|
||||
queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work,
|
||||
ctrl->ctrl.opts->reconnect_delay * HZ);
|
||||
}
|
||||
dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
|
||||
ctrl->ctrl.opts->nr_reconnects);
|
||||
nvme_rdma_reconnect_or_remove(ctrl);
|
||||
}
|
||||
|
||||
static void nvme_rdma_error_recovery_work(struct work_struct *work)
|
||||
@ -804,11 +823,7 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
|
||||
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
|
||||
nvme_cancel_request, &ctrl->ctrl);
|
||||
|
||||
dev_info(ctrl->ctrl.device, "reconnecting in %d seconds\n",
|
||||
ctrl->ctrl.opts->reconnect_delay);
|
||||
|
||||
queue_delayed_work(nvme_rdma_wq, &ctrl->reconnect_work,
|
||||
ctrl->ctrl.opts->reconnect_delay * HZ);
|
||||
nvme_rdma_reconnect_or_remove(ctrl);
|
||||
}
|
||||
|
||||
static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
|
||||
@ -1986,7 +2001,7 @@ static struct nvmf_transport_ops nvme_rdma_transport = {
|
||||
.name = "rdma",
|
||||
.required_opts = NVMF_OPT_TRADDR,
|
||||
.allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
|
||||
NVMF_OPT_HOST_TRADDR,
|
||||
NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO,
|
||||
.create_ctrl = nvme_rdma_create_ctrl,
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user