nvme-fc: fix io timeout to abort I/O
Currently, an I/O timeout unconditionally invokes nvme_fc_error_recovery() which checks for LIVE or CONNECTING state. If live, the routine resets the controller which initiates a reconnect - which is valid. If CONNECTING, err_work is scheduled. Err_work then calls the terminate_io routine, which also checks for CONNECTING and noops any further action on outstanding I/O. The result is that nothing happens to the timed out io. As such, if the command was dropped on the wire, it will never time out / complete, and the connect process will hang. Change the behavior of the io timeout routine to unconditionally abort the I/O. I/O completion handling will note that an io failed due to an abort and will terminate the connection / association as needed. If the abort was unable to happen, continue with a call to nvme_fc_error_recovery(). To ensure something different happens in nvme_fc_error_recovery(), rework it so that it will abort all I/Os on the association to force a failure. As I/O aborts now may occur outside of delete_association, counting for completion must be wary and only count those aborted during delete_association when TERMIO is set on the controller. Signed-off-by: James Smart <james.smart@broadcom.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
This commit is contained in:
parent
150dfb6c83
commit
52793d62a6
@ -1837,8 +1837,10 @@ __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
|
|||||||
opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
|
opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
|
||||||
if (opstate != FCPOP_STATE_ACTIVE)
|
if (opstate != FCPOP_STATE_ACTIVE)
|
||||||
atomic_set(&op->state, opstate);
|
atomic_set(&op->state, opstate);
|
||||||
else if (test_bit(FCCTRL_TERMIO, &ctrl->flags))
|
else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
|
||||||
|
op->flags |= FCOP_FLAGS_TERMIO;
|
||||||
ctrl->iocnt++;
|
ctrl->iocnt++;
|
||||||
|
}
|
||||||
spin_unlock_irqrestore(&ctrl->lock, flags);
|
spin_unlock_irqrestore(&ctrl->lock, flags);
|
||||||
|
|
||||||
if (opstate != FCPOP_STATE_ACTIVE)
|
if (opstate != FCPOP_STATE_ACTIVE)
|
||||||
@ -1874,7 +1876,8 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
|
|||||||
|
|
||||||
if (opstate == FCPOP_STATE_ABORTED) {
|
if (opstate == FCPOP_STATE_ABORTED) {
|
||||||
spin_lock_irqsave(&ctrl->lock, flags);
|
spin_lock_irqsave(&ctrl->lock, flags);
|
||||||
if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
|
if (test_bit(FCCTRL_TERMIO, &ctrl->flags) &&
|
||||||
|
op->flags & FCOP_FLAGS_TERMIO) {
|
||||||
if (!--ctrl->iocnt)
|
if (!--ctrl->iocnt)
|
||||||
wake_up(&ctrl->ioabort_wait);
|
wake_up(&ctrl->ioabort_wait);
|
||||||
}
|
}
|
||||||
@ -2446,15 +2449,20 @@ nvme_fc_timeout(struct request *rq, bool reserved)
|
|||||||
{
|
{
|
||||||
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
|
struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
|
||||||
struct nvme_fc_ctrl *ctrl = op->ctrl;
|
struct nvme_fc_ctrl *ctrl = op->ctrl;
|
||||||
|
struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
|
||||||
|
struct nvme_command *sqe = &cmdiu->sqe;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* we can't individually ABTS an io without affecting the queue,
|
* Attempt to abort the offending command. Command completion
|
||||||
* thus killing the queue, and thus the association.
|
* will detect the aborted io and will fail the connection.
|
||||||
* So resolve by performing a controller reset, which will stop
|
|
||||||
* the host/io stack, terminate the association on the link,
|
|
||||||
* and recreate an association on the link.
|
|
||||||
*/
|
*/
|
||||||
nvme_fc_error_recovery(ctrl, "io timeout error");
|
dev_info(ctrl->ctrl.device,
|
||||||
|
"NVME-FC{%d.%d}: io timeout: opcode %d fctype %d w10/11: "
|
||||||
|
"x%08x/x%08x\n",
|
||||||
|
ctrl->cnum, op->queue->qnum, sqe->common.opcode,
|
||||||
|
sqe->connect.fctype, sqe->common.cdw10, sqe->common.cdw11);
|
||||||
|
if (__nvme_fc_abort_op(ctrl, op))
|
||||||
|
nvme_fc_error_recovery(ctrl, "io timeout abort failed");
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* the io abort has been initiated. Have the reset timer
|
* the io abort has been initiated. Have the reset timer
|
||||||
@ -2726,6 +2734,7 @@ nvme_fc_complete_rq(struct request *rq)
|
|||||||
struct nvme_fc_ctrl *ctrl = op->ctrl;
|
struct nvme_fc_ctrl *ctrl = op->ctrl;
|
||||||
|
|
||||||
atomic_set(&op->state, FCPOP_STATE_IDLE);
|
atomic_set(&op->state, FCPOP_STATE_IDLE);
|
||||||
|
op->flags &= ~FCOP_FLAGS_TERMIO;
|
||||||
|
|
||||||
nvme_fc_unmap_data(ctrl, rq, op);
|
nvme_fc_unmap_data(ctrl, rq, op);
|
||||||
nvme_complete_rq(rq);
|
nvme_complete_rq(rq);
|
||||||
@ -3090,6 +3099,61 @@ out_free_queue:
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This routine runs through all outstanding commands on the association
|
||||||
|
* and aborts them. This routine is typically called by the
|
||||||
|
* delete_association routine. It is also called due to an error during
|
||||||
|
* reconnect. In that scenario, it is most likely a command that initializes
|
||||||
|
* the controller, including fabric Connect commands on io queues, that
|
||||||
|
* may have timed out or failed thus the io must be killed for the connect
|
||||||
|
* thread to see the error.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* If io queues are present, stop them and terminate all outstanding
|
||||||
|
* ios on them. As FC allocates FC exchange for each io, the
|
||||||
|
* transport must contact the LLDD to terminate the exchange,
|
||||||
|
* thus releasing the FC exchange. We use blk_mq_tagset_busy_iter()
|
||||||
|
* to tell us what io's are busy and invoke a transport routine
|
||||||
|
* to kill them with the LLDD. After terminating the exchange
|
||||||
|
* the LLDD will call the transport's normal io done path, but it
|
||||||
|
* will have an aborted status. The done path will return the
|
||||||
|
* io requests back to the block layer as part of normal completions
|
||||||
|
* (but with error status).
|
||||||
|
*/
|
||||||
|
if (ctrl->ctrl.queue_count > 1) {
|
||||||
|
nvme_stop_queues(&ctrl->ctrl);
|
||||||
|
blk_mq_tagset_busy_iter(&ctrl->tag_set,
|
||||||
|
nvme_fc_terminate_exchange, &ctrl->ctrl);
|
||||||
|
blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
|
||||||
|
if (start_queues)
|
||||||
|
nvme_start_queues(&ctrl->ctrl);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Other transports, which don't have link-level contexts bound
|
||||||
|
* to sqe's, would try to gracefully shutdown the controller by
|
||||||
|
* writing the registers for shutdown and polling (call
|
||||||
|
* nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
|
||||||
|
* just aborted and we will wait on those contexts, and given
|
||||||
|
* there was no indication of how live the controller is on the
|
||||||
|
* link, don't send more io to create more contexts for the
|
||||||
|
* shutdown. Let the controller fail via keepalive failure if
|
||||||
|
* it's still present.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* clean up the admin queue. Same thing as above.
|
||||||
|
*/
|
||||||
|
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
|
||||||
|
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
|
||||||
|
nvme_fc_terminate_exchange, &ctrl->ctrl);
|
||||||
|
blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This routine stops operation of the controller on the host side.
|
* This routine stops operation of the controller on the host side.
|
||||||
* On the host os stack side: Admin and IO queues are stopped,
|
* On the host os stack side: Admin and IO queues are stopped,
|
||||||
@ -3110,46 +3174,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
|
|||||||
ctrl->iocnt = 0;
|
ctrl->iocnt = 0;
|
||||||
spin_unlock_irqrestore(&ctrl->lock, flags);
|
spin_unlock_irqrestore(&ctrl->lock, flags);
|
||||||
|
|
||||||
/*
|
__nvme_fc_abort_outstanding_ios(ctrl, false);
|
||||||
* If io queues are present, stop them and terminate all outstanding
|
|
||||||
* ios on them. As FC allocates FC exchange for each io, the
|
|
||||||
* transport must contact the LLDD to terminate the exchange,
|
|
||||||
* thus releasing the FC exchange. We use blk_mq_tagset_busy_itr()
|
|
||||||
* to tell us what io's are busy and invoke a transport routine
|
|
||||||
* to kill them with the LLDD. After terminating the exchange
|
|
||||||
* the LLDD will call the transport's normal io done path, but it
|
|
||||||
* will have an aborted status. The done path will return the
|
|
||||||
* io requests back to the block layer as part of normal completions
|
|
||||||
* (but with error status).
|
|
||||||
*/
|
|
||||||
if (ctrl->ctrl.queue_count > 1) {
|
|
||||||
nvme_stop_queues(&ctrl->ctrl);
|
|
||||||
blk_mq_tagset_busy_iter(&ctrl->tag_set,
|
|
||||||
nvme_fc_terminate_exchange, &ctrl->ctrl);
|
|
||||||
blk_mq_tagset_wait_completed_request(&ctrl->tag_set);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Other transports, which don't have link-level contexts bound
|
|
||||||
* to sqe's, would try to gracefully shutdown the controller by
|
|
||||||
* writing the registers for shutdown and polling (call
|
|
||||||
* nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
|
|
||||||
* just aborted and we will wait on those contexts, and given
|
|
||||||
* there was no indication of how live the controlelr is on the
|
|
||||||
* link, don't send more io to create more contexts for the
|
|
||||||
* shutdown. Let the controller fail via keepalive failure if
|
|
||||||
* its still present.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* clean up the admin queue. Same thing as above.
|
|
||||||
* use blk_mq_tagset_busy_itr() and the transport routine to
|
|
||||||
* terminate the exchanges.
|
|
||||||
*/
|
|
||||||
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
|
|
||||||
blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
|
|
||||||
nvme_fc_terminate_exchange, &ctrl->ctrl);
|
|
||||||
blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set);
|
|
||||||
|
|
||||||
/* kill the aens as they are a separate path */
|
/* kill the aens as they are a separate path */
|
||||||
nvme_fc_abort_aen_ops(ctrl);
|
nvme_fc_abort_aen_ops(ctrl);
|
||||||
@ -3263,22 +3288,27 @@ static void
|
|||||||
__nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
|
__nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
* if state is connecting - the error occurred as part of a
|
* if state is CONNECTING - the error occurred as part of a
|
||||||
* reconnect attempt. The create_association error paths will
|
* reconnect attempt. Abort any ios on the association and
|
||||||
* clean up any outstanding io.
|
* let the create_association error paths resolve things.
|
||||||
*
|
|
||||||
* if it's a different state - ensure all pending io is
|
|
||||||
* terminated. Given this can delay while waiting for the
|
|
||||||
* aborted io to return, we recheck adapter state below
|
|
||||||
* before changing state.
|
|
||||||
*/
|
*/
|
||||||
if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) {
|
if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) {
|
||||||
nvme_stop_keep_alive(&ctrl->ctrl);
|
__nvme_fc_abort_outstanding_ios(ctrl, true);
|
||||||
|
return;
|
||||||
/* will block will waiting for io to terminate */
|
|
||||||
nvme_fc_delete_association(ctrl);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For any other state, kill the association. As this routine
|
||||||
|
* is a common io abort routine for resetting and such, after
|
||||||
|
* the association is terminated, ensure that the state is set
|
||||||
|
* to CONNECTING.
|
||||||
|
*/
|
||||||
|
|
||||||
|
nvme_stop_keep_alive(&ctrl->ctrl);
|
||||||
|
|
||||||
|
/* will block while waiting for io to terminate */
|
||||||
|
nvme_fc_delete_association(ctrl);
|
||||||
|
|
||||||
if (ctrl->ctrl.state != NVME_CTRL_CONNECTING &&
|
if (ctrl->ctrl.state != NVME_CTRL_CONNECTING &&
|
||||||
!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
|
!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
|
||||||
dev_err(ctrl->ctrl.device,
|
dev_err(ctrl->ctrl.device,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user