scsi: core: Ensure that the SCSI error handler gets woken up
If scsi_eh_scmd_add() is called concurrently with
scsi_host_queue_ready() while shost->host_blocked > 0, it can
happen that neither function wakes up the SCSI error handler. Fix
this by making every function that decreases the host_busy counter
wake up the error handler if necessary, and by protecting the
host_failed checks with the SCSI host lock.
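
To illustrate the race (a hedged sketch of the interleaving, with both
functions abbreviated; scsi_eh_wakeup() only wakes the error handler
when host_busy == host_failed):

    /*
     * Lost-wakeup interleaving before this patch (illustrative
     * timeline, not verbatim kernel code):
     *
     *   CPU 0: scsi_host_queue_ready()   CPU 1: scsi_eh_scmd_add()
     *   ------------------------------   ------------------------------
     *                                    shost->host_failed++;
     *                                    scsi_eh_wakeup(shost);
     *                                      sees host_busy > host_failed,
     *                                      so it does not wake the EH
     *   atomic_dec(&shost->host_busy);
     *     (out_dec path, host_blocked > 0)
     *     now host_busy == host_failed,
     *     but nothing rechecks, so the
     *     error handler sleeps forever
     */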
Reported-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
References: https://marc.info/?l=linux-kernel&m=150461610630736
Fixes: 7466501608 ("scsi: convert host_busy to atomic_t")
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Tested-by: Stuart Hayes <stuart.w.hayes@gmail.com>
Cc: Konstantin Khorenko <khorenko@virtuozzo.com>
Cc: Stuart Hayes <stuart.w.hayes@gmail.com>
Cc: Pavel Tikhomirov <ptikhomirov@virtuozzo.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: <stable@vger.kernel.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
commit 3bd6f43f5c
parent 156baec397
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -318,6 +318,9 @@ static void scsi_host_dev_release(struct device *dev)
 
 	scsi_proc_hostdir_rm(shost->hostt);
 
+	/* Wait for functions invoked through call_rcu(&shost->rcu, ...) */
+	rcu_barrier();
+
 	if (shost->tmf_work_q)
 		destroy_workqueue(shost->tmf_work_q);
 	if (shost->ehandler)
@@ -325,6 +328,8 @@ static void scsi_host_dev_release(struct device *dev)
 	if (shost->work_q)
 		destroy_workqueue(shost->work_q);
 
+	destroy_rcu_head(&shost->rcu);
+
 	if (shost->shost_state == SHOST_CREATED) {
 		/*
 		 * Free the shost_dev device name here if scsi_host_alloc()
@@ -399,6 +404,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
 	INIT_LIST_HEAD(&shost->starved_list);
 	init_waitqueue_head(&shost->host_wait);
 	mutex_init(&shost->scan_mutex);
+	init_rcu_head(&shost->rcu);
 
 	index = ida_simple_get(&host_index_ida, 0, 0, GFP_KERNEL);
 	if (index < 0)
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -220,6 +220,17 @@ static void scsi_eh_reset(struct scsi_cmnd *scmd)
 	}
 }
 
+static void scsi_eh_inc_host_failed(struct rcu_head *head)
+{
+	struct Scsi_Host *shost = container_of(head, typeof(*shost), rcu);
+	unsigned long flags;
+
+	spin_lock_irqsave(shost->host_lock, flags);
+	shost->host_failed++;
+	scsi_eh_wakeup(shost);
+	spin_unlock_irqrestore(shost->host_lock, flags);
+}
+
 /**
  * scsi_eh_scmd_add - add scsi cmd to error handling.
  * @scmd:	scmd to run eh on.
@@ -242,9 +253,12 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd)
 
 	scsi_eh_reset(scmd);
 	list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
-	shost->host_failed++;
-	scsi_eh_wakeup(shost);
 	spin_unlock_irqrestore(shost->host_lock, flags);
+	/*
+	 * Ensure that all tasks observe the host state change before the
+	 * host_failed change.
+	 */
+	call_rcu(&shost->rcu, scsi_eh_inc_host_failed);
 }
 
 /**
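For reference, the wakeup condition being protected (a sketch from
memory of the contemporary scsi_eh_wakeup(), which this patch leaves
unchanged): the error handler thread only wakes once every outstanding
command has failed, so a single missed host_busy/host_failed transition
stalls recovery indefinitely.

    void scsi_eh_wakeup(struct Scsi_Host *shost)
    {
    	if (atomic_read(&shost->host_busy) == shost->host_failed) {
    		trace_scsi_eh_wakeup(shost);
    		wake_up_process(shost->ehandler);
    	}
    }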
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -318,23 +318,40 @@ static void scsi_init_cmd_errh(struct scsi_cmnd *cmd)
 		cmd->cmd_len = scsi_command_size(cmd->cmnd);
 }
 
+/*
+ * Decrement the host_busy counter and wake up the error handler if necessary.
+ * Avoid as follows that the error handler is not woken up if shost->host_busy
+ * == shost->host_failed: use call_rcu() in scsi_eh_scmd_add() in combination
+ * with an RCU read lock in this function to ensure that this function in its
+ * entirety either finishes before scsi_eh_scmd_add() increases the
+ * host_failed counter or that it notices the shost state change made by
+ * scsi_eh_scmd_add().
+ */
+static void scsi_dec_host_busy(struct Scsi_Host *shost)
+{
+	unsigned long flags;
+
+	rcu_read_lock();
+	atomic_dec(&shost->host_busy);
+	if (unlikely(scsi_host_in_recovery(shost))) {
+		spin_lock_irqsave(shost->host_lock, flags);
+		if (shost->host_failed || shost->host_eh_scheduled)
+			scsi_eh_wakeup(shost);
+		spin_unlock_irqrestore(shost->host_lock, flags);
+	}
+	rcu_read_unlock();
+}
+
 void scsi_device_unbusy(struct scsi_device *sdev)
 {
 	struct Scsi_Host *shost = sdev->host;
 	struct scsi_target *starget = scsi_target(sdev);
-	unsigned long flags;
 
-	atomic_dec(&shost->host_busy);
+	scsi_dec_host_busy(shost);
 	if (starget->can_queue > 0)
 		atomic_dec(&starget->target_busy);
 
-	if (unlikely(scsi_host_in_recovery(shost) &&
-		     (shost->host_failed || shost->host_eh_scheduled))) {
-		spin_lock_irqsave(shost->host_lock, flags);
-		scsi_eh_wakeup(shost);
-		spin_unlock_irqrestore(shost->host_lock, flags);
-	}
-
 	atomic_dec(&sdev->device_busy);
 }
 
@@ -1532,7 +1549,7 @@ starved:
 	list_add_tail(&sdev->starved_entry, &shost->starved_list);
 	spin_unlock_irq(shost->host_lock);
 out_dec:
-	atomic_dec(&shost->host_busy);
+	scsi_dec_host_busy(shost);
 	return 0;
 }
 
@@ -2018,7 +2035,7 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 	return BLK_STS_OK;
 
 out_dec_host_busy:
-	atomic_dec(&shost->host_busy);
+	scsi_dec_host_busy(shost);
 out_dec_target_busy:
 	if (scsi_target(sdev)->can_queue > 0)
 		atomic_dec(&scsi_target(sdev)->target_busy);
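The handshake described in the scsi_dec_host_busy() comment, pared down
to its essence (a hedged sketch; update_side() is an illustrative
stand-in for the tail of scsi_eh_scmd_add() after this patch, not code
from the patch itself):

    /* Illustrative sketch, not code from this patch. */
    static void update_side(struct Scsi_Host *shost)
    {
    	/*
    	 * The host state was already set to SHOST_RECOVERY under the
    	 * host lock.  Deferring the host_failed increment past an RCU
    	 * grace period leaves two cases:
    	 *
    	 * 1. A scsi_dec_host_busy() critical section was already in
    	 *    flight: the grace period waits for it, so the deferred
    	 *    scsi_eh_inc_host_failed() sees the decremented host_busy
    	 *    and performs the wakeup itself.
    	 *
    	 * 2. scsi_dec_host_busy() starts later: it observes the
    	 *    recovery state, takes the host lock, rechecks
    	 *    host_failed/host_eh_scheduled and performs the wakeup.
    	 */
    	call_rcu(&shost->rcu, scsi_eh_inc_host_failed);
    }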
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -571,6 +571,8 @@ struct Scsi_Host {
 		struct blk_mq_tag_set	tag_set;
 	};
 
+	struct rcu_head rcu;
+
 	atomic_t host_busy;		   /* commands actually active on low-level */
 	atomic_t host_blocked;
 
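
The drivers/scsi/hosts.c hunks manage the lifetime of this new rcu_head;
schematically (a sketch of the call sites added above, not verbatim
code):

    init_rcu_head(&shost->rcu);                     /* scsi_host_alloc() */

    call_rcu(&shost->rcu, scsi_eh_inc_host_failed); /* scsi_eh_scmd_add() */

    /* scsi_host_dev_release(): wait for any pending callback before the
     * Scsi_Host memory is freed, then undo init_rcu_head(). */
    rcu_barrier();
    destroy_rcu_head(&shost->rcu);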