NVMe: Failed controller initialization fixes
This fixes an infinite device reset loop that may occur on devices that fail initialization. If the drive fails to become ready for any reason that does not involve an admin command timeout, the probe task should assume the drive is unavailable and remove it from the topology. In the case an admin command times out during device probing, the driver's existing reset action will handle removing the drive. Signed-off-by: Keith Busch <keith.busch@intel.com> Signed-off-by: Jens Axboe <axboe@fb.com>
This commit is contained in:
parent
ffe7704d59
commit
de3eff2bad
@ -2931,6 +2931,18 @@ static int nvme_dev_resume(struct nvme_dev *dev)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void nvme_dead_ctrl(struct nvme_dev *dev)
|
||||||
|
{
|
||||||
|
dev_warn(dev->dev, "Device failed to resume\n");
|
||||||
|
kref_get(&dev->kref);
|
||||||
|
if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d",
|
||||||
|
dev->instance))) {
|
||||||
|
dev_err(dev->dev,
|
||||||
|
"Failed to start controller remove task\n");
|
||||||
|
kref_put(&dev->kref, nvme_free_dev);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void nvme_dev_reset(struct nvme_dev *dev)
|
static void nvme_dev_reset(struct nvme_dev *dev)
|
||||||
{
|
{
|
||||||
bool in_probe = work_busy(&dev->probe_work);
|
bool in_probe = work_busy(&dev->probe_work);
|
||||||
@ -2944,14 +2956,7 @@ static void nvme_dev_reset(struct nvme_dev *dev)
|
|||||||
/* Fail this device if reset occured during probe to avoid
|
/* Fail this device if reset occured during probe to avoid
|
||||||
* infinite initialization loops. */
|
* infinite initialization loops. */
|
||||||
if (in_probe) {
|
if (in_probe) {
|
||||||
dev_warn(dev->dev, "Device failed to resume\n");
|
nvme_dead_ctrl(dev);
|
||||||
kref_get(&dev->kref);
|
|
||||||
if (IS_ERR(kthread_run(nvme_remove_dead_ctrl, dev, "nvme%d",
|
|
||||||
dev->instance))) {
|
|
||||||
dev_err(dev->dev,
|
|
||||||
"Failed to start controller remove task\n");
|
|
||||||
kref_put(&dev->kref, nvme_free_dev);
|
|
||||||
}
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
/* Schedule device resume asynchronously so the reset work is available
|
/* Schedule device resume asynchronously so the reset work is available
|
||||||
@ -3086,16 +3091,8 @@ static void nvme_async_probe(struct work_struct *work)
|
|||||||
{
|
{
|
||||||
struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work);
|
struct nvme_dev *dev = container_of(work, struct nvme_dev, probe_work);
|
||||||
|
|
||||||
if (nvme_dev_resume(dev))
|
if (nvme_dev_resume(dev) && !work_busy(&dev->reset_work))
|
||||||
goto reset;
|
nvme_dead_ctrl(dev);
|
||||||
return;
|
|
||||||
reset:
|
|
||||||
spin_lock(&dev_list_lock);
|
|
||||||
if (!work_busy(&dev->reset_work)) {
|
|
||||||
dev->reset_workfn = nvme_reset_failed_dev;
|
|
||||||
queue_work(nvme_workq, &dev->reset_work);
|
|
||||||
}
|
|
||||||
spin_unlock(&dev_list_lock);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
|
static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user