Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block layer fixes from Jens Axboe:
 "The major part is an update to the NVMe driver, fixing various issues
  around surprise removal and hung controllers.  Most of that is from
  Keith, and parts are simple blk-mq fixes or exports/additions of minor
  functions to aid this effort, and parts are changes directly to the
  NVMe driver.

  Apart from the above, this contains:

   - Small blk-mq change from me, killing an unused member of the
     hardware queue structure.

   - Small fix from Ming Lei, fixing up a few drivers that didn't
     properly check for ERR_PTR() returns from blk_mq_init_queue()"
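
For illustration, blk_mq_init_queue() reports failure as an ERR_PTR()
value rather than NULL, so the check that fix enforces looks like this
at driver probe time (a minimal sketch; the variable names and the
cleanup label are hypothetical):

    q = blk_mq_init_queue(&tag_set);
    if (IS_ERR(q)) {                    /* never NULL on failure */
            err = PTR_ERR(q);           /* or a fixed code such as -ENOMEM */
            goto out_free_tag_set;      /* hypothetical cleanup label */
    }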

* 'for-linus' of git://git.kernel.dk/linux-block:
  NVMe: Fix locking on abort handling
  NVMe: Start and stop h/w queues on reset
  NVMe: Command abort handling fixes
  NVMe: Admin queue removal handling
  NVMe: Reference count admin queue usage
  NVMe: Start all requests
  blk-mq: End unstarted requests on a dying queue
  blk-mq: Allow requests to never expire
  blk-mq: Add helper to abort requeued requests
  blk-mq: Let drivers cancel requeue_work
  blk-mq: Export if requests were started
  blk-mq: Wake tasks entering queue on dying
  blk-mq: get rid of ->cmd_size in the hardware queue
  block: fix checking return value of blk_mq_init_queue
  block: wake up waiters when a queue is marked dying
  NVMe: Fix double free irq
  blk-mq: Export freeze/unfreeze functions
  blk-mq: Exit queue on alloc failure
Committer: Linus Torvalds
Date:      2015-01-15 10:27:56 +13:00
Commit:    31238e61b5

11 changed files with 240 additions and 64 deletions

--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -473,6 +473,25 @@ void blk_queue_bypass_end(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
 
+void blk_set_queue_dying(struct request_queue *q)
+{
+	queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
+
+	if (q->mq_ops)
+		blk_mq_wake_waiters(q);
+	else {
+		struct request_list *rl;
+
+		blk_queue_for_each_rl(rl, q) {
+			if (rl->rq_pool) {
+				wake_up(&rl->wait[BLK_RW_SYNC]);
+				wake_up(&rl->wait[BLK_RW_ASYNC]);
+			}
+		}
+	}
+}
+EXPORT_SYMBOL_GPL(blk_set_queue_dying);
+
 /**
  * blk_cleanup_queue - shutdown a request queue
  * @q: request queue to shutdown
@@ -486,7 +505,7 @@ void blk_cleanup_queue(struct request_queue *q)
 
 	/* mark @q DYING, no new request or merges will be allowed afterwards */
 	mutex_lock(&q->sysfs_lock);
-	queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
+	blk_set_queue_dying(q);
 	spin_lock_irq(lock);
 
 	/*

--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -68,9 +68,9 @@ bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 }
 
 /*
- * Wakeup all potentially sleeping on normal (non-reserved) tags
+ * Wakeup all potentially sleeping on tags
  */
-static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
+void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
 {
 	struct blk_mq_bitmap_tags *bt;
 	int i, wake_index;
@@ -85,6 +85,12 @@ static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
 		wake_index = bt_index_inc(wake_index);
 	}
 
+	if (include_reserve) {
+		bt = &tags->breserved_tags;
+		if (waitqueue_active(&bt->bs[0].wait))
+			wake_up(&bt->bs[0].wait);
+	}
 }
 
 /*
@@ -100,7 +106,7 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 	atomic_dec(&tags->active_queues);
 
-	blk_mq_tag_wakeup_all(tags);
+	blk_mq_tag_wakeup_all(tags, false);
 }
 
 /*
@@ -584,7 +590,7 @@ int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth)
 	 * static and should never need resizing.
 	 */
 	bt_update_count(&tags->bitmap_tags, tdepth);
-	blk_mq_tag_wakeup_all(tags);
+	blk_mq_tag_wakeup_all(tags, false);
 	return 0;
 }

--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -54,6 +54,7 @@ extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
 extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);
 extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag);
 extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth);
+extern void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool);
 
 enum {
 	BLK_MQ_TAG_CACHE_MIN	= 1,

--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -107,7 +107,7 @@ static void blk_mq_usage_counter_release(struct percpu_ref *ref)
 	wake_up_all(&q->mq_freeze_wq);
 }
 
-static void blk_mq_freeze_queue_start(struct request_queue *q)
+void blk_mq_freeze_queue_start(struct request_queue *q)
 {
 	bool freeze;
 
@@ -120,6 +120,7 @@ static void blk_mq_freeze_queue_start(struct request_queue *q)
 		blk_mq_run_queues(q, false);
 	}
 }
+EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
 
 static void blk_mq_freeze_queue_wait(struct request_queue *q)
 {
@@ -136,7 +137,7 @@ void blk_mq_freeze_queue(struct request_queue *q)
 	blk_mq_freeze_queue_wait(q);
 }
 
-static void blk_mq_unfreeze_queue(struct request_queue *q)
+void blk_mq_unfreeze_queue(struct request_queue *q)
 {
 	bool wake;
 
@@ -149,6 +150,24 @@ static void blk_mq_unfreeze_queue(struct request_queue *q)
 		wake_up_all(&q->mq_freeze_wq);
 	}
 }
+EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
+
+void blk_mq_wake_waiters(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	unsigned int i;
+
+	queue_for_each_hw_ctx(q, hctx, i)
+		if (blk_mq_hw_queue_mapped(hctx))
+			blk_mq_tag_wakeup_all(hctx->tags, true);
+
+	/*
+	 * If we are called because the queue has now been marked as
+	 * dying, we need to ensure that processes currently waiting on
+	 * the queue are notified as well.
+	 */
+	wake_up_all(&q->mq_freeze_wq);
+}
 
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
 {
@@ -258,8 +277,10 @@ struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp,
 		ctx = alloc_data.ctx;
 	}
 	blk_mq_put_ctx(ctx);
-	if (!rq)
+	if (!rq) {
+		blk_mq_queue_exit(q);
 		return ERR_PTR(-EWOULDBLOCK);
+	}
 	return rq;
 }
 EXPORT_SYMBOL(blk_mq_alloc_request);
@@ -383,6 +404,12 @@ void blk_mq_complete_request(struct request *rq)
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
+int blk_mq_request_started(struct request *rq)
+{
+	return test_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
+}
+EXPORT_SYMBOL_GPL(blk_mq_request_started);
+
 void blk_mq_start_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
@@ -500,12 +527,38 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head)
 }
 EXPORT_SYMBOL(blk_mq_add_to_requeue_list);
 
+void blk_mq_cancel_requeue_work(struct request_queue *q)
+{
+	cancel_work_sync(&q->requeue_work);
+}
+EXPORT_SYMBOL_GPL(blk_mq_cancel_requeue_work);
+
 void blk_mq_kick_requeue_list(struct request_queue *q)
 {
 	kblockd_schedule_work(&q->requeue_work);
 }
 EXPORT_SYMBOL(blk_mq_kick_requeue_list);
 
+void blk_mq_abort_requeue_list(struct request_queue *q)
+{
+	unsigned long flags;
+	LIST_HEAD(rq_list);
+
+	spin_lock_irqsave(&q->requeue_lock, flags);
+	list_splice_init(&q->requeue_list, &rq_list);
+	spin_unlock_irqrestore(&q->requeue_lock, flags);
+
+	while (!list_empty(&rq_list)) {
+		struct request *rq;
+
+		rq = list_first_entry(&rq_list, struct request, queuelist);
+		list_del_init(&rq->queuelist);
+		rq->errors = -EIO;
+		blk_mq_end_request(rq, rq->errors);
+	}
+}
+EXPORT_SYMBOL(blk_mq_abort_requeue_list);
+
 static inline bool is_flush_request(struct request *rq,
 				    struct blk_flush_queue *fq, unsigned int tag)
 {
@@ -566,13 +619,24 @@ void blk_mq_rq_timed_out(struct request *req, bool reserved)
 		break;
 	}
 }
 
 static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 		struct request *rq, void *priv, bool reserved)
 {
 	struct blk_mq_timeout_data *data = priv;
 
-	if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
+	if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
+		/*
+		 * If a request wasn't started before the queue was
+		 * marked dying, kill it here or it'll go unnoticed.
+		 */
+		if (unlikely(blk_queue_dying(rq->q))) {
+			rq->errors = -EIO;
+			blk_mq_complete_request(rq);
+		}
+		return;
+	}
+	if (rq->cmd_flags & REQ_NO_TIMEOUT)
 		return;
 
 	if (time_after_eq(jiffies, rq->deadline)) {
@@ -1601,7 +1665,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
 	hctx->queue = q;
 	hctx->queue_num = hctx_idx;
 	hctx->flags = set->flags;
-	hctx->cmd_size = set->cmd_size;
 
 	blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
 					blk_mq_hctx_notify, hctx);
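
Taken together, the helpers exported above give drivers a quiesce/restart
sequence around a controller reset. A minimal sketch of the intended
pairing, mirroring what the NVMe driver later in this series does per
namespace queue (the my_* function names are hypothetical):

    /* Quiesce: block new submitters, flush pending requeue work,
     * and stop dispatching to hardware that is about to be reset. */
    static void my_freeze(struct request_queue *q)
    {
            blk_mq_freeze_queue_start(q);
            blk_mq_cancel_requeue_work(q);
            blk_mq_stop_hw_queues(q);
    }

    /* Restart after a successful reset: the reverse sequence. */
    static void my_unfreeze(struct request_queue *q)
    {
            blk_mq_unfreeze_queue(q);
            blk_mq_start_stopped_hw_queues(q, true);
            blk_mq_kick_requeue_list(q);
    }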

--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -32,6 +32,7 @@ void blk_mq_free_queue(struct request_queue *q);
 void blk_mq_clone_flush_request(struct request *flush_rq,
 		struct request *orig_rq);
 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
+void blk_mq_wake_waiters(struct request_queue *q);
 
 /*
  * CPU hotplug helpers

--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -190,6 +190,9 @@ void blk_add_timer(struct request *req)
 	struct request_queue *q = req->q;
 	unsigned long expiry;
 
+	if (req->cmd_flags & REQ_NO_TIMEOUT)
+		return;
+
 	/* blk-mq has its own handler, so we don't need ->rq_timed_out_fn */
 	if (!q->mq_ops && !q->rq_timed_out_fn)
 		return;

--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -530,7 +530,7 @@ static int null_add_dev(void)
 		goto out_cleanup_queues;
 
 	nullb->q = blk_mq_init_queue(&nullb->tag_set);
-	if (!nullb->q) {
+	if (IS_ERR(nullb->q)) {
 		rv = -ENOMEM;
 		goto out_cleanup_tags;
 	}

--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -215,6 +215,7 @@ static void nvme_set_info(struct nvme_cmd_info *cmd, void *ctx,
 	cmd->fn = handler;
 	cmd->ctx = ctx;
 	cmd->aborted = 0;
+	blk_mq_start_request(blk_mq_rq_from_pdu(cmd));
 }
 
 /* Special values must be less than 0x1000 */
@@ -431,8 +432,13 @@ static void req_completion(struct nvme_queue *nvmeq, void *ctx,
 	if (unlikely(status)) {
 		if (!(status & NVME_SC_DNR || blk_noretry_request(req))
 		    && (jiffies - req->start_time) < req->timeout) {
+			unsigned long flags;
+
 			blk_mq_requeue_request(req);
-			blk_mq_kick_requeue_list(req->q);
+			spin_lock_irqsave(req->q->queue_lock, flags);
+			if (!blk_queue_stopped(req->q))
+				blk_mq_kick_requeue_list(req->q);
+			spin_unlock_irqrestore(req->q->queue_lock, flags);
 			return;
 		}
 		req->errors = nvme_error_status(status);
@@ -664,8 +670,6 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 		}
 	}
 
-	blk_mq_start_request(req);
-
 	nvme_set_info(cmd, iod, req_completion);
 	spin_lock_irq(&nvmeq->q_lock);
 	if (req->cmd_flags & REQ_DISCARD)
@@ -835,6 +839,7 @@ static int nvme_submit_async_admin_req(struct nvme_dev *dev)
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 
+	req->cmd_flags |= REQ_NO_TIMEOUT;
 	cmd_info = blk_mq_rq_to_pdu(req);
 	nvme_set_info(cmd_info, req, async_req_completion);
 
@@ -1016,14 +1021,19 @@ static void nvme_abort_req(struct request *req)
 	struct nvme_command cmd;
 
 	if (!nvmeq->qid || cmd_rq->aborted) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&dev_list_lock, flags);
 		if (work_busy(&dev->reset_work))
-			return;
+			goto out;
 		list_del_init(&dev->node);
 		dev_warn(&dev->pci_dev->dev,
 			"I/O %d QID %d timeout, reset controller\n",
 							req->tag, nvmeq->qid);
 		dev->reset_workfn = nvme_reset_failed_dev;
 		queue_work(nvme_workq, &dev->reset_work);
+ out:
+		spin_unlock_irqrestore(&dev_list_lock, flags);
 		return;
 	}
@@ -1064,15 +1074,22 @@ static void nvme_cancel_queue_ios(struct blk_mq_hw_ctx *hctx,
 	void *ctx;
 	nvme_completion_fn fn;
 	struct nvme_cmd_info *cmd;
-	static struct nvme_completion cqe = {
-		.status = cpu_to_le16(NVME_SC_ABORT_REQ << 1),
-	};
+	struct nvme_completion cqe;
+
+	if (!blk_mq_request_started(req))
+		return;
 
 	cmd = blk_mq_rq_to_pdu(req);
 
 	if (cmd->ctx == CMD_CTX_CANCELLED)
 		return;
 
+	if (blk_queue_dying(req->q))
+		cqe.status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1);
+	else
+		cqe.status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
+
 	dev_warn(nvmeq->q_dmadev, "Cancelling I/O %d QID %d\n",
 						req->tag, nvmeq->qid);
 	ctx = cancel_cmd_info(cmd, &fn);
@@ -1084,17 +1101,29 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	struct nvme_cmd_info *cmd = blk_mq_rq_to_pdu(req);
 	struct nvme_queue *nvmeq = cmd->nvmeq;
 
-	dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag,
-							nvmeq->qid);
-
-	if (nvmeq->dev->initialized)
-		nvme_abort_req(req);
-
 	/*
 	 * The aborted req will be completed on receiving the abort req.
 	 * We enable the timer again. If hit twice, it'll cause a device reset,
 	 * as the device then is in a faulty state.
 	 */
-	return BLK_EH_RESET_TIMER;
+	int ret = BLK_EH_RESET_TIMER;
+
+	dev_warn(nvmeq->q_dmadev, "Timeout I/O %d QID %d\n", req->tag,
+							nvmeq->qid);
+
+	spin_lock_irq(&nvmeq->q_lock);
+	if (!nvmeq->dev->initialized) {
+		/*
+		 * Force cancelled command frees the request, which requires we
+		 * return BLK_EH_NOT_HANDLED.
+		 */
+		nvme_cancel_queue_ios(nvmeq->hctx, req, nvmeq, reserved);
+		ret = BLK_EH_NOT_HANDLED;
+	} else
+		nvme_abort_req(req);
+	spin_unlock_irq(&nvmeq->q_lock);
+
+	return ret;
 }
@@ -1131,10 +1160,16 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest)
  */
 static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 {
-	int vector = nvmeq->dev->entry[nvmeq->cq_vector].vector;
+	int vector;
 
 	spin_lock_irq(&nvmeq->q_lock);
+	if (nvmeq->cq_vector == -1) {
+		spin_unlock_irq(&nvmeq->q_lock);
+		return 1;
+	}
+	vector = nvmeq->dev->entry[nvmeq->cq_vector].vector;
 	nvmeq->dev->online_queues--;
+	nvmeq->cq_vector = -1;
 	spin_unlock_irq(&nvmeq->q_lock);
 
 	irq_set_affinity_hint(vector, NULL);
@@ -1169,11 +1204,13 @@ static void nvme_disable_queue(struct nvme_dev *dev, int qid)
 		adapter_delete_sq(dev, qid);
 		adapter_delete_cq(dev, qid);
 	}
+	if (!qid && dev->admin_q)
+		blk_mq_freeze_queue_start(dev->admin_q);
 	nvme_clear_queue(nvmeq);
 }
 
 static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
-							int depth, int vector)
+							int depth)
 {
 	struct device *dmadev = &dev->pci_dev->dev;
 	struct nvme_queue *nvmeq = kzalloc(sizeof(*nvmeq), GFP_KERNEL);
@@ -1199,7 +1236,6 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
 	nvmeq->cq_phase = 1;
 	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
 	nvmeq->q_depth = depth;
-	nvmeq->cq_vector = vector;
 	nvmeq->qid = qid;
 	dev->queue_count++;
 	dev->queues[qid] = nvmeq;
@@ -1244,6 +1280,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
 	struct nvme_dev *dev = nvmeq->dev;
 	int result;
 
+	nvmeq->cq_vector = qid - 1;
 	result = adapter_alloc_cq(dev, qid, nvmeq);
 	if (result < 0)
 		return result;
@@ -1355,6 +1392,14 @@ static struct blk_mq_ops nvme_mq_ops = {
 	.timeout	= nvme_timeout,
 };
 
+static void nvme_dev_remove_admin(struct nvme_dev *dev)
+{
+	if (dev->admin_q && !blk_queue_dying(dev->admin_q)) {
+		blk_cleanup_queue(dev->admin_q);
+		blk_mq_free_tag_set(&dev->admin_tagset);
+	}
+}
+
 static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 {
 	if (!dev->admin_q) {
@@ -1370,21 +1415,20 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 			return -ENOMEM;
 
 		dev->admin_q = blk_mq_init_queue(&dev->admin_tagset);
-		if (!dev->admin_q) {
+		if (IS_ERR(dev->admin_q)) {
 			blk_mq_free_tag_set(&dev->admin_tagset);
 			return -ENOMEM;
 		}
-	}
+		if (!blk_get_queue(dev->admin_q)) {
+			nvme_dev_remove_admin(dev);
+			return -ENODEV;
+		}
+	} else
+		blk_mq_unfreeze_queue(dev->admin_q);
 
 	return 0;
 }
 
-static void nvme_free_admin_tags(struct nvme_dev *dev)
-{
-	if (dev->admin_q)
-		blk_mq_free_tag_set(&dev->admin_tagset);
-}
-
 static int nvme_configure_admin_queue(struct nvme_dev *dev)
 {
 	int result;
@@ -1416,7 +1460,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 
 	nvmeq = dev->queues[0];
 	if (!nvmeq) {
-		nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH, 0);
+		nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
 		if (!nvmeq)
 			return -ENOMEM;
 	}
@@ -1439,18 +1483,13 @@
 	if (result)
 		goto free_nvmeq;
 
-	result = nvme_alloc_admin_tags(dev);
+	nvmeq->cq_vector = 0;
+	result = queue_request_irq(dev, nvmeq, nvmeq->irqname);
 	if (result)
 		goto free_nvmeq;
 
-	result = queue_request_irq(dev, nvmeq, nvmeq->irqname);
-	if (result)
-		goto free_tags;
-
 	return result;
 
- free_tags:
-	nvme_free_admin_tags(dev);
 free_nvmeq:
 	nvme_free_queues(dev, 0);
 	return result;
@@ -1944,7 +1983,7 @@ static void nvme_create_io_queues(struct nvme_dev *dev)
 	unsigned i;
 
 	for (i = dev->queue_count; i <= dev->max_qid; i++)
-		if (!nvme_alloc_queue(dev, i, dev->q_depth, i - 1))
+		if (!nvme_alloc_queue(dev, i, dev->q_depth))
 			break;
 
 	for (i = dev->online_queues; i <= dev->queue_count - 1; i++)
@@ -2235,13 +2274,18 @@ static void nvme_wait_dq(struct nvme_delq_ctx *dq, struct nvme_dev *dev)
 			break;
 		if (!schedule_timeout(ADMIN_TIMEOUT) ||
 					fatal_signal_pending(current)) {
+			/*
+			 * Disable the controller first since we can't trust it
+			 * at this point, but leave the admin queue enabled
+			 * until all queue deletion requests are flushed.
+			 * FIXME: This may take a while if there are more h/w
+			 * queues than admin tags.
+			 */
 			set_current_state(TASK_RUNNING);
-
 			nvme_disable_ctrl(dev, readq(&dev->bar->cap));
-			nvme_disable_queue(dev, 0);
-
-			send_sig(SIGKILL, dq->worker->task, 1);
+			nvme_clear_queue(dev->queues[0]);
 			flush_kthread_worker(dq->worker);
+			nvme_disable_queue(dev, 0);
 			return;
 		}
 	}
@@ -2318,7 +2362,6 @@ static void nvme_del_queue_start(struct kthread_work *work)
 {
 	struct nvme_queue *nvmeq = container_of(work, struct nvme_queue,
 							cmdinfo.work);
-	allow_signal(SIGKILL);
 	if (nvme_delete_sq(nvmeq))
 		nvme_del_queue_end(nvmeq);
 }
@@ -2376,6 +2419,34 @@ static void nvme_dev_list_remove(struct nvme_dev *dev)
 		kthread_stop(tmp);
 }
 
+static void nvme_freeze_queues(struct nvme_dev *dev)
+{
+	struct nvme_ns *ns;
+
+	list_for_each_entry(ns, &dev->namespaces, list) {
+		blk_mq_freeze_queue_start(ns->queue);
+
+		spin_lock(ns->queue->queue_lock);
+		queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
+		spin_unlock(ns->queue->queue_lock);
+
+		blk_mq_cancel_requeue_work(ns->queue);
+		blk_mq_stop_hw_queues(ns->queue);
+	}
+}
+
+static void nvme_unfreeze_queues(struct nvme_dev *dev)
+{
+	struct nvme_ns *ns;
+
+	list_for_each_entry(ns, &dev->namespaces, list) {
+		queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, ns->queue);
+		blk_mq_unfreeze_queue(ns->queue);
+		blk_mq_start_stopped_hw_queues(ns->queue, true);
+		blk_mq_kick_requeue_list(ns->queue);
+	}
+}
+
 static void nvme_dev_shutdown(struct nvme_dev *dev)
 {
 	int i;
@@ -2384,8 +2455,10 @@ static void nvme_dev_shutdown(struct nvme_dev *dev)
 	dev->initialized = 0;
 	nvme_dev_list_remove(dev);
 
-	if (dev->bar)
+	if (dev->bar) {
+		nvme_freeze_queues(dev);
 		csts = readl(&dev->bar->csts);
+	}
 	if (csts & NVME_CSTS_CFS || !(csts & NVME_CSTS_RDY)) {
 		for (i = dev->queue_count - 1; i >= 0; i--) {
 			struct nvme_queue *nvmeq = dev->queues[i];
@@ -2400,12 +2473,6 @@
 	nvme_dev_unmap(dev);
 }
 
-static void nvme_dev_remove_admin(struct nvme_dev *dev)
-{
-	if (dev->admin_q && !blk_queue_dying(dev->admin_q))
-		blk_cleanup_queue(dev->admin_q);
-}
-
 static void nvme_dev_remove(struct nvme_dev *dev)
 {
 	struct nvme_ns *ns;
@@ -2413,8 +2480,10 @@ static void nvme_dev_remove(struct nvme_dev *dev)
 	list_for_each_entry(ns, &dev->namespaces, list) {
 		if (ns->disk->flags & GENHD_FL_UP)
 			del_gendisk(ns->disk);
-		if (!blk_queue_dying(ns->queue))
+		if (!blk_queue_dying(ns->queue)) {
+			blk_mq_abort_requeue_list(ns->queue);
 			blk_cleanup_queue(ns->queue);
+		}
 	}
 }
 
@@ -2495,6 +2564,7 @@ static void nvme_free_dev(struct kref *kref)
 	nvme_free_namespaces(dev);
 	nvme_release_instance(dev);
 	blk_mq_free_tag_set(&dev->tagset);
+	blk_put_queue(dev->admin_q);
 	kfree(dev->queues);
 	kfree(dev->entry);
 	kfree(dev);
@@ -2591,15 +2661,20 @@ static int nvme_dev_start(struct nvme_dev *dev)
 	}
 
 	nvme_init_queue(dev->queues[0], 0);
+	result = nvme_alloc_admin_tags(dev);
+	if (result)
+		goto disable;
 
 	result = nvme_setup_io_queues(dev);
 	if (result)
-		goto disable;
+		goto free_tags;
 
 	nvme_set_irq_hints(dev);
 
 	return result;
 
+ free_tags:
+	nvme_dev_remove_admin(dev);
 disable:
 	nvme_disable_queue(dev, 0);
 	nvme_dev_list_remove(dev);
@@ -2639,6 +2714,9 @@ static int nvme_dev_resume(struct nvme_dev *dev)
 		dev->reset_workfn = nvme_remove_disks;
 		queue_work(nvme_workq, &dev->reset_work);
 		spin_unlock(&dev_list_lock);
+	} else {
+		nvme_unfreeze_queues(dev);
+		nvme_set_irq_hints(dev);
 	}
 	dev->initialized = 1;
 	return 0;
@@ -2776,11 +2854,10 @@ static void nvme_remove(struct pci_dev *pdev)
 	pci_set_drvdata(pdev, NULL);
 	flush_work(&dev->reset_work);
 	misc_deregister(&dev->miscdev);
-	nvme_dev_remove(dev);
 	nvme_dev_shutdown(dev);
+	nvme_dev_remove(dev);
 	nvme_dev_remove_admin(dev);
 	nvme_free_queues(dev, 0);
-	nvme_free_admin_tags(dev);
 	nvme_release_prp_pools(dev);
 	kref_put(&dev->kref, nvme_free_dev);
 }

--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -638,7 +638,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 		goto out_put_disk;
 
 	q = vblk->disk->queue = blk_mq_init_queue(&vblk->tag_set);
-	if (!q) {
+	if (IS_ERR(q)) {
 		err = -ENOMEM;
 		goto out_free_tags;
 	}

--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -34,7 +34,6 @@ struct blk_mq_hw_ctx {
 	unsigned long		flags;		/* BLK_MQ_F_* flags */
 
 	struct request_queue	*queue;
-	unsigned int		queue_num;
 	struct blk_flush_queue	*fq;
 
 	void			*driver_data;
@@ -54,7 +53,7 @@ struct blk_mq_hw_ctx {
 	unsigned long		dispatched[BLK_MQ_MAX_DISPATCH_ORDER];
 
 	unsigned int		numa_node;
-	unsigned int		cmd_size;	/* per-request extra data */
+	unsigned int		queue_num;
 
 	atomic_t		nr_active;
@@ -195,13 +194,16 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag)
 struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
 struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
 
+int blk_mq_request_started(struct request *rq);
 void blk_mq_start_request(struct request *rq);
 void blk_mq_end_request(struct request *rq, int error);
 void __blk_mq_end_request(struct request *rq, int error);
 
 void blk_mq_requeue_request(struct request *rq);
 void blk_mq_add_to_requeue_list(struct request *rq, bool at_head);
+void blk_mq_cancel_requeue_work(struct request_queue *q);
 void blk_mq_kick_requeue_list(struct request_queue *q);
+void blk_mq_abort_requeue_list(struct request_queue *q);
 void blk_mq_complete_request(struct request *rq);
 
 void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
@@ -212,6 +214,8 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
 void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
 void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn,
 		void *priv);
+void blk_mq_unfreeze_queue(struct request_queue *q);
+void blk_mq_freeze_queue_start(struct request_queue *q);
 
 /*
  * Driver command data is immediately after the request. So subtract request

--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -190,6 +190,7 @@ enum rq_flag_bits {
 	__REQ_PM,		/* runtime pm request */
 	__REQ_HASHED,		/* on IO scheduler merge hash */
 	__REQ_MQ_INFLIGHT,	/* track inflight for MQ */
+	__REQ_NO_TIMEOUT,	/* requests may never expire */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -243,5 +244,6 @@ enum rq_flag_bits {
 #define REQ_PM			(1ULL << __REQ_PM)
 #define REQ_HASHED		(1ULL << __REQ_HASHED)
 #define REQ_MQ_INFLIGHT		(1ULL << __REQ_MQ_INFLIGHT)
+#define REQ_NO_TIMEOUT		(1ULL << __REQ_NO_TIMEOUT)
 
 #endif /* __LINUX_BLK_TYPES_H */
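
As a usage note, the new flag is meant for requests whose completion is
entirely device-paced. The NVMe driver above sets it on its async event
request so that neither blk_add_timer() nor blk_mq_check_expired() will
ever expire it. A minimal sketch, assuming a reserved tag as the NVMe
driver uses:

    struct request *req;

    req = blk_mq_alloc_request(q, WRITE, GFP_ATOMIC, true);
    if (IS_ERR(req))
            return PTR_ERR(req);
    req->cmd_flags |= REQ_NO_TIMEOUT;   /* this request may never expire */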