From 81c1cd98351bec779c4587865b62bfbac72ee811 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 4 Apr 2017 18:18:12 -0400 Subject: [PATCH 01/13] nvme/pci: Don't set reserved SQ create flags The QPRIO field is only valid if weighted round robin arbitration is used, and this driver doesn't enable that controller configuration option. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d9e2bd07ed56..af783a33e93a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -785,7 +785,7 @@ static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, struct nvme_queue *nvmeq) { struct nvme_command c; - int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM; + int flags = NVME_QUEUE_PHYS_CONTIG; /* * Note: we (ab)use the fact the the prp fields survive if no data From f9f38e33389c019ec880f6825119c94867c1fde0 Mon Sep 17 00:00:00 2001 From: Helen Koike Date: Mon, 10 Apr 2017 12:51:07 -0300 Subject: [PATCH 02/13] nvme: improve performance for virtual NVMe devices This change provides a mechanism to reduce the number of MMIO doorbell writes for the NVMe driver. When running in a virtualized environment like QEMU, the cost of an MMIO is quite hefty. The main idea of the patch is to provide the device with two memory locations: 1) to store the doorbell values so they can be looked up without the doorbell MMIO write 2) to store an event index. I believe the doorbell value is obvious, the event index not so much. Similar to the virtio specification, the virtual device can tell the driver (guest OS) not to issue an MMIO write unless it is writing past this value. FYI: doorbell values are written by the nvme driver (guest OS) and the event index is written by the virtual device (host OS). The patch implements a new admin command that will communicate where these two memory locations reside. If the command fails, the nvme driver will work as before without any optimizations. Contributions: Eric Northup Frank Swiderski Ted Tso Keith Busch Just to give an idea of the performance boost with the vendor extension: running fio [1], a stock NVMe driver gets about 200K read IOPs; with the vendor patch it gets about 1000K read IOPs. This was run with a null device, i.e. the backing device simply returned success on every read IO request.
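As an illustration (not part of the patch): the "write past this value" rule is the wraparound-safe comparison the patch adds as nvme_dbbuf_need_event(). A minimal, self-contained userspace sketch of that check, with illustrative sample values:

#include <stdio.h>
#include <stdint.h>

/* Same comparison as nvme_dbbuf_need_event(): an MMIO doorbell write is
 * needed only when the new doorbell value moves past the event index the
 * device last published. All math is mod 2^16, matching 16-bit indices.
 */
static int need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old)
{
	return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old);
}

int main(void)
{
	/* device asked to be notified once the driver moves past index 10 */
	printf("%d\n", need_event(10, 11, 9));       /* 1: 9 -> 11 crosses it, ring the doorbell */
	printf("%d\n", need_event(10, 9, 8));        /* 0: still behind the event index, skip the MMIO */
	printf("%d\n", need_event(65534, 2, 65533)); /* 1: crossing 65535 across the 16-bit wrap still rings */
	return 0;
}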
[1] Running on a 4 core machine: fio --time_based --name=benchmark --runtime=30 --filename=/dev/nvme0n1 --nrfiles=1 --ioengine=libaio --iodepth=32 --direct=1 --invalidate=1 --verify=0 --verify_fatal=0 --numjobs=4 --rw=randread --blocksize=4k --randrepeat=false Signed-off-by: Rob Nelson [mlin: port for upstream] Signed-off-by: Ming Lin [koike: updated for upstream] Signed-off-by: Helen Koike Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 143 +++++++++++++++++++++++++++++++++++++++- include/linux/nvme.h | 13 ++++ 2 files changed, 154 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index af783a33e93a..a363fecb8d82 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -103,8 +103,22 @@ struct nvme_dev { u32 cmbloc; struct nvme_ctrl ctrl; struct completion ioq_wait; + u32 *dbbuf_dbs; + dma_addr_t dbbuf_dbs_dma_addr; + u32 *dbbuf_eis; + dma_addr_t dbbuf_eis_dma_addr; }; +static inline unsigned int sq_idx(unsigned int qid, u32 stride) +{ + return qid * 2 * stride; +} + +static inline unsigned int cq_idx(unsigned int qid, u32 stride) +{ + return (qid * 2 + 1) * stride; +} + static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl) { return container_of(ctrl, struct nvme_dev, ctrl); @@ -133,6 +147,10 @@ struct nvme_queue { u16 qid; u8 cq_phase; u8 cqe_seen; + u32 *dbbuf_sq_db; + u32 *dbbuf_cq_db; + u32 *dbbuf_sq_ei; + u32 *dbbuf_cq_ei; }; /* @@ -171,6 +189,112 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_id_ns) != 4096); BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); + BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64); +} + +static inline unsigned int nvme_dbbuf_size(u32 stride) +{ + return ((num_possible_cpus() + 1) * 8 * stride); +} + +static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) +{ + unsigned int mem_size = nvme_dbbuf_size(dev->db_stride); + + if (dev->dbbuf_dbs) + return 0; + + dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size, + &dev->dbbuf_dbs_dma_addr, + GFP_KERNEL); + if (!dev->dbbuf_dbs) + return -ENOMEM; + dev->dbbuf_eis = dma_alloc_coherent(dev->dev, mem_size, + &dev->dbbuf_eis_dma_addr, + GFP_KERNEL); + if (!dev->dbbuf_eis) { + dma_free_coherent(dev->dev, mem_size, + dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr); + dev->dbbuf_dbs = NULL; + return -ENOMEM; + } + + return 0; +} + +static void nvme_dbbuf_dma_free(struct nvme_dev *dev) +{ + unsigned int mem_size = nvme_dbbuf_size(dev->db_stride); + + if (dev->dbbuf_dbs) { + dma_free_coherent(dev->dev, mem_size, + dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr); + dev->dbbuf_dbs = NULL; + } + if (dev->dbbuf_eis) { + dma_free_coherent(dev->dev, mem_size, + dev->dbbuf_eis, dev->dbbuf_eis_dma_addr); + dev->dbbuf_eis = NULL; + } +} + +static void nvme_dbbuf_init(struct nvme_dev *dev, + struct nvme_queue *nvmeq, int qid) +{ + if (!dev->dbbuf_dbs || !qid) + return; + + nvmeq->dbbuf_sq_db = &dev->dbbuf_dbs[sq_idx(qid, dev->db_stride)]; + nvmeq->dbbuf_cq_db = &dev->dbbuf_dbs[cq_idx(qid, dev->db_stride)]; + nvmeq->dbbuf_sq_ei = &dev->dbbuf_eis[sq_idx(qid, dev->db_stride)]; + nvmeq->dbbuf_cq_ei = &dev->dbbuf_eis[cq_idx(qid, dev->db_stride)]; +} + +static void nvme_dbbuf_set(struct nvme_dev *dev) +{ + struct nvme_command c; + + if (!dev->dbbuf_dbs) + return; + + memset(&c, 0, sizeof(c)); + c.dbbuf.opcode = nvme_admin_dbbuf; + c.dbbuf.prp1 = cpu_to_le64(dev->dbbuf_dbs_dma_addr); + c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr); + + if 
(nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0)) { + dev_warn(dev->dev, "unable to set dbbuf\n"); + /* Free memory and continue on */ + nvme_dbbuf_dma_free(dev); + } +} + +static inline int nvme_dbbuf_need_event(u16 event_idx, u16 new_idx, u16 old) +{ + return (u16)(new_idx - event_idx - 1) < (u16)(new_idx - old); +} + +/* Update dbbuf and return true if an MMIO is required */ +static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db, + volatile u32 *dbbuf_ei) +{ + if (dbbuf_db) { + u16 old_value; + + /* + * Ensure that the queue is written before updating + * the doorbell in memory + */ + wmb(); + + old_value = *dbbuf_db; + *dbbuf_db = value; + + if (!nvme_dbbuf_need_event(*dbbuf_ei, value, old_value)) + return false; + } + + return true; } /* @@ -297,7 +421,9 @@ static void __nvme_submit_cmd(struct nvme_queue *nvmeq, if (++tail == nvmeq->q_depth) tail = 0; - writel(tail, nvmeq->q_db); + if (nvme_dbbuf_update_and_check_event(tail, nvmeq->dbbuf_sq_db, + nvmeq->dbbuf_sq_ei)) + writel(tail, nvmeq->q_db); nvmeq->sq_tail = tail; } @@ -686,7 +812,9 @@ static void __nvme_process_cq(struct nvme_queue *nvmeq, unsigned int *tag) return; if (likely(nvmeq->cq_vector >= 0)) - writel(head, nvmeq->q_db + nvmeq->dev->db_stride); + if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db, + nvmeq->dbbuf_cq_ei)) + writel(head, nvmeq->q_db + nvmeq->dev->db_stride); nvmeq->cq_head = head; nvmeq->cq_phase = phase; @@ -1070,6 +1198,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq->q_depth)); + nvme_dbbuf_init(dev, nvmeq, qid); dev->online_queues++; spin_unlock_irq(&nvmeq->q_lock); } @@ -1542,6 +1671,8 @@ static int nvme_dev_add(struct nvme_dev *dev) if (blk_mq_alloc_tag_set(&dev->tagset)) return 0; dev->ctrl.tagset = &dev->tagset; + + nvme_dbbuf_set(dev); } else { blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1); @@ -1728,6 +1859,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) { struct nvme_dev *dev = to_nvme_dev(ctrl); + nvme_dbbuf_dma_free(dev); put_device(dev->dev); if (dev->tagset.tags) blk_mq_free_tag_set(&dev->tagset); @@ -1795,6 +1927,13 @@ static void nvme_reset_work(struct work_struct *work) dev->ctrl.opal_dev = NULL; } + if (dev->ctrl.oacs & NVME_CTRL_OACS_DBBUF_SUPP) { + result = nvme_dbbuf_dma_alloc(dev); + if (result) + dev_warn(dev->dev, + "unable to allocate dma for dbbuf\n"); + } + result = nvme_setup_io_queues(dev); if (result) goto out; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 9061780b141f..b625bacf37ef 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -245,6 +245,7 @@ enum { NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3, NVME_CTRL_VWC_PRESENT = 1 << 0, NVME_CTRL_OACS_SEC_SUPP = 1 << 0, + NVME_CTRL_OACS_DBBUF_SUPP = 1 << 7, }; struct nvme_lbaf { @@ -603,6 +604,7 @@ enum nvme_admin_opcode { nvme_admin_download_fw = 0x11, nvme_admin_ns_attach = 0x15, nvme_admin_keep_alive = 0x18, + nvme_admin_dbbuf = 0x7C, nvme_admin_format_nvm = 0x80, nvme_admin_security_send = 0x81, nvme_admin_security_recv = 0x82, @@ -874,6 +876,16 @@ struct nvmf_property_get_command { __u8 resv4[16]; }; +struct nvme_dbbuf { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[5]; + __le64 prp1; + __le64 prp2; + __u32 rsvd12[6]; +}; + struct nvme_command { union { struct nvme_common_command common; @@ -893,6 +905,7 @@ struct nvme_command { struct nvmf_connect_command connect; struct 
nvmf_property_set_command prop_set; struct nvmf_property_get_command prop_get; + struct nvme_dbbuf dbbuf; }; }; From 1c05cf9058027dd80630d54aa3527eb2e3152e80 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Tue, 18 Apr 2017 17:32:15 -0600 Subject: [PATCH 03/13] nvmet: convert from kmap to nvmet_copy_from_sgl This is safer as it doesn't rely on the data being stored in a single page in an sgl. It also aids our effort to start phasing out users of sg_page. See [1]. For this we kmalloc some memory, copy into it, and free it at the end. Note: we can't allocate this memory on the stack as the kbuild test robot reports some frame size overflows on i386. [1] https://lwn.net/Articles/720053/ Signed-off-by: Logan Gunthorpe Reviewed-by: Christoph Hellwig Reviewed-by: Max Gurtovoy Signed-off-by: Sagi Grimberg --- drivers/nvme/target/fabrics-cmd.c | 32 ++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 2a3c15b57f6e..3cc17269504b 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -122,7 +122,15 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) struct nvmet_ctrl *ctrl = NULL; u16 status = 0; - d = kmap(sg_page(req->sg)) + req->sg->offset; + d = kmalloc(sizeof(*d), GFP_KERNEL); + if (!d) { + status = NVME_SC_INTERNAL; + goto complete; + } + + status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d)); + if (status) + goto out; /* zero out initial completion result, assign values as needed */ req->rsp->result.u32 = 0; @@ -143,7 +151,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) } status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req, - le32_to_cpu(c->kato), &ctrl); + le32_to_cpu(c->kato), &ctrl); if (status) goto out; @@ -158,7 +166,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid); out: - kunmap(sg_page(req->sg)); + kfree(d); +complete: nvmet_req_complete(req, status); } @@ -170,7 +179,15 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) u16 qid = le16_to_cpu(c->qid); u16 status = 0; - d = kmap(sg_page(req->sg)) + req->sg->offset; + d = kmalloc(sizeof(*d), GFP_KERNEL); + if (!d) { + status = NVME_SC_INTERNAL; + goto complete; + } + + status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d)); + if (status) + goto out; /* zero out initial completion result, assign values as needed */ req->rsp->result.u32 = 0; @@ -183,8 +200,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) } status = nvmet_ctrl_find_get(d->subsysnqn, d->hostnqn, - le16_to_cpu(d->cntlid), - req, &ctrl); + le16_to_cpu(d->cntlid), + req, &ctrl); if (status) goto out; @@ -205,7 +222,8 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) pr_info("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid); out: - kunmap(sg_page(req->sg)); + kfree(d); +complete: nvmet_req_complete(req, status); return; From 39498faef7c02f9f6de4060ccdc7e8975a6e690b Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 11 Apr 2017 11:32:28 -0700 Subject: [PATCH 04/13] nvmet_fc: add target feature flags for upcall isr contexts Two new feature flags were added to control whether upcalls to the transport result in context switches or stay in the calling context. NVMET_FCTGTFEAT_CMD_IN_ISR: By default, if the flag is not set, the transport assumes the lldd is calling in a non-isr context and on the cpu the io queue should be serviced on.
As such, the cmd handler is called directly in the calling context. If the flag is set, indicating the upcall is in an isr context, the transport mandates a transition to a workqueue. The workqueue assigned to the queue is used for the context. NVMET_FCTGTFEAT_OPDONE_IN_ISR: By default, if the flag is not set, the transport assumes the lldd is calling in a non-isr context and on the cpu the io queue should be serviced on. As such, the fcp operation done callback is called directly in the calling context. If the flag is set, indicating the upcall is in an isr context, the transport mandates a transition to a workqueue. The workqueue assigned to the queue is used for the context. Updated lpfc for the new flags. Signed-off-by: James Smart Signed-off-by: Sagi Grimberg --- drivers/nvme/target/fc.c | 37 +++++++++++++++++++++++++++++++--- drivers/nvme/target/fcloop.c | 5 +++-- drivers/scsi/lpfc/lpfc_nvmet.c | 4 +++- include/linux/nvme-fc-driver.h | 16 +++++++++++++++ 4 files changed, 56 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 2c0709f0a7d3..a4fad3d0b660 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -86,6 +86,7 @@ struct nvmet_fc_fcp_iod { struct nvmet_req req; struct work_struct work; + struct work_struct done_work; struct nvmet_fc_tgtport *tgtport; struct nvmet_fc_tgt_queue *queue; @@ -213,6 +214,7 @@ static DEFINE_IDA(nvmet_fc_tgtport_cnt); static void nvmet_fc_handle_ls_rqst_work(struct work_struct *work); static void nvmet_fc_handle_fcp_rqst_work(struct work_struct *work); +static void nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work); static void nvmet_fc_tgt_a_put(struct nvmet_fc_tgt_assoc *assoc); static int nvmet_fc_tgt_a_get(struct nvmet_fc_tgt_assoc *assoc); static void nvmet_fc_tgt_q_put(struct nvmet_fc_tgt_queue *queue); @@ -414,6 +416,7 @@ nvmet_fc_prep_fcp_iodlist(struct nvmet_fc_tgtport *tgtport, for (i = 0; i < queue->sqsize; fod++, i++) { INIT_WORK(&fod->work, nvmet_fc_handle_fcp_rqst_work); + INIT_WORK(&fod->done_work, nvmet_fc_fcp_rqst_op_done_work); fod->tgtport = tgtport; fod->queue = queue; fod->active = false; @@ -1807,10 +1810,13 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, } } +/* + * actual done handler for FCP operations when completed by the lldd + */ static void -nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) +nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod) { - struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private; + struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; struct nvmet_fc_tgtport *tgtport = fod->tgtport; unsigned long flags; bool abort; @@ -1905,6 +1911,28 @@ nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) } } +static void +nvmet_fc_fcp_rqst_op_done_work(struct work_struct *work) +{ + struct nvmet_fc_fcp_iod *fod = + container_of(work, struct nvmet_fc_fcp_iod, done_work); + + nvmet_fc_fod_op_done(fod); +} + +static void +nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq) +{ + struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private; + struct nvmet_fc_tgt_queue *queue = fod->queue; + + if (fod->tgtport->ops->target_features & NVMET_FCTGTFEAT_OPDONE_IN_ISR) + /* context switch so completion is not in ISR context */ + queue_work_on(queue->cpu, queue->work_q, &fod->done_work); + else + nvmet_fc_fod_op_done(fod); +} + /* * actual completion handler after execution by the nvmet layer */ @@ -2149,7 +2177,10 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port, ((queue->qid - 1) % tgtport->ops->max_hw_queues) : 0;
memcpy(&fod->cmdiubuf, cmdiubuf, cmdiubuf_len); - queue_work_on(queue->cpu, queue->work_q, &fod->work); + if (tgtport->ops->target_features & NVMET_FCTGTFEAT_CMD_IN_ISR) + queue_work_on(queue->cpu, queue->work_q, &fod->work); + else + nvmet_fc_handle_fcp_rqst(tgtport, fod); return 0; } diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 4e8e6a22bce1..a5cdeca39013 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -575,8 +575,9 @@ struct nvmet_fc_target_template tgttemplate = { .max_dif_sgl_segments = FCLOOP_SGL_SEGS, .dma_boundary = FCLOOP_DMABOUND_4G, /* optional features */ - .target_features = NVMET_FCTGTFEAT_READDATA_RSP | - NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED, + .target_features = NVMET_FCTGTFEAT_CMD_IN_ISR | + NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED | + NVMET_FCTGTFEAT_OPDONE_IN_ISR, /* sizes of additional private data for data structures */ .target_priv_sz = sizeof(struct fcloop_tport), }; diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 7ca868f394da..176b4e035bcf 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -669,7 +669,9 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba) lpfc_tgttemplate.max_hw_queues = phba->cfg_nvme_io_channel; lpfc_tgttemplate.max_sgl_segments = phba->cfg_sg_seg_cnt; lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP | - NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED; + NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED | + NVMET_FCTGTFEAT_CMD_IN_ISR | + NVMET_FCTGTFEAT_OPDONE_IN_ISR; #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) error = nvmet_fc_register_targetport(&pinfo, &lpfc_tgttemplate, diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 16eb264980c2..d70a9c98bc23 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -655,6 +655,22 @@ enum { * on. The transport should pick a cpu to schedule the work * on. */ + NVMET_FCTGTFEAT_CMD_IN_ISR = (1 << 2), + /* Bit 2: When 0, the LLDD is calling the cmd rcv handler + * in a non-isr context, allowing the transport to finish + * op completion in the calling context. When 1, the LLDD + * is calling the cmd rcv handler in an ISR context, + * requiring the transport to transition to a workqueue + * for op completion. + */ + NVMET_FCTGTFEAT_OPDONE_IN_ISR = (1 << 3), + /* Bit 3: When 0, the LLDD is calling the op done handler + * in a non-isr context, allowing the transport to finish + * op completion in the calling context. When 1, the LLDD + * is calling the op done handler in an ISR context, + * requiring the transport to transition to a workqueue + * for op completion. + */ }; From 19b58d9473e8e3d38e7f3602a07c8febfbd07bc1 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 11 Apr 2017 11:32:29 -0700 Subject: [PATCH 05/13] nvmet_fc: add req_release to lldd api With the advent of the opdone calls changing context, the lldd can no longer assume that, once the op->done call returns for RSP operations, the request struct is no longer being accessed. As such, revise the lldd api for a req_release callback that the transport will call when the job is complete. This will also be used with abort cases. Fixed text in the api header for the change in io completion semantics. Revised lpfc to support the new req_release api.
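As an illustration of the new ownership rule (not part of the patch), a minimal LLDD-side sketch, assuming a hypothetical driver whose example_exchange wraps the transport request (the example_* names are illustrative only; the real types come from <linux/nvme-fc-driver.h>):

#include <linux/nvme-fc-driver.h>

/* Hypothetical per-exchange context owned by the LLDD. */
struct example_exchange {
	struct nvmefc_tgt_fcp_req tgt_req;	/* handed to nvmet_fc_rcv_fcp_req() */
	/* ... hardware exchange resources ... */
};

static void example_repost_exchange(struct example_exchange *xchg);	/* hypothetical helper */

/*
 * With this patch, returning from done() on a RSP operation no longer
 * ends the transport's use of the request; the LLDD may recycle the
 * exchange only once this release callback runs.
 */
static void example_fcp_req_release(struct nvmet_fc_target_port *tgtport,
				    struct nvmefc_tgt_fcp_req *req)
{
	struct example_exchange *xchg =
		container_of(req, struct example_exchange, tgt_req);

	example_repost_exchange(xchg);
}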
Signed-off-by: James Smart Signed-off-by: Sagi Grimberg --- drivers/nvme/target/fc.c | 8 +++- drivers/nvme/target/fcloop.c | 15 ++++--- drivers/scsi/lpfc/lpfc_nvmet.c | 73 +++++++++++++++++++++++++++++++--- drivers/scsi/lpfc/lpfc_nvmet.h | 7 +++- include/linux/nvme-fc-driver.h | 59 ++++++++++++++++----------- 5 files changed, 125 insertions(+), 37 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index a4fad3d0b660..d7068f039904 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -482,6 +482,8 @@ static void nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, struct nvmet_fc_fcp_iod *fod) { + struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; + struct nvmet_fc_tgtport *tgtport = fod->tgtport; unsigned long flags; spin_lock_irqsave(&queue->qlock, flags); @@ -493,6 +495,8 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, * release the reference taken at queue lookup and fod allocation */ nvmet_fc_tgt_q_put(queue); + + tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq); } static int @@ -849,7 +853,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, int ret, idx; if (!template->xmt_ls_rsp || !template->fcp_op || - !template->targetport_delete || + !template->fcp_req_release || !template->targetport_delete || !template->max_hw_queues || !template->max_sgl_segments || !template->max_dif_sgl_segments || !template->dma_boundary) { ret = -EINVAL; @@ -2124,7 +2128,7 @@ nvmet_fc_handle_fcp_rqst_work(struct work_struct *work) * If this routine returns error, the lldd should abort the exchange. * * @target_port: pointer to the (registered) target port the FCP CMD IU - * was receive on. + * was received on. * @fcpreq: pointer to a fcpreq request structure to be used to reference * the exchange corresponding to the FCP Exchange. 
* @cmdiubuf: pointer to the buffer containing the FCP CMD IU diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index a5cdeca39013..2c4d68201ba7 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -492,14 +492,18 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport, tgt_fcpreq->fcp_error = fcp_err; tgt_fcpreq->done(tgt_fcpreq); - if ((!fcp_err) && (op == NVMET_FCOP_RSP || - op == NVMET_FCOP_READDATA_RSP || - op == NVMET_FCOP_ABORT)) - schedule_work(&tfcp_req->work); - return 0; } +static void +fcloop_fcp_req_release(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *tgt_fcpreq) +{ + struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); + + schedule_work(&tfcp_req->work); +} + static void fcloop_ls_abort(struct nvme_fc_local_port *localport, struct nvme_fc_remote_port *remoteport, @@ -570,6 +574,7 @@ struct nvmet_fc_target_template tgttemplate = { .targetport_delete = fcloop_targetport_delete, .xmt_ls_rsp = fcloop_xmt_ls_rsp, .fcp_op = fcloop_fcp_op, + .fcp_req_release = fcloop_fcp_req_release, .max_hw_queues = FCLOOP_HW_QUEUES, .max_sgl_segments = FCLOOP_SGL_SEGS, .max_dif_sgl_segments = FCLOOP_SGL_SEGS, diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 176b4e035bcf..1846c7eb086a 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -408,9 +408,7 @@ out: if (phba->ktime_on) lpfc_nvmet_ktime(phba, ctxp); #endif - /* Let Abort cmpl repost the context */ - if (!(ctxp->flag & LPFC_NVMET_ABORT_OP)) - lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + /* lpfc_nvmet_xmt_fcp_release() will recycle the context */ } else { ctxp->entry_cnt++; start_clean = offsetof(struct lpfc_iocbq, wqe); @@ -634,10 +632,47 @@ lpfc_nvmet_targetport_delete(struct nvmet_fc_target_port *targetport) complete(&tport->tport_unreg_done); } +static void +lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *rsp) +{ + struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private; + struct lpfc_nvmet_rcv_ctx *ctxp = + container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); + struct lpfc_hba *phba = ctxp->phba; + unsigned long flags; + bool aborting = false; + + spin_lock_irqsave(&ctxp->ctxlock, flags); + if (ctxp->flag & LPFC_NVMET_ABORT_OP) { + aborting = true; + ctxp->flag |= LPFC_NVMET_CTX_RLS; + } + spin_unlock_irqrestore(&ctxp->ctxlock, flags); + + if (aborting) + /* let the abort path do the real release */ + return; + + /* Sanity check */ + if (ctxp->state != LPFC_NVMET_STE_DONE) { + atomic_inc(&lpfc_nvmep->xmt_fcp_drop); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, + "6117 Bad state IO x%x aborted\n", + ctxp->oxid); + } + + lpfc_nvmeio_data(phba, "NVMET FCP FREE: xri x%x ste %d\n", ctxp->oxid, + ctxp->state, 0); + + lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); +} + static struct nvmet_fc_target_template lpfc_tgttemplate = { .targetport_delete = lpfc_nvmet_targetport_delete, .xmt_ls_rsp = lpfc_nvmet_xmt_ls_rsp, .fcp_op = lpfc_nvmet_xmt_fcp_op, + .fcp_req_release = lpfc_nvmet_xmt_fcp_release, .max_hw_queues = 1, .max_sgl_segments = LPFC_NVMET_DEFAULT_SEGS, @@ -834,6 +869,7 @@ dropit: ctxp->wqeq = NULL; ctxp->state = LPFC_NVMET_STE_RCV; ctxp->rqb_buffer = (void *)nvmebuf; + spin_lock_init(&ctxp->ctxlock); lpfc_nvmeio_data(phba, "NVMET LS RCV: xri x%x sz %d from %06x\n", oxid, size, sid); @@ -1595,6 +1631,8 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, struct lpfc_nvmet_rcv_ctx 
*ctxp; struct lpfc_nvmet_tgtport *tgtp; uint32_t status, result; + unsigned long flags; + bool released = false; ctxp = cmdwqe->context2; status = bf_get(lpfc_wcqe_c_status, wcqe); @@ -1609,7 +1647,18 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, result, wcqe->word3); ctxp->state = LPFC_NVMET_STE_DONE; - lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + spin_lock_irqsave(&ctxp->ctxlock, flags); + if (ctxp->flag & LPFC_NVMET_CTX_RLS) + released = true; + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; + spin_unlock_irqrestore(&ctxp->ctxlock, flags); + + /* + * if transport has released ctx, then can reuse it. Otherwise, + * will be recycled by transport release call. + */ + if (released) + lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); cmdwqe->context2 = NULL; cmdwqe->context3 = NULL; @@ -1632,7 +1681,9 @@ lpfc_nvmet_xmt_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, { struct lpfc_nvmet_rcv_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; + unsigned long flags; uint32_t status, result; + bool released = false; ctxp = cmdwqe->context2; status = bf_get(lpfc_wcqe_c_status, wcqe); @@ -1654,7 +1705,19 @@ lpfc_nvmet_xmt_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, ctxp->state, ctxp->oxid); } ctxp->state = LPFC_NVMET_STE_DONE; - lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + spin_lock_irqsave(&ctxp->ctxlock, flags); + if (ctxp->flag & LPFC_NVMET_CTX_RLS) + released = true; + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; + spin_unlock_irqrestore(&ctxp->ctxlock, flags); + + /* + * if transport has released ctx, then can reuse it. Otherwise, + * will be recycled by transport release call. + */ + if (released) + lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + cmdwqe->context2 = NULL; cmdwqe->context3 = NULL; } diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h index ca96f05c1604..02735fc6fd41 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.h +++ b/drivers/scsi/lpfc/lpfc_nvmet.h @@ -81,6 +81,7 @@ struct lpfc_nvmet_rcv_ctx { struct lpfc_iocbq *wqeq; struct lpfc_iocbq *abort_wqeq; dma_addr_t txrdy_phys; + spinlock_t ctxlock; /* protect flag access */ uint32_t *txrdy; uint32_t sid; uint32_t offset; @@ -97,8 +98,10 @@ struct lpfc_nvmet_rcv_ctx { #define LPFC_NVMET_STE_RSP 4 #define LPFC_NVMET_STE_DONE 5 uint16_t flag; -#define LPFC_NVMET_IO_INP 1 -#define LPFC_NVMET_ABORT_OP 2 +#define LPFC_NVMET_IO_INP 0x1 +#define LPFC_NVMET_ABORT_OP 0x2 +#define LPFC_NVMET_CTX_RLS 0x4 + struct rqb_dmabuf *rqb_buffer; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index d70a9c98bc23..d98ddb2feabc 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -741,12 +741,12 @@ struct nvmet_fc_target_port { * be freed/released. * Entrypoint is Mandatory. * - * @fcp_op: Called to perform a data transfer, transmit a response, or - * abort an FCP opertion. The nvmefc_tgt_fcp_req structure is the same - * LLDD-supplied exchange structure specified in the - * nvmet_fc_rcv_fcp_req() call made when the FCP CMD IU was received. - * The op field in the structure shall indicate the operation for - * the LLDD to perform relative to the io. + * @fcp_op: Called to perform a data transfer or transmit a response. + * The nvmefc_tgt_fcp_req structure is the same LLDD-supplied + * exchange structure specified in the nvmet_fc_rcv_fcp_req() call + * made when the FCP CMD IU was received. 
The op field in the + * structure shall indicate the operation for the LLDD to perform + * relative to the io. * NVMET_FCOP_READDATA operation: the LLDD is to send the * payload data (described by sglist) to the host in 1 or * more FC sequences (preferrably 1). Note: the fc-nvme layer @@ -768,29 +768,35 @@ struct nvmet_fc_target_port { * successfully, the LLDD is to update the nvmefc_tgt_fcp_req * transferred_length field and may subsequently transmit the * FCP_RSP iu payload (described by rspbuf, rspdma, rsplen). - * The LLDD is to await FCP_CONF reception to confirm the RSP - * reception by the host. The LLDD may retramsit the FCP_RSP iu - * if necessary per FC-NVME. Upon reception of FCP_CONF, or upon - * FCP_CONF failure, the LLDD is to set the nvmefc_tgt_fcp_req - * fcp_error field and consider the operation complete.. - * NVMET_FCOP_RSP: the LLDD is to transmit the FCP_RSP iu payload - * (described by rspbuf, rspdma, rsplen). The LLDD is to await - * FCP_CONF reception to confirm the RSP reception by the host. - * The LLDD may retramsit the FCP_RSP iu if necessary per FC-NVME. - * Upon reception of FCP_CONF, or upon FCP_CONF failure, the + * If FCP_CONF is supported, the LLDD is to await FCP_CONF + * reception to confirm the RSP reception by the host. The LLDD + * may retramsit the FCP_RSP iu if necessary per FC-NVME. Upon + * transmission of the FCP_RSP iu if FCP_CONF is not supported, + * or upon success/failure of FCP_CONF if it is supported, the * LLDD is to set the nvmefc_tgt_fcp_req fcp_error field and - * consider the operation complete.. + * consider the operation complete. + * NVMET_FCOP_RSP: the LLDD is to transmit the FCP_RSP iu payload + * (described by rspbuf, rspdma, rsplen). If FCP_CONF is + * supported, the LLDD is to await FCP_CONF reception to confirm + * the RSP reception by the host. The LLDD may retramsit the + * FCP_RSP iu if FCP_CONF is not received per FC-NVME. Upon + * transmission of the FCP_RSP iu if FCP_CONF is not supported, + * or upon success/failure of FCP_CONF if it is supported, the + * LLDD is to set the nvmefc_tgt_fcp_req fcp_error field and + * consider the operation complete. * NVMET_FCOP_ABORT: the LLDD is to terminate the exchange * corresponding to the fcp operation. The LLDD shall send * ABTS and follow FC exchange abort-multi rules, including * ABTS retries and possible logout. * Upon completing the indicated operation, the LLDD is to set the * status fields for the operation (tranferred_length and fcp_error - * status) in the request, then all the "done" routine - * indicated in the fcp request. Upon return from the "done" - * routine for either a NVMET_FCOP_RSP or NVMET_FCOP_ABORT operation - * the fc-nvme layer will not longer reference the fcp request, - * allowing the LLDD to free/release the fcp request. + * status) in the request, then call the "done" routine + * indicated in the fcp request. After the operation completes, + * regardless of whether the FCP_RSP iu was successfully transmit, + * the LLDD-supplied exchange structure must remain valid until the + * transport calls the fcp_req_release() callback to return ownership + * of the exchange structure back to the LLDD so that it may be used + * for another fcp command. 
* Note: when calling the done routine for READDATA or WRITEDATA * operations, the fc-nvme layer may immediate convert, in the same * thread and before returning to the LLDD, the fcp operation to @@ -802,6 +808,11 @@ struct nvmet_fc_target_port { * Returns 0 on success, - on failure (Ex: -EIO) * Entrypoint is Mandatory. * + * @fcp_req_release: Called by the transport to return a nvmefc_tgt_fcp_req + * to the LLDD after all operations on the fcp operation are complete. + * This may be due to the command completing or upon completion of + * abort cleanup. + * * @max_hw_queues: indicates the maximum number of hw queues the LLDD * supports for cpu affinitization. * Value is Mandatory. Must be at least 1. @@ -836,7 +847,9 @@ struct nvmet_fc_target_template { int (*xmt_ls_rsp)(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_ls_req *tls_req); int (*fcp_op)(struct nvmet_fc_target_port *tgtport, - struct nvmefc_tgt_fcp_req *); + struct nvmefc_tgt_fcp_req *fcpreq); + void (*fcp_req_release)(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *fcpreq); u32 max_hw_queues; u16 max_sgl_segments; From ce79bfc2c7b415f0f296adc59a54c9c781d63617 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 11 Apr 2017 11:32:30 -0700 Subject: [PATCH 06/13] nvme_fcloop: split job struct from transport for req_release Current design has the fcloop job struct, used for both initiator and target processing, allocated as part of the initiator request structure. On aborts, the initiator side (based on the request) may terminate, yet the target side wants to continue processing. The target side can't do that if the initiator side goes away. Revise fcloop to allocate an independent target side structure when it starts an io from the initiator. Added a lock to the request struct as well to synchronize pointer updates on abort calls. Modified target downcalls to recognize conditions where the initiator has aborted the io (and thus nulled the pointer between job structs), so they avoid referencing sgl lists that are gone and no longer make upcalls to the initiator. In conditions where the targetport is no longer connected, have the initiator return an access failure rather than simulating a command completion.
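In outline, the split looks like this (a simplified view only; the full definitions, including the lock later added for abort synchronization, are in the diffs below and in the following patch):

/* Simplified view of the split; see the diffs below for the real code. */
struct fcloop_ini_fcpreq {		/* embedded in the initiator request */
	struct nvmefc_fcp_req *fcpreq;
	struct fcloop_fcpreq *tfcp_req;	/* NULLed if the initiator aborts */
};

struct fcloop_fcpreq {			/* now a separate, per-io allocation */
	struct nvmefc_fcp_req *fcpreq;	/* NULLed once the initiator is gone */
	/* ... target-side state, status, work struct ... */
};

Each half drops its pointer to the other under a lock on abort, so either half can outlive its peer safely.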
Signed-off-by: James Smart Signed-off-by: Sagi Grimberg --- drivers/nvme/target/fcloop.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 2c4d68201ba7..dbcafd30dd9b 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -251,6 +251,10 @@ struct fcloop_fcpreq { struct nvmefc_tgt_fcp_req tgt_fcp_req; }; +struct fcloop_ini_fcpreq { + struct nvmefc_fcp_req *fcpreq; + struct fcloop_fcpreq *tfcp_req; +}; static inline struct fcloop_lsreq * tgt_ls_req_to_lsreq(struct nvmefc_tgt_ls_req *tgt_lsreq) @@ -355,6 +359,8 @@ fcloop_tgt_fcprqst_done_work(struct work_struct *work) fcpreq->status = tfcp_req->status; fcpreq->done(fcpreq); } + + kfree(tfcp_req); } @@ -364,20 +370,23 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport, void *hw_queue_handle, struct nvmefc_fcp_req *fcpreq) { - struct fcloop_fcpreq *tfcp_req = fcpreq->private; struct fcloop_rport *rport = remoteport->private; + struct fcloop_ini_fcpreq *inireq = fcpreq->private; + struct fcloop_fcpreq *tfcp_req; int ret = 0; - INIT_WORK(&tfcp_req->work, fcloop_tgt_fcprqst_done_work); + if (!rport->targetport) + return -ECONNREFUSED; - if (!rport->targetport) { - tfcp_req->status = NVME_SC_FC_TRANSPORT_ERROR; - schedule_work(&tfcp_req->work); - return ret; - } + tfcp_req = kzalloc(sizeof(*tfcp_req), GFP_KERNEL); + if (!tfcp_req) + return -ENOMEM; + inireq->fcpreq = fcpreq; + inireq->tfcp_req = tfcp_req; tfcp_req->fcpreq = fcpreq; tfcp_req->tport = rport->targetport->private; + INIT_WORK(&tfcp_req->work, fcloop_tgt_fcprqst_done_work); ret = nvmet_fc_rcv_fcp_req(rport->targetport, &tfcp_req->tgt_fcp_req, fcpreq->cmdaddr, fcpreq->cmdlen); @@ -567,7 +576,7 @@ struct nvme_fc_port_template fctemplate = { .local_priv_sz = sizeof(struct fcloop_lport), .remote_priv_sz = sizeof(struct fcloop_rport), .lsrqst_priv_sz = sizeof(struct fcloop_lsreq), - .fcprqst_priv_sz = sizeof(struct fcloop_fcpreq), + .fcprqst_priv_sz = sizeof(struct fcloop_ini_fcpreq), }; struct nvmet_fc_target_template tgttemplate = { From a97ec51b37efacb84f286979876675a8143035b0 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 11 Apr 2017 11:32:31 -0700 Subject: [PATCH 07/13] nvmet_fc: Rework target side abort handling target transport: ---------------------- There are cases when there is a need to abort in-progress target operations (writedata) so that controller termination or errors can clean up. That can't happen currently as the abort is another target op type, so it can't be used till the running one finishes (and it may not). Solve by removing the abort op type and creating a separate downcall from the transport to the lldd to request an io to be aborted. The transport will abort ios on queue teardown or io errors. In general the transport tries to call the lldd abort only when the io state is idle. Meaning: ops that transmit data (readdata or rsp) will always finish their transmit (or the lldd will see a state on the link or initiator port that fails the transmit) and the done call for the operation will occur. The transport will wait for the op done upcall before calling the abort function, and as the io is idle, the io can be cleaned up immediately after the abort call. Similarly, ios that are not waiting for data or transmitting data must be in the nvmet layer being processed.
The transport will wait for the nvmet layer completion before calling the abort function, and as the io is idle, the io can be cleaned up immediately after the abort call. As for ops that are waiting for data (writedata), they may be outstanding indefinitely if the lldd doesn't see a condition where the initiator port or link is bad. In those cases, the transport will call the abort function and wait for the lldd's op done upcall for the operation, where it will then clean up the io. Additionally, if a lldd receives an ABTS and matches it to an outstanding request in the transport, a new transport upcall was created to abort the outstanding request in the transport. The transport expects any outstanding op call (readdata or writedata) will be completed by the lldd and the operation upcall made. The transport doesn't act on the reported abort (e.g. clean up the io) until an op done upcall occurs, a new op is attempted, or the nvmet layer completes the io processing. fcloop: ---------------------- Updated to support the new target apis. On fcp io aborts from the initiator, the loopback context is updated to NULL out the half that has completed. The initiator side is immediately called after the abort request with an io completion (abort status). On fcp io aborts from the target, the io is stopped and the initiator side sees it as an aborted io. Target side ops, perhaps in progress while the initiator side is done, continue but noop the data movement as there's no structure on the initiator side to reference. patch also contains: ---------------------- Revised lpfc to support the new abort api. Commonized rsp buffer syncing and nulling of private data based on calling paths. Errors in op done calls don't take action on the fod; they're bad operations, which implies the fod may be bad. Signed-off-by: James Smart Signed-off-by: Sagi Grimberg --- drivers/nvme/target/fc.c | 205 ++++++++++++++++++++++++--------- drivers/nvme/target/fcloop.c | 152 ++++++++++++++++++++---- drivers/scsi/lpfc/lpfc_nvmet.c | 49 ++++---- include/linux/nvme-fc-driver.h | 25 ++-- 4 files changed, 325 insertions(+), 106 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index d7068f039904..e5d30bbb1b10 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -82,6 +82,8 @@ struct nvmet_fc_fcp_iod { enum nvmet_fcp_datadir io_dir; bool active; bool abort; + bool aborted; + bool writedataactive; spinlock_t flock; struct nvmet_req req; @@ -420,6 +422,9 @@ nvmet_fc_prep_fcp_iodlist(struct nvmet_fc_tgtport *tgtport, fod->tgtport = tgtport; fod->queue = queue; fod->active = false; + fod->abort = false; + fod->aborted = false; + fod->fcpreq = NULL; list_add_tail(&fod->fcp_list, &queue->fod_list); spin_lock_init(&fod->flock); @@ -466,7 +471,6 @@ nvmet_fc_alloc_fcp_iod(struct nvmet_fc_tgt_queue *queue) if (fod) { list_del(&fod->fcp_list); fod->active = true; - fod->abort = false; /* * no queue reference is taken, as it was taken by the * queue lookup just prior to the allocation.
The iod @@ -486,9 +490,18 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, struct nvmet_fc_tgtport *tgtport = fod->tgtport; unsigned long flags; + fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma, + sizeof(fod->rspiubuf), DMA_TO_DEVICE); + + fcpreq->nvmet_fc_private = NULL; + spin_lock_irqsave(&queue->qlock, flags); list_add_tail(&fod->fcp_list, &fod->queue->fod_list); fod->active = false; + fod->abort = false; + fod->aborted = false; + fod->writedataactive = false; + fod->fcpreq = NULL; spin_unlock_irqrestore(&queue->qlock, flags); /* @@ -622,33 +635,13 @@ nvmet_fc_tgt_q_get(struct nvmet_fc_tgt_queue *queue) } -static void -nvmet_fc_abort_op(struct nvmet_fc_tgtport *tgtport, - struct nvmefc_tgt_fcp_req *fcpreq) -{ - int ret; - - fcpreq->op = NVMET_FCOP_ABORT; - fcpreq->offset = 0; - fcpreq->timeout = 0; - fcpreq->transfer_length = 0; - fcpreq->transferred_length = 0; - fcpreq->fcp_error = 0; - fcpreq->sg_cnt = 0; - - ret = tgtport->ops->fcp_op(&tgtport->fc_target_port, fcpreq); - if (ret) - /* should never reach here !! */ - WARN_ON(1); -} - - static void nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) { + struct nvmet_fc_tgtport *tgtport = queue->assoc->tgtport; struct nvmet_fc_fcp_iod *fod = queue->fod; unsigned long flags; - int i; + int i, writedataactive; bool disconnect; disconnect = atomic_xchg(&queue->connected, 0); @@ -659,7 +652,20 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) if (fod->active) { spin_lock(&fod->flock); fod->abort = true; + writedataactive = fod->writedataactive; spin_unlock(&fod->flock); + /* + * only call lldd abort routine if waiting for + * writedata. other outstanding ops should finish + * on their own. + */ + if (writedataactive) { + spin_lock(&fod->flock); + fod->aborted = true; + spin_unlock(&fod->flock); + tgtport->ops->fcp_abort( + &tgtport->fc_target_port, fod->fcpreq); + } } } spin_unlock_irqrestore(&queue->qlock, flags); @@ -853,6 +859,7 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, int ret, idx; if (!template->xmt_ls_rsp || !template->fcp_op || + !template->fcp_abort || !template->fcp_req_release || !template->targetport_delete || !template->max_hw_queues || !template->max_sgl_segments || !template->max_dif_sgl_segments || !template->dma_boundary) { @@ -1717,6 +1724,26 @@ nvmet_fc_prep_fcp_rsp(struct nvmet_fc_tgtport *tgtport, static void nvmet_fc_xmt_fcp_op_done(struct nvmefc_tgt_fcp_req *fcpreq); +static void +nvmet_fc_abort_op(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_fcp_iod *fod) +{ + struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; + + /* data no longer needed */ + nvmet_fc_free_tgt_pgs(fod); + + /* + * if an ABTS was received or we issued the fcp_abort early + * don't call abort routine again. 
+ */ + /* no need to take lock - lock was taken earlier to get here */ + if (!fod->aborted) + tgtport->ops->fcp_abort(&tgtport->fc_target_port, fcpreq); + + nvmet_fc_free_fcp_iod(fod->queue, fod); +} + static void nvmet_fc_xmt_fcp_rsp(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod) @@ -1730,7 +1757,7 @@ nvmet_fc_xmt_fcp_rsp(struct nvmet_fc_tgtport *tgtport, ret = tgtport->ops->fcp_op(&tgtport->fc_target_port, fod->fcpreq); if (ret) - nvmet_fc_abort_op(tgtport, fod->fcpreq); + nvmet_fc_abort_op(tgtport, fod); } static void @@ -1739,6 +1766,7 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, { struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; struct scatterlist *sg, *datasg; + unsigned long flags; u32 tlen, sg_off; int ret; @@ -1803,10 +1831,13 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, */ fod->abort = true; - if (op == NVMET_FCOP_WRITEDATA) + if (op == NVMET_FCOP_WRITEDATA) { + spin_lock_irqsave(&fod->flock, flags); + fod->writedataactive = false; + spin_unlock_irqrestore(&fod->flock, flags); nvmet_req_complete(&fod->req, NVME_SC_FC_TRANSPORT_ERROR); - else /* NVMET_FCOP_READDATA or NVMET_FCOP_READDATA_RSP */ { + } else /* NVMET_FCOP_READDATA or NVMET_FCOP_READDATA_RSP */ { fcpreq->fcp_error = ret; fcpreq->transferred_length = 0; nvmet_fc_xmt_fcp_op_done(fod->fcpreq); @@ -1814,6 +1845,27 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, } } +static inline bool +__nvmet_fc_fod_op_abort(struct nvmet_fc_fcp_iod *fod, bool abort) +{ + struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; + struct nvmet_fc_tgtport *tgtport = fod->tgtport; + + /* if in the middle of an io and we need to tear down */ + if (abort) { + if (fcpreq->op == NVMET_FCOP_WRITEDATA) { + nvmet_req_complete(&fod->req, + NVME_SC_FC_TRANSPORT_ERROR); + return true; + } + + nvmet_fc_abort_op(tgtport, fod); + return true; + } + + return false; +} + /* * actual done handler for FCP operations when completed by the lldd */ @@ -1827,22 +1879,20 @@ nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod) spin_lock_irqsave(&fod->flock, flags); abort = fod->abort; + fod->writedataactive = false; spin_unlock_irqrestore(&fod->flock, flags); - /* if in the middle of an io and we need to tear down */ - if (abort && fcpreq->op != NVMET_FCOP_ABORT) { - /* data no longer needed */ - nvmet_fc_free_tgt_pgs(fod); - - nvmet_req_complete(&fod->req, fcpreq->fcp_error); - return; - } - switch (fcpreq->op) { case NVMET_FCOP_WRITEDATA: + if (__nvmet_fc_fod_op_abort(fod, abort)) + return; if (fcpreq->fcp_error || fcpreq->transferred_length != fcpreq->transfer_length) { + spin_lock(&fod->flock); + fod->abort = true; + spin_unlock(&fod->flock); + nvmet_req_complete(&fod->req, NVME_SC_FC_TRANSPORT_ERROR); return; @@ -1850,6 +1900,10 @@ nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod) fod->offset += fcpreq->transferred_length; if (fod->offset != fod->total_length) { + spin_lock_irqsave(&fod->flock, flags); + fod->writedataactive = true; + spin_unlock_irqrestore(&fod->flock, flags); + /* transfer the next chunk */ nvmet_fc_transfer_fcp_data(tgtport, fod, NVMET_FCOP_WRITEDATA); @@ -1864,12 +1918,11 @@ nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod) case NVMET_FCOP_READDATA: case NVMET_FCOP_READDATA_RSP: + if (__nvmet_fc_fod_op_abort(fod, abort)) + return; if (fcpreq->fcp_error || fcpreq->transferred_length != fcpreq->transfer_length) { - /* data no longer needed */ - nvmet_fc_free_tgt_pgs(fod); - - nvmet_fc_abort_op(tgtport, fod->fcpreq); + nvmet_fc_abort_op(tgtport, fod); return; } @@ -1878,8 +1931,6 
@@ nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod) if (fcpreq->op == NVMET_FCOP_READDATA_RSP) { /* data no longer needed */ nvmet_fc_free_tgt_pgs(fod); - fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma, - sizeof(fod->rspiubuf), DMA_TO_DEVICE); nvmet_fc_free_fcp_iod(fod->queue, fod); return; } @@ -1902,15 +1953,12 @@ nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod) break; case NVMET_FCOP_RSP: - case NVMET_FCOP_ABORT: - fc_dma_sync_single_for_cpu(tgtport->dev, fod->rspdma, - sizeof(fod->rspiubuf), DMA_TO_DEVICE); + if (__nvmet_fc_fod_op_abort(fod, abort)) + return; nvmet_fc_free_fcp_iod(fod->queue, fod); break; default: - nvmet_fc_free_tgt_pgs(fod); - nvmet_fc_abort_op(tgtport, fod->fcpreq); break; } } @@ -1958,10 +2006,7 @@ __nvmet_fc_fcp_nvme_cmd_done(struct nvmet_fc_tgtport *tgtport, fod->queue->sqhd = cqe->sq_head; if (abort) { - /* data no longer needed */ - nvmet_fc_free_tgt_pgs(fod); - - nvmet_fc_abort_op(tgtport, fod->fcpreq); + nvmet_fc_abort_op(tgtport, fod); return; } @@ -2057,8 +2102,8 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, &fod->queue->nvme_cq, &fod->queue->nvme_sq, &nvmet_fc_tgt_fcp_ops); - if (!ret) { /* bad SQE content */ - nvmet_fc_abort_op(tgtport, fod->fcpreq); + if (!ret) { /* bad SQE content or invalid ctrl state */ + nvmet_fc_abort_op(tgtport, fod); return; } @@ -2098,7 +2143,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, return; transport_error: - nvmet_fc_abort_op(tgtport, fod->fcpreq); + nvmet_fc_abort_op(tgtport, fod); } /* @@ -2151,7 +2196,6 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port, (be16_to_cpu(cmdiu->iu_len) != (sizeof(*cmdiu)/4))) return -EIO; - queue = nvmet_fc_find_target_queue(tgtport, be64_to_cpu(cmdiu->connection_id)); if (!queue) @@ -2190,6 +2234,59 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port, } EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_req); +/** + * nvmet_fc_rcv_fcp_abort - transport entry point called by an LLDD + * upon the reception of an ABTS for a FCP command + * + * Notify the transport that an ABTS has been received for a FCP command + * that had been given to the transport via nvmet_fc_rcv_fcp_req(). The + * LLDD believes the command is still being worked on + * (template_ops->fcp_req_release() has not been called). + * + * The transport will wait for any outstanding work (an op to the LLDD, + * which the lldd should complete with error due to the ABTS; or the + * completion from the nvmet layer of the nvme command), then will + * stop processing and call the nvmet_fc_rcv_fcp_req() callback to + * return the i/o context to the LLDD. The LLDD may send the BA_ACC + * to the ABTS either after return from this function (assuming any + * outstanding op work has been terminated) or upon the callback being + * called. + * + * @target_port: pointer to the (registered) target port the FCP CMD IU + * was received on. + * @fcpreq: pointer to the fcpreq request structure that corresponds + * to the exchange that received the ABTS. + */ +void +nvmet_fc_rcv_fcp_abort(struct nvmet_fc_target_port *target_port, + struct nvmefc_tgt_fcp_req *fcpreq) +{ + struct nvmet_fc_fcp_iod *fod = fcpreq->nvmet_fc_private; + struct nvmet_fc_tgt_queue *queue; + unsigned long flags; + + if (!fod || fod->fcpreq != fcpreq) + /* job appears to have already completed, ignore abort */ + return; + + queue = fod->queue; + + spin_lock_irqsave(&queue->qlock, flags); + if (fod->active) { + /* + * mark as abort. 
The abort handler, invoked upon completion + * of any work, will detect the aborted status and do the + * callback. + */ + spin_lock(&fod->flock); + fod->abort = true; + fod->aborted = true; + spin_unlock(&fod->flock); + } + spin_unlock_irqrestore(&queue->qlock, flags); +} +EXPORT_SYMBOL_GPL(nvmet_fc_rcv_fcp_abort); + enum { FCT_TRADDR_ERR = 0, FCT_TRADDR_WWNN = 1 << 0, diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index dbcafd30dd9b..aaa3dbe22bd5 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -246,7 +246,10 @@ struct fcloop_lsreq { struct fcloop_fcpreq { struct fcloop_tport *tport; struct nvmefc_fcp_req *fcpreq; + spinlock_t reqlock; u16 status; + bool active; + bool aborted; struct work_struct work; struct nvmefc_tgt_fcp_req tgt_fcp_req; }; @@ -254,6 +257,7 @@ struct fcloop_fcpreq { struct fcloop_ini_fcpreq { struct nvmefc_fcp_req *fcpreq; struct fcloop_fcpreq *tfcp_req; + struct work_struct iniwork; }; static inline struct fcloop_lsreq * @@ -345,7 +349,21 @@ fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *tport, } /* - * FCP IO operation done. call back up initiator "done" flows. + * FCP IO operation done by initiator abort. + * call back up initiator "done" flows. + */ +static void +fcloop_tgt_fcprqst_ini_done_work(struct work_struct *work) +{ + struct fcloop_ini_fcpreq *inireq = + container_of(work, struct fcloop_ini_fcpreq, iniwork); + + inireq->fcpreq->done(inireq->fcpreq); +} + +/* + * FCP IO operation done by target completion. + * call back up initiator "done" flows. */ static void fcloop_tgt_fcprqst_done_work(struct work_struct *work) @@ -353,9 +371,13 @@ fcloop_tgt_fcprqst_done_work(struct work_struct *work) struct fcloop_fcpreq *tfcp_req = container_of(work, struct fcloop_fcpreq, work); struct fcloop_tport *tport = tfcp_req->tport; - struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + struct nvmefc_fcp_req *fcpreq; - if (tport->remoteport) { + spin_lock(&tfcp_req->reqlock); + fcpreq = tfcp_req->fcpreq; + spin_unlock(&tfcp_req->reqlock); + + if (tport->remoteport && fcpreq) { fcpreq->status = tfcp_req->status; fcpreq->done(fcpreq); } @@ -384,8 +406,10 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport, inireq->fcpreq = fcpreq; inireq->tfcp_req = tfcp_req; + INIT_WORK(&inireq->iniwork, fcloop_tgt_fcprqst_ini_done_work); tfcp_req->fcpreq = fcpreq; tfcp_req->tport = rport->targetport->private; + spin_lock_init(&tfcp_req->reqlock); INIT_WORK(&tfcp_req->work, fcloop_tgt_fcprqst_done_work); ret = nvmet_fc_rcv_fcp_req(rport->targetport, &tfcp_req->tgt_fcp_req, @@ -453,50 +477,86 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *tgt_fcpreq) { struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); - struct nvmefc_fcp_req *fcpreq = tfcp_req->fcpreq; + struct nvmefc_fcp_req *fcpreq; u32 rsplen = 0, xfrlen = 0; - int fcp_err = 0; + int fcp_err = 0, active, aborted; u8 op = tgt_fcpreq->op; + spin_lock(&tfcp_req->reqlock); + fcpreq = tfcp_req->fcpreq; + active = tfcp_req->active; + aborted = tfcp_req->aborted; + tfcp_req->active = true; + spin_unlock(&tfcp_req->reqlock); + + if (unlikely(active)) + /* illegal - call while i/o active */ + return -EALREADY; + + if (unlikely(aborted)) { + /* target transport has aborted i/o prior */ + spin_lock(&tfcp_req->reqlock); + tfcp_req->active = false; + spin_unlock(&tfcp_req->reqlock); + tgt_fcpreq->transferred_length = 0; + tgt_fcpreq->fcp_error = -ECANCELED; + tgt_fcpreq->done(tgt_fcpreq); + return 0; + } + + /* + * if fcpreq is NULL, the I/O 
has been aborted (from + * initiator side). For the target side, act as if all is well + * but don't actually move data. + */ + switch (op) { case NVMET_FCOP_WRITEDATA: xfrlen = tgt_fcpreq->transfer_length; - fcloop_fcp_copy_data(op, tgt_fcpreq->sg, fcpreq->first_sgl, - tgt_fcpreq->offset, xfrlen); - fcpreq->transferred_length += xfrlen; + if (fcpreq) { + fcloop_fcp_copy_data(op, tgt_fcpreq->sg, + fcpreq->first_sgl, tgt_fcpreq->offset, + xfrlen); + fcpreq->transferred_length += xfrlen; + } break; case NVMET_FCOP_READDATA: case NVMET_FCOP_READDATA_RSP: xfrlen = tgt_fcpreq->transfer_length; - fcloop_fcp_copy_data(op, tgt_fcpreq->sg, fcpreq->first_sgl, - tgt_fcpreq->offset, xfrlen); - fcpreq->transferred_length += xfrlen; + if (fcpreq) { + fcloop_fcp_copy_data(op, tgt_fcpreq->sg, + fcpreq->first_sgl, tgt_fcpreq->offset, + xfrlen); + fcpreq->transferred_length += xfrlen; + } if (op == NVMET_FCOP_READDATA) break; /* Fall-Thru to RSP handling */ case NVMET_FCOP_RSP: - rsplen = ((fcpreq->rsplen < tgt_fcpreq->rsplen) ? - fcpreq->rsplen : tgt_fcpreq->rsplen); - memcpy(fcpreq->rspaddr, tgt_fcpreq->rspaddr, rsplen); - if (rsplen < tgt_fcpreq->rsplen) - fcp_err = -E2BIG; - fcpreq->rcv_rsplen = rsplen; - fcpreq->status = 0; + if (fcpreq) { + rsplen = ((fcpreq->rsplen < tgt_fcpreq->rsplen) ? + fcpreq->rsplen : tgt_fcpreq->rsplen); + memcpy(fcpreq->rspaddr, tgt_fcpreq->rspaddr, rsplen); + if (rsplen < tgt_fcpreq->rsplen) + fcp_err = -E2BIG; + fcpreq->rcv_rsplen = rsplen; + fcpreq->status = 0; + } tfcp_req->status = 0; break; - case NVMET_FCOP_ABORT: - tfcp_req->status = NVME_SC_FC_TRANSPORT_ABORTED; - break; - default: fcp_err = -EINVAL; break; } + spin_lock(&tfcp_req->reqlock); + tfcp_req->active = false; + spin_unlock(&tfcp_req->reqlock); + tgt_fcpreq->transferred_length = xfrlen; tgt_fcpreq->fcp_error = fcp_err; tgt_fcpreq->done(tgt_fcpreq); @@ -504,6 +564,32 @@ fcloop_fcp_op(struct nvmet_fc_target_port *tgtport, return 0; } +static void +fcloop_tgt_fcp_abort(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *tgt_fcpreq) +{ + struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); + int active; + + /* + * mark aborted only in case there were 2 threads in transport + * (one doing io, other doing abort) and only kills ops posted + * after the abort request + */ + spin_lock(&tfcp_req->reqlock); + active = tfcp_req->active; + tfcp_req->aborted = true; + spin_unlock(&tfcp_req->reqlock); + + tfcp_req->status = NVME_SC_FC_TRANSPORT_ABORTED; + + /* + * nothing more to do. If io wasn't active, the transport should + * immediately call the req_release. If it was active, the op + * will complete, and the lldd should call req_release. 
+ */ +} + static void fcloop_fcp_req_release(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *tgt_fcpreq) @@ -526,6 +612,27 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, void *hw_queue_handle, struct nvmefc_fcp_req *fcpreq) { + struct fcloop_rport *rport = remoteport->private; + struct fcloop_ini_fcpreq *inireq = fcpreq->private; + struct fcloop_fcpreq *tfcp_req = inireq->tfcp_req; + + if (!tfcp_req) + /* abort has already been called */ + return; + + if (rport->targetport) + nvmet_fc_rcv_fcp_abort(rport->targetport, + &tfcp_req->tgt_fcp_req); + + /* break initiator/target relationship for io */ + spin_lock(&tfcp_req->reqlock); + inireq->tfcp_req = NULL; + tfcp_req->fcpreq = NULL; + spin_unlock(&tfcp_req->reqlock); + + /* post the aborted io completion */ + fcpreq->status = -ECANCELED; + schedule_work(&inireq->iniwork); } static void @@ -583,6 +690,7 @@ struct nvmet_fc_target_template tgttemplate = { .targetport_delete = fcloop_targetport_delete, .xmt_ls_rsp = fcloop_xmt_ls_rsp, .fcp_op = fcloop_fcp_op, + .fcp_abort = fcloop_tgt_fcp_abort, .fcp_req_release = fcloop_fcp_req_release, .max_hw_queues = FCLOOP_HW_QUEUES, .max_sgl_segments = FCLOOP_SGL_SEGS, diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 1846c7eb086a..d488c3318d4b 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -542,27 +542,6 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, } #endif - if (rsp->op == NVMET_FCOP_ABORT) { - lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, - "6103 Abort op: oxri x%x %d cnt %d\n", - ctxp->oxid, ctxp->state, ctxp->entry_cnt); - - lpfc_nvmeio_data(phba, "NVMET FCP ABRT: " - "xri x%x state x%x cnt x%x\n", - ctxp->oxid, ctxp->state, ctxp->entry_cnt); - - atomic_inc(&lpfc_nvmep->xmt_fcp_abort); - ctxp->entry_cnt++; - ctxp->flag |= LPFC_NVMET_ABORT_OP; - if (ctxp->flag & LPFC_NVMET_IO_INP) - lpfc_nvmet_sol_fcp_issue_abort(phba, ctxp, ctxp->sid, - ctxp->oxid); - else - lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid, - ctxp->oxid); - return 0; - } - /* Sanity check */ if (ctxp->state == LPFC_NVMET_STE_ABORT) { atomic_inc(&lpfc_nvmep->xmt_fcp_drop); @@ -632,6 +611,33 @@ lpfc_nvmet_targetport_delete(struct nvmet_fc_target_port *targetport) complete(&tport->tport_unreg_done); } +static void +lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *req) +{ + struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private; + struct lpfc_nvmet_rcv_ctx *ctxp = + container_of(req, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); + struct lpfc_hba *phba = ctxp->phba; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6103 Abort op: oxri x%x %d cnt %d\n", + ctxp->oxid, ctxp->state, ctxp->entry_cnt); + + lpfc_nvmeio_data(phba, "NVMET FCP ABRT: xri x%x state x%x cnt x%x\n", + ctxp->oxid, ctxp->state, ctxp->entry_cnt); + + atomic_inc(&lpfc_nvmep->xmt_fcp_abort); + ctxp->entry_cnt++; + ctxp->flag |= LPFC_NVMET_ABORT_OP; + if (ctxp->flag & LPFC_NVMET_IO_INP) + lpfc_nvmet_sol_fcp_issue_abort(phba, ctxp, ctxp->sid, + ctxp->oxid); + else + lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid, + ctxp->oxid); +} + static void lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *rsp) @@ -672,6 +678,7 @@ static struct nvmet_fc_target_template lpfc_tgttemplate = { .targetport_delete = lpfc_nvmet_targetport_delete, .xmt_ls_rsp = lpfc_nvmet_xmt_ls_rsp, .fcp_op = lpfc_nvmet_xmt_fcp_op, + .fcp_abort = lpfc_nvmet_xmt_fcp_abort, .fcp_req_release = 
lpfc_nvmet_xmt_fcp_release, .max_hw_queues = 1, diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index d98ddb2feabc..0db37158a61d 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -533,9 +533,6 @@ enum { * rsp as well */ NVMET_FCOP_RSP = 4, /* send rsp frame */ - NVMET_FCOP_ABORT = 5, /* abort exchange via ABTS */ - NVMET_FCOP_BA_ACC = 6, /* send BA_ACC */ - NVMET_FCOP_BA_RJT = 7, /* send BA_RJT */ }; /** @@ -572,8 +569,6 @@ enum { * upon compeletion of the operation. The nvmet-fc layer will also set a * private pointer for its own use in the done routine. * - * Note: the LLDD must never fail a NVMET_FCOP_ABORT request !! - * * Values set by the NVMET-FC layer prior to calling the LLDD fcp_op * entrypoint. * @op: Indicates the FCP IU operation to perform (see NVMET_FCOP_xxx) @@ -784,10 +779,6 @@ struct nvmet_fc_target_port { * or upon success/failure of FCP_CONF if it is supported, the * LLDD is to set the nvmefc_tgt_fcp_req fcp_error field and * consider the operation complete. - * NVMET_FCOP_ABORT: the LLDD is to terminate the exchange - * corresponding to the fcp operation. The LLDD shall send - * ABTS and follow FC exchange abort-multi rules, including - * ABTS retries and possible logout. * Upon completing the indicated operation, the LLDD is to set the * status fields for the operation (tranferred_length and fcp_error * status) in the request, then call the "done" routine @@ -808,6 +799,17 @@ struct nvmet_fc_target_port { * Returns 0 on success, - on failure (Ex: -EIO) * Entrypoint is Mandatory. * + * @fcp_abort: Called by the transport to abort an active command. + * The command may be in-between operations (nothing active in LLDD) + * or may have an active WRITEDATA operation pending. The LLDD is to + * initiate the ABTS process for the command and return from the + * callback. The ABTS does not need to be complete on the command. + * The fcp_abort callback inherently cannot fail. After the + * fcp_abort() callback completes, the transport will wait for any + * outstanding operation (if there was one) to complete, then will + * call the fcp_req_release() callback to return the command's + * exchange context back to the LLDD. + * * @fcp_req_release: Called by the transport to return a nvmefc_tgt_fcp_req * to the LLDD after all operations on the fcp operation are complete. 
* This may be due to the command completing or upon completion of @@ -848,6 +850,8 @@ struct nvmet_fc_target_template { struct nvmefc_tgt_ls_req *tls_req); int (*fcp_op)(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *fcpreq); + void (*fcp_abort)(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *fcpreq); void (*fcp_req_release)(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *fcpreq); @@ -877,4 +881,7 @@ int nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *fcpreq, void *cmdiubuf, u32 cmdiubuf_len); +void nvmet_fc_rcv_fcp_abort(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *fcpreq); + #endif /* _NVME_FC_DRIVER_H */ From 568ad51e5d32c7f0a8e3138bc9df2badfcce0304 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 11 Apr 2017 11:32:32 -0700 Subject: [PATCH 08/13] nvmet_fc: add missing reference in add_port Add missing reference in add_port Signed-off-by: James Smart Signed-off-by: Sagi Grimberg --- drivers/nvme/target/fc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index e5d30bbb1b10..4a44fd348567 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -2377,6 +2377,7 @@ nvmet_fc_add_port(struct nvmet_port *port) if (!tgtport->port) { tgtport->port = port; port->priv = tgtport; + nvmet_fc_tgtport_get(tgtport); ret = 0; } else ret = -EALREADY; From c913a8b0d4c564e62c2a767d089378a3d2ffa00c Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 11 Apr 2017 11:35:08 -0700 Subject: [PATCH 09/13] nvme_fc: Move LS's to rport Link LS's on the remoteport rather than the controller. LS's are between nports, so it makes more sense to have them on the remoteport, especially on async teardown where the controller is torn down regardless of the LS (the LS is more of a notifier to the target of the teardown). While revising the LS send/done routines, issues were seen with refcounting and cleanup, especially in the async path. Reworked these code paths.
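To make the reworked refcounting concrete: the submit path now pairs every rport reference with exactly one release, whether the LS completes or fails to issue. A minimal sketch of that invariant follows; issue_ls() is a hypothetical stand-in for the lport's ls_req entrypoint and is illustrative only, not code from this patch.

/*
 * Sketch only: the get/put pairing the rework enforces.
 * issue_ls() stands in for lport->ops->ls_req().
 */
static int issue_ls(struct nvmefc_ls_req_op *lsop);

static int ls_req_lifetime_sketch(struct nvme_fc_rport *rport,
		struct nvmefc_ls_req_op *lsop)
{
	int ret;

	if (!nvme_fc_rport_get(rport))	/* pin rport for the LS lifetime */
		return -ESHUTDOWN;

	/* map DMA, link lsop on rport->ls_req_list under rport->lock */

	ret = issue_ls(lsop);		/* hand off to the LLDD */
	if (ret) {
		/* failed to issue: unlink, unmap, drop the reference here */
		nvme_fc_rport_put(rport);
		return ret;
	}

	/*
	 * Issued: the completion path ends in __nvme_fc_finish_ls_req(),
	 * which unlinks, unmaps and drops the same reference.
	 */
	return 0;
}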
Signed-off-by: James Smart Signed-off-by: Sagi Grimberg --- drivers/nvme/host/fc.c | 119 ++++++++++++++++++++++------------------- 1 file changed, 65 insertions(+), 54 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 450733c8cd24..a0c5e5420c41 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -64,13 +64,13 @@ struct nvme_fc_queue { struct nvmefc_ls_req_op { struct nvmefc_ls_req ls_req; - struct nvme_fc_ctrl *ctrl; + struct nvme_fc_rport *rport; struct nvme_fc_queue *queue; struct request *rq; int ls_error; struct completion ls_done; - struct list_head lsreq_list; /* ctrl->ls_req_list */ + struct list_head lsreq_list; /* rport->ls_req_list */ bool req_queued; }; @@ -120,6 +120,9 @@ struct nvme_fc_rport { struct list_head endp_list; /* for lport->endp_list */ struct list_head ctrl_list; + struct list_head ls_req_list; + struct device *dev; /* physical device for dma */ + struct nvme_fc_lport *lport; spinlock_t lock; struct kref ref; } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ @@ -144,7 +147,6 @@ struct nvme_fc_ctrl { u64 cap; struct list_head ctrl_list; /* rport->ctrl_list */ - struct list_head ls_req_list; struct blk_mq_tag_set admin_tag_set; struct blk_mq_tag_set tag_set; @@ -419,9 +421,12 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, INIT_LIST_HEAD(&newrec->endp_list); INIT_LIST_HEAD(&newrec->ctrl_list); + INIT_LIST_HEAD(&newrec->ls_req_list); kref_init(&newrec->ref); spin_lock_init(&newrec->lock); newrec->remoteport.localport = &lport->localport; + newrec->dev = lport->dev; + newrec->lport = lport; newrec->remoteport.private = &newrec[1]; newrec->remoteport.port_role = pinfo->port_role; newrec->remoteport.node_name = pinfo->node_name; @@ -444,7 +449,6 @@ out_kfree_rport: out_reghost_failed: *portptr = NULL; return ret; - } EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport); @@ -624,16 +628,16 @@ static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *); static void -__nvme_fc_finish_ls_req(struct nvme_fc_ctrl *ctrl, - struct nvmefc_ls_req_op *lsop) +__nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop) { + struct nvme_fc_rport *rport = lsop->rport; struct nvmefc_ls_req *lsreq = &lsop->ls_req; unsigned long flags; - spin_lock_irqsave(&ctrl->lock, flags); + spin_lock_irqsave(&rport->lock, flags); if (!lsop->req_queued) { - spin_unlock_irqrestore(&ctrl->lock, flags); + spin_unlock_irqrestore(&rport->lock, flags); return; } @@ -641,56 +645,71 @@ __nvme_fc_finish_ls_req(struct nvme_fc_ctrl *ctrl, lsop->req_queued = false; - spin_unlock_irqrestore(&ctrl->lock, flags); + spin_unlock_irqrestore(&rport->lock, flags); - fc_dma_unmap_single(ctrl->dev, lsreq->rqstdma, + fc_dma_unmap_single(rport->dev, lsreq->rqstdma, (lsreq->rqstlen + lsreq->rsplen), DMA_BIDIRECTIONAL); - nvme_fc_ctrl_put(ctrl); + nvme_fc_rport_put(rport); } static int -__nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, +__nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop, void (*done)(struct nvmefc_ls_req *req, int status)) { struct nvmefc_ls_req *lsreq = &lsop->ls_req; unsigned long flags; - int ret; + int ret = 0; - if (!nvme_fc_ctrl_get(ctrl)) + if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) + return -ECONNREFUSED; + + if (!nvme_fc_rport_get(rport)) return -ESHUTDOWN; lsreq->done = done; - lsop->ctrl = ctrl; + lsop->rport = rport; lsop->req_queued = false; INIT_LIST_HEAD(&lsop->lsreq_list); init_completion(&lsop->ls_done); - lsreq->rqstdma = fc_dma_map_single(ctrl->dev, lsreq->rqstaddr, + lsreq->rqstdma = 
fc_dma_map_single(rport->dev, lsreq->rqstaddr, lsreq->rqstlen + lsreq->rsplen, DMA_BIDIRECTIONAL); - if (fc_dma_mapping_error(ctrl->dev, lsreq->rqstdma)) { - nvme_fc_ctrl_put(ctrl); - dev_err(ctrl->dev, - "els request command failed EFAULT.\n"); - return -EFAULT; + if (fc_dma_mapping_error(rport->dev, lsreq->rqstdma)) { + ret = -EFAULT; + goto out_putrport; } lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen; - spin_lock_irqsave(&ctrl->lock, flags); + spin_lock_irqsave(&rport->lock, flags); - list_add_tail(&lsop->lsreq_list, &ctrl->ls_req_list); + list_add_tail(&lsop->lsreq_list, &rport->ls_req_list); lsop->req_queued = true; - spin_unlock_irqrestore(&ctrl->lock, flags); + spin_unlock_irqrestore(&rport->lock, flags); - ret = ctrl->lport->ops->ls_req(&ctrl->lport->localport, - &ctrl->rport->remoteport, lsreq); + ret = rport->lport->ops->ls_req(&rport->lport->localport, + &rport->remoteport, lsreq); if (ret) - lsop->ls_error = ret; + goto out_unlink; + + return 0; + +out_unlink: + lsop->ls_error = ret; + spin_lock_irqsave(&rport->lock, flags); + lsop->req_queued = false; + list_del(&lsop->lsreq_list); + spin_unlock_irqrestore(&rport->lock, flags); + fc_dma_unmap_single(rport->dev, lsreq->rqstdma, + (lsreq->rqstlen + lsreq->rsplen), + DMA_BIDIRECTIONAL); +out_putrport: + nvme_fc_rport_put(rport); return ret; } @@ -705,15 +724,15 @@ nvme_fc_send_ls_req_done(struct nvmefc_ls_req *lsreq, int status) } static int -nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req_op *lsop) +nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop) { struct nvmefc_ls_req *lsreq = &lsop->ls_req; struct fcnvme_ls_rjt *rjt = lsreq->rspaddr; int ret; - ret = __nvme_fc_send_ls_req(ctrl, lsop, nvme_fc_send_ls_req_done); + ret = __nvme_fc_send_ls_req(rport, lsop, nvme_fc_send_ls_req_done); - if (!ret) + if (!ret) { /* * No timeout/not interruptible as we need the struct * to exist until the lldd calls us back. Thus mandate @@ -722,14 +741,14 @@ nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req_op *lsop) */ wait_for_completion(&lsop->ls_done); - __nvme_fc_finish_ls_req(ctrl, lsop); + __nvme_fc_finish_ls_req(lsop); - if (ret) { - dev_err(ctrl->dev, - "ls request command failed (%d).\n", ret); - return ret; + ret = lsop->ls_error; } + if (ret) + return ret; + /* ACC or RJT payload ? 
*/ if (rjt->w0.ls_cmd == FCNVME_LS_RJT) return -ENXIO; @@ -737,19 +756,14 @@ nvme_fc_send_ls_req(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req_op *lsop) return 0; } -static void -nvme_fc_send_ls_req_async(struct nvme_fc_ctrl *ctrl, +static int +nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop, void (*done)(struct nvmefc_ls_req *req, int status)) { - int ret; - - ret = __nvme_fc_send_ls_req(ctrl, lsop, done); - /* don't wait for completion */ - if (ret) - done(&lsop->ls_req, ret); + return __nvme_fc_send_ls_req(rport, lsop, done); } /* Validation Error indexes into the string table below */ @@ -839,7 +853,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, lsreq->rsplen = sizeof(*assoc_acc); lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; - ret = nvme_fc_send_ls_req(ctrl, lsop); + ret = nvme_fc_send_ls_req(ctrl->rport, lsop); if (ret) goto out_free_buffer; @@ -947,7 +961,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, lsreq->rsplen = sizeof(*conn_acc); lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; - ret = nvme_fc_send_ls_req(ctrl, lsop); + ret = nvme_fc_send_ls_req(ctrl->rport, lsop); if (ret) goto out_free_buffer; @@ -998,14 +1012,8 @@ static void nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) { struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq); - struct nvme_fc_ctrl *ctrl = lsop->ctrl; - __nvme_fc_finish_ls_req(ctrl, lsop); - - if (status) - dev_err(ctrl->dev, - "disconnect assoc ls request command failed (%d).\n", - status); + __nvme_fc_finish_ls_req(lsop); /* fc-nvme iniator doesn't care about success or failure of cmd */ @@ -1036,6 +1044,7 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) struct fcnvme_ls_disconnect_acc *discon_acc; struct nvmefc_ls_req_op *lsop; struct nvmefc_ls_req *lsreq; + int ret; lsop = kzalloc((sizeof(*lsop) + ctrl->lport->ops->lsrqst_priv_sz + @@ -1078,7 +1087,10 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) lsreq->rsplen = sizeof(*discon_acc); lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; - nvme_fc_send_ls_req_async(ctrl, lsop, nvme_fc_disconnect_assoc_done); + ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop, + nvme_fc_disconnect_assoc_done); + if (ret) + kfree(lsop); /* only meaningful part to terminating the association */ ctrl->association_id = 0; @@ -2302,7 +2314,6 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->ctrl.opts = opts; INIT_LIST_HEAD(&ctrl->ctrl_list); - INIT_LIST_HEAD(&ctrl->ls_req_list); ctrl->lport = lport; ctrl->rport = rport; ctrl->dev = lport->dev; From 8d64daf7dcd851b249717ac88e2d4989f9ad2467 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 11 Apr 2017 11:35:09 -0700 Subject: [PATCH 10/13] nvme_fc: Add ls aborts on remote port teardown remoteport teardown never aborted the outstanding LS operations. Add support.
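The abort walk added below uses the classic drop-lock-and-restart list idiom: the rport lock cannot be held across the LLDD ls_abort callout, and the list may change while the lock is dropped, so each pass restarts from the head, with a per-op flag guaranteeing forward progress. A generic sketch of the idiom follows; my_port, my_op and abort_one() are made-up names, not part of this patch.

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct my_op {				/* hypothetical op type */
	struct list_head	entry;
	u32			flags;
#define MY_OP_TERMIO	(1 << 0)
};

struct my_port {			/* hypothetical port type */
	spinlock_t		lock;
	struct list_head	op_list;
};

static void abort_one(struct my_op *op);	/* stand-in for ops->ls_abort() */

static void abort_all_sketch(struct my_port *port)
{
	struct my_op *op;
	unsigned long flags;

restart:
	spin_lock_irqsave(&port->lock, flags);
	list_for_each_entry(op, &port->op_list, entry) {
		if (op->flags & MY_OP_TERMIO)
			continue;		/* already sent an abort */
		op->flags |= MY_OP_TERMIO;	/* mark while still locked */
		spin_unlock_irqrestore(&port->lock, flags);
		abort_one(op);			/* callout without the lock */
		goto restart;			/* list may have changed */
	}
	spin_unlock_irqrestore(&port->lock, flags);
}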
Signed-off-by: James Smart Signed-off-by: Sagi Grimberg --- drivers/nvme/host/fc.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index a0c5e5420c41..596b3a453b54 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -61,12 +61,19 @@ struct nvme_fc_queue { unsigned long flags; } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ +enum nvme_fcop_flags { + FCOP_FLAGS_TERMIO = (1 << 0), + FCOP_FLAGS_RELEASED = (1 << 1), + FCOP_FLAGS_COMPLETE = (1 << 2), +}; + struct nvmefc_ls_req_op { struct nvmefc_ls_req ls_req; struct nvme_fc_rport *rport; struct nvme_fc_queue *queue; struct request *rq; + u32 flags; int ls_error; struct completion ls_done; @@ -491,6 +498,30 @@ nvme_fc_rport_get(struct nvme_fc_rport *rport) return kref_get_unless_zero(&rport->ref); } +static int +nvme_fc_abort_lsops(struct nvme_fc_rport *rport) +{ + struct nvmefc_ls_req_op *lsop; + unsigned long flags; + +restart: + spin_lock_irqsave(&rport->lock, flags); + + list_for_each_entry(lsop, &rport->ls_req_list, lsreq_list) { + if (!(lsop->flags & FCOP_FLAGS_TERMIO)) { + lsop->flags |= FCOP_FLAGS_TERMIO; + spin_unlock_irqrestore(&rport->lock, flags); + rport->lport->ops->ls_abort(&rport->lport->localport, + &rport->remoteport, + &lsop->ls_req); + goto restart; + } + } + spin_unlock_irqrestore(&rport->lock, flags); + + return 0; +} + /** * nvme_fc_unregister_remoteport - transport entry point called by an * LLDD to deregister/remove a previously @@ -526,6 +557,8 @@ nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr) spin_unlock_irqrestore(&rport->lock, flags); + nvme_fc_abort_lsops(rport); + nvme_fc_rport_put(rport); return 0; } From 4363135761e085fbb5ddb16dfbd4322a4d2e5037 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 12 Apr 2017 15:15:18 -0700 Subject: [PATCH 11/13] nvmet_fc: Change traddr field separator to a colon The FC-NVME spec revised its syntax to avoid comma separators. Sync the traddr parser used for port attachments with that change. Signed-off-by: James Smart Signed-off-by: Sagi Grimberg --- drivers/nvme/target/fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 4a44fd348567..074bd3743b5f 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -2316,7 +2316,7 @@ nvmet_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf) if (!options) return -ENOMEM; - while ((p = strsep(&o, ",\n")) != NULL) { + while ((p = strsep(&o, ":\n")) != NULL) { if (!*p) continue; From 7776db1ccc123d5944a8c170c9c45f7e91d49643 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 24 Feb 2017 17:59:28 -0500 Subject: [PATCH 12/13] nvme/pci: Poll CQ on timeout If an IO timeout occurs, it's helpful to know if the controller did not post a completion or the driver missed an interrupt. While we never expect the latter, this patch will make it possible to tell the difference so we don't have to guess.
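In outline, the triage this adds to the timeout handler reads as follows; this is a condensed sketch of the change below (same calls, details elided), not additional code.

	/* Condensed sketch: poll first, escalate only if nothing was posted. */
	if (__nvme_poll(nvmeq, req->tag)) {
		/* completion was already in the CQ: an interrupt was missed */
		return BLK_EH_HANDLED;
	}
	/* nothing posted: the controller really is stuck, go abort/reset */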
Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig Tested-by: Johannes Thumshirn Reviewed-by: Johannes Thumshirn --- drivers/nvme/host/pci.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a363fecb8d82..212f7e0db84f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -846,10 +846,8 @@ static irqreturn_t nvme_irq_check(int irq, void *data) return IRQ_NONE; } -static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) +static int __nvme_poll(struct nvme_queue *nvmeq, unsigned int tag) { - struct nvme_queue *nvmeq = hctx->driver_data; - if (nvme_cqe_valid(nvmeq, nvmeq->cq_head, nvmeq->cq_phase)) { spin_lock_irq(&nvmeq->q_lock); __nvme_process_cq(nvmeq, &tag); @@ -862,6 +860,13 @@ static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) return 0; } +static int nvme_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) +{ + struct nvme_queue *nvmeq = hctx->driver_data; + + return __nvme_poll(nvmeq, tag); +} + static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl, int aer_idx) { struct nvme_dev *dev = to_nvme_dev(ctrl); @@ -959,6 +964,16 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) struct request *abort_req; struct nvme_command cmd; + /* + * Did we miss an interrupt? + */ + if (__nvme_poll(nvmeq, req->tag)) { + dev_warn(dev->ctrl.device, + "I/O %d QID %d timeout, completion polled\n", + req->tag, nvmeq->qid); + return BLK_EH_HANDLED; + } + /* * Shutdown immediately if controller times out while starting. The * reset work will see the pci device disabled when it gets the forced From e02ab0230489461fd9fdd8028841dae6f03a4874 Mon Sep 17 00:00:00 2001 From: Junxiong Guan Date: Fri, 21 Apr 2017 12:59:07 +0200 Subject: [PATCH 13/13] nvme: let dm-mpath distinguish nvme error codes Currently, most IOs that fail with an NVMe error code are retried on another path, because the NVMe driver returns -EIO for them. This patch lets multipath distinguish NVMe media error codes and some generic or command-specific NVMe error codes, so that multipath will not retry those kinds of IO and waste bandwidth. Signed-off-by: Junxiong Guan Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8dc664798293..263946b23628 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -75,6 +75,20 @@ static int nvme_error_status(struct request *req) return -ENOSPC; default: return -EIO; + + /* + * XXX: these errors are a nasty side-band protocol to + * drivers/md/dm-mpath.c:noretry_error() that aren't documented + * anywhere.. + */ + case NVME_SC_CMD_SEQ_ERROR: + return -EILSEQ; + case NVME_SC_ONCS_NOT_SUPPORTED: + return -EOPNOTSUPP; + case NVME_SC_WRITE_FAULT: + case NVME_SC_READ_ERROR: + case NVME_SC_UNWRITTEN_BLOCK: + return -ENODATA; }
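For context on the side-band protocol the XXX comment mentions: dm-mpath keys off these errno values to decide that a failure is the target's fault rather than the path's, and so must not be retried on another path. A rough sketch of drivers/md/dm-mpath.c:noretry_error() for this kernel generation follows; the exact case list is from memory and should be treated as an assumption, to be verified against the dm-mpath source.

/*
 * Approximate sketch (case list assumed, not quoted) of the dm-mpath
 * filter that consumes the errno values above. A nonzero return means
 * "fail the IO now, do not retry on another path".
 */
static int noretry_error(int error)
{
	switch (error) {
	case -EOPNOTSUPP:
	case -EREMOTEIO:
	case -EILSEQ:
	case -ENODATA:
	case -ESHUTDOWN:
		return error;
	}

	/* anything else could be a path failure, so it should be retried */
	return 0;
}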