From 12b2aaadb6d5ef77434e8db21f469f46fe2d392e Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 27 May 2021 18:16:38 -0700 Subject: [PATCH 1/6] nvme-rdma: fix in-casule data send for chained sgls We have only 2 inline sg entries and we allow 4 sg entries for the send wr sge. Larger sgls entries will be chained. However when we build in-capsule send wr sge, we iterate without taking into account that the sgl may be chained and still fit in-capsule (which can happen if the sgl is bigger than 2, but lower-equal to 4). Fix in-capsule data mapping to correctly iterate chained sgls. Fixes: 38e1800275d3 ("nvme-rdma: Avoid preallocating big SGL for data") Reported-by: Walker, Benjamin Signed-off-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 37943dc4c2c1..4697a94c0945 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1320,16 +1320,17 @@ static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue, int count) { struct nvme_sgl_desc *sg = &c->common.dptr.sgl; - struct scatterlist *sgl = req->data_sgl.sg_table.sgl; struct ib_sge *sge = &req->sge[1]; + struct scatterlist *sgl; u32 len = 0; int i; - for (i = 0; i < count; i++, sgl++, sge++) { + for_each_sg(req->data_sgl.sg_table.sgl, sgl, count, i) { sge->addr = sg_dma_address(sgl); sge->length = sg_dma_len(sgl); sge->lkey = queue->device->pd->local_dma_lkey; len += sge->length; + sge++; } sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff); From a6c144f3d2e230f2b3ac5ed8c51e0f0391556197 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 26 May 2021 17:23:15 +0200 Subject: [PATCH 2/6] nvme-loop: reset queue count to 1 in nvme_loop_destroy_io_queues() The queue count is increased in nvme_loop_init_io_queues(), so we need to reset it to 1 at the end of nvme_loop_destroy_io_queues(). Otherwise the function is not re-entrant safe, and crash will happen during concurrent reset and remove calls. Signed-off-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/loop.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index cb30cb942e1d..93fca31e5043 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -299,6 +299,7 @@ static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl) clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags); nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); } + ctrl->ctrl.queue_count = 1; } static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl) From 1c5f8e882a05de5c011e8c3fbeceb0d1c590eb53 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 26 May 2021 17:23:16 +0200 Subject: [PATCH 3/6] nvme-loop: clear NVME_LOOP_Q_LIVE when nvme_loop_configure_admin_queue() fails When the call to nvme_enable_ctrl() in nvme_loop_configure_admin_queue() fails the NVME_LOOP_Q_LIVE flag is not cleared. Signed-off-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/loop.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 93fca31e5043..8643c71953ad 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -406,6 +406,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) return 0; out_cleanup_queue: + clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); blk_cleanup_queue(ctrl->ctrl.admin_q); out_cleanup_fabrics_q: blk_cleanup_queue(ctrl->ctrl.fabrics_q); From 4237de2f73a669e4f89ac0aa2b44fb1a1d9ec583 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 26 May 2021 17:23:17 +0200 Subject: [PATCH 4/6] nvme-loop: check for NVME_LOOP_Q_LIVE in nvme_loop_destroy_admin_queue() We need to check the NVME_LOOP_Q_LIVE flag in nvme_loop_destroy_admin_queue() to protect against duplicate invocations eg during concurrent reset and remove calls. Signed-off-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/loop.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 8643c71953ad..209ad4bc2695 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -263,7 +263,8 @@ static const struct blk_mq_ops nvme_loop_admin_mq_ops = { static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) { - clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); + if (!test_and_clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags)) + return; nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); blk_cleanup_queue(ctrl->ctrl.admin_q); blk_cleanup_queue(ctrl->ctrl.fabrics_q); From 6622f9acd29cd4f6272720e827e6406f5a970cb0 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Wed, 26 May 2021 17:23:18 +0200 Subject: [PATCH 5/6] nvme-loop: do not warn for deleted controllers during reset During concurrent reset and delete calls the reset workqueue is flushed, causing nvme_loop_reset_ctrl_work() to be executed when the controller is in state DELETING or DELETING_NOIO. But this is expected, so we shouldn't issue a WARN_ON here. Signed-off-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/target/loop.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 209ad4bc2695..a5c4a1865026 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -465,8 +465,10 @@ static void nvme_loop_reset_ctrl_work(struct work_struct *work) nvme_loop_shutdown_ctrl(ctrl); if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { - /* state change failure should never happen */ - WARN_ON_ONCE(1); + if (ctrl->ctrl.state != NVME_CTRL_DELETING && + ctrl->ctrl.state != NVME_CTRL_DELETING_NOIO) + /* state change failure for non-deleted ctrl? */ + WARN_ON_ONCE(1); return; } From bcd9a0797d73eeff659582f23277e7ab6e5f18f3 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 1 Jun 2021 19:22:05 +0300 Subject: [PATCH 6/6] nvmet: fix freeing unallocated p2pmem In case p2p device was found but the p2p pool is empty, the nvme target is still trying to free the sgl from the p2p pool instead of the regular sgl pool and causing a crash (BUG() is called). Instead, assign the p2p_dev for the request only if it was allocated from p2p pool. This is the crash that was caused: [Sun May 30 19:13:53 2021] ------------[ cut here ]------------ [Sun May 30 19:13:53 2021] kernel BUG at lib/genalloc.c:518! [Sun May 30 19:13:53 2021] invalid opcode: 0000 [#1] SMP PTI ... [Sun May 30 19:13:53 2021] kernel BUG at lib/genalloc.c:518! ... [Sun May 30 19:13:53 2021] RIP: 0010:gen_pool_free_owner+0xa8/0xb0 ... [Sun May 30 19:13:53 2021] Call Trace: [Sun May 30 19:13:53 2021] ------------[ cut here ]------------ [Sun May 30 19:13:53 2021] pci_free_p2pmem+0x2b/0x70 [Sun May 30 19:13:53 2021] pci_p2pmem_free_sgl+0x4f/0x80 [Sun May 30 19:13:53 2021] nvmet_req_free_sgls+0x1e/0x80 [nvmet] [Sun May 30 19:13:53 2021] kernel BUG at lib/genalloc.c:518! [Sun May 30 19:13:53 2021] nvmet_rdma_release_rsp+0x4e/0x1f0 [nvmet_rdma] [Sun May 30 19:13:53 2021] nvmet_rdma_send_done+0x1c/0x60 [nvmet_rdma] Fixes: c6e3f1339812 ("nvmet: add metadata support for block devices") Reviewed-by: Israel Rukshin Signed-off-by: Max Gurtovoy Reviewed-by: Logan Gunthorpe Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 4b29a5bac896..b20b8d0a1144 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1005,19 +1005,23 @@ static unsigned int nvmet_data_transfer_len(struct nvmet_req *req) return req->transfer_len - req->metadata_len; } -static int nvmet_req_alloc_p2pmem_sgls(struct nvmet_req *req) +static int nvmet_req_alloc_p2pmem_sgls(struct pci_dev *p2p_dev, + struct nvmet_req *req) { - req->sg = pci_p2pmem_alloc_sgl(req->p2p_dev, &req->sg_cnt, + req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt, nvmet_data_transfer_len(req)); if (!req->sg) goto out_err; if (req->metadata_len) { - req->metadata_sg = pci_p2pmem_alloc_sgl(req->p2p_dev, + req->metadata_sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->metadata_sg_cnt, req->metadata_len); if (!req->metadata_sg) goto out_free_sg; } + + req->p2p_dev = p2p_dev; + return 0; out_free_sg: pci_p2pmem_free_sgl(req->p2p_dev, req->sg); @@ -1025,25 +1029,19 @@ out_err: return -ENOMEM; } -static bool nvmet_req_find_p2p_dev(struct nvmet_req *req) +static struct pci_dev *nvmet_req_find_p2p_dev(struct nvmet_req *req) { - if (!IS_ENABLED(CONFIG_PCI_P2PDMA)) - return false; - - if (req->sq->ctrl && req->sq->qid && req->ns) { - req->p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map, - req->ns->nsid); - if (req->p2p_dev) - return true; - } - - req->p2p_dev = NULL; - return false; + if (!IS_ENABLED(CONFIG_PCI_P2PDMA) || + !req->sq->ctrl || !req->sq->qid || !req->ns) + return NULL; + return radix_tree_lookup(&req->sq->ctrl->p2p_ns_map, req->ns->nsid); } int nvmet_req_alloc_sgls(struct nvmet_req *req) { - if (nvmet_req_find_p2p_dev(req) && !nvmet_req_alloc_p2pmem_sgls(req)) + struct pci_dev *p2p_dev = nvmet_req_find_p2p_dev(req); + + if (p2p_dev && !nvmet_req_alloc_p2pmem_sgls(p2p_dev, req)) return 0; req->sg = sgl_alloc(nvmet_data_transfer_len(req), GFP_KERNEL, @@ -1072,6 +1070,7 @@ void nvmet_req_free_sgls(struct nvmet_req *req) pci_p2pmem_free_sgl(req->p2p_dev, req->sg); if (req->metadata_sg) pci_p2pmem_free_sgl(req->p2p_dev, req->metadata_sg); + req->p2p_dev = NULL; } else { sgl_free(req->sg); if (req->metadata_sg)