From b0907cadabcae6f1248f37a32a6e777f9ff6d4aa Mon Sep 17 00:00:00 2001 From: Adrian Huang Date: Tue, 10 Jan 2023 09:45:12 +0800 Subject: [PATCH 1/9] md: fix incorrect declaration about claim_rdev in md_import_device Commit fb541ca4c365 ("md: remove lock_bdev / unlock_bdev") removes wrappers for blkdev_get/blkdev_put. However, the uninitialized local static variable of pointer type 'claim_rdev' in md_import_device() is NULL, which leads to the following warning call trace: WARNING: CPU: 22 PID: 1037 at block/bdev.c:577 bd_prepare_to_claim+0x131/0x150 CPU: 22 PID: 1037 Comm: mdadm Not tainted 6.2.0-rc3+ #69 .. RIP: 0010:bd_prepare_to_claim+0x131/0x150 .. Call Trace: ? _raw_spin_unlock+0x15/0x30 ? iput+0x6a/0x220 blkdev_get_by_dev.part.0+0x4b/0x300 md_import_device+0x126/0x1d0 new_dev_store+0x184/0x240 md_attr_store+0x80/0xf0 kernfs_fop_write_iter+0x128/0x1c0 vfs_write+0x2be/0x3c0 ksys_write+0x5f/0xe0 do_syscall_64+0x38/0x90 entry_SYSCALL_64_after_hwframe+0x72/0xdc It turns out the md device cannot be used: md: could not open device unknown-block(259,0). md: md127 stopped. Fix the issue by declaring the local static variable of struct type and passing the pointer of the variable to blkdev_get_by_dev(). 
Fixes: fb541ca4c365 ("md: remove lock_bdev / unlock_bdev") Cc: Christoph Hellwig Signed-off-by: Adrian Huang Reviewed-by: Christoph Hellwig Signed-off-by: Song Liu --- drivers/md/md.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 8af639296b3c..02b0240e7c71 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -3644,7 +3644,7 @@ EXPORT_SYMBOL_GPL(md_rdev_init); */ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor) { - static struct md_rdev *claim_rdev; /* just for claiming the bdev */ + static struct md_rdev claim_rdev; /* just for claiming the bdev */ struct md_rdev *rdev; sector_t size; int err; @@ -3662,7 +3662,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe rdev->bdev = blkdev_get_by_dev(newdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, - super_format == -2 ? claim_rdev : rdev); + super_format == -2 ? &claim_rdev : rdev); if (IS_ERR(rdev->bdev)) { pr_warn("md: could not open device unknown-block(%u,%u).\n", MAJOR(newdev), MINOR(newdev)); From 216f764716f34fe68cedc7296ae2043a7727e640 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 3 Jan 2023 16:47:55 +0800 Subject: [PATCH 2/9] block, bfq: switch 'bfqg->ref' to use atomic refcount apis The updating of 'bfqg->ref' should be protected by 'bfqd->lock', however, during code review, we found that bfq_pd_free() updates 'bfqg->ref' without holding the lock, which is problematic: 1) bfq_pd_free() triggered by removing cgroup is called asynchronously; 2) bfqq will grab bfqg reference, and exiting the bfqq will drop the reference, which can run concurrently with 1). Unfortunately, 'bfqd->lock' can't be held here because 'bfqd' might already be freed in bfq_pd_free(). Fix the problem by using atomic refcount apis. 
Signed-off-by: Yu Kuai Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20230103084755.1256479-1-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- block/bfq-cgroup.c | 8 +++----- block/bfq-iosched.h | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 1b2829e99dad..7d9b15f0dbd5 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -316,14 +316,12 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq) static void bfqg_get(struct bfq_group *bfqg) { - bfqg->ref++; + refcount_inc(&bfqg->ref); } static void bfqg_put(struct bfq_group *bfqg) { - bfqg->ref--; - - if (bfqg->ref == 0) + if (refcount_dec_and_test(&bfqg->ref)) kfree(bfqg); } @@ -530,7 +528,7 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, struct request_queue *q, } /* see comments in bfq_bic_update_cgroup for why refcounting */ - bfqg_get(bfqg); + refcount_set(&bfqg->ref, 1); return &bfqg->pd; } diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 41aa151ccc22..466e4865ace6 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -928,7 +928,7 @@ struct bfq_group { char blkg_path[128]; /* reference counter (see comments in bfq_bic_update_cgroup) */ - int ref; + refcount_t ref; /* Is bfq_group still online? */ bool online; From 3e9900f3bd7ba30d60f82b162b70a1dffe4e8e24 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 16 Jan 2023 08:51:05 -0700 Subject: [PATCH 3/9] pktcdvd: check for NULL return after calling bio_split_to_limits() The revert of the removal of this driver happened after we fixed up the split limits for NOWAIT issue, hence it got missed. Ensure that we check for a NULL bio after splitting, in case it should be retried. Marking this as fixing both commits, so that stable backport will do this correctly. 
Cc: stable@vger.kernel.org Fixes: 9cea62b2cbab ("block: don't allow splitting of a REQ_NOWAIT bio") Fixes: 4b83e99ee709 ("Revert "pktcdvd: remove driver."") Signed-off-by: Jens Axboe --- drivers/block/pktcdvd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index 4cea3b08087e..2f1a92509271 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c @@ -2400,6 +2400,8 @@ static void pkt_submit_bio(struct bio *bio) struct bio *split; bio = bio_split_to_limits(bio); + if (!bio) + return; pkt_dbg(2, pd, "start = %6llx stop = %6llx\n", (unsigned long long)bio->bi_iter.bi_sector, From e3ff8887e7db757360f97634e0d6f4b8e27a8c46 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Tue, 3 Jan 2023 19:28:33 +0800 Subject: [PATCH 4/9] blk-cgroup: fix missing pd_online_fn() while activating policy If the policy defines pd_online_fn(), it should be called after pd_init_fn(), like blkg_create(). Signed-off-by: Yu Kuai Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20230103112833.2013432-1-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index ce6a2b7d3dfb..4c94a6560f62 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1455,6 +1455,10 @@ retry: list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) pol->pd_init_fn(blkg->pd[pol->plid]); + if (pol->pd_online_fn) + list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) + pol->pd_online_fn(blkg->pd[pol->plid]); + __set_bit(pol->plid, q->blkcg_pols); ret = 0; From 9d6033e350694a67885605674244d43c9559dc36 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Fri, 30 Dec 2022 09:09:26 +0800 Subject: [PATCH 5/9] block/rnbd-clt: fix wrong max ID in ida_alloc_max We need to pass 'end - 1' to ida_alloc_max after switching from ida_simple_get to ida_alloc_max. Otherwise smatch warns. 
drivers/block/rnbd/rnbd-clt.c:1460 init_dev() error: Calling ida_alloc_max() with a 'max' argument which is a power of 2. -1 missing? Fixes: 24afc15dbe21 ("block/rnbd: Remove a useless mutex") Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Guoqing Jiang Acked-by: Jack Wang Link: https://lore.kernel.org/r/20221230010926.32243-1-guoqing.jiang@linux.dev Signed-off-by: Jens Axboe --- drivers/block/rnbd/rnbd-clt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c index 78334da74d8b..5eb8c7855970 100644 --- a/drivers/block/rnbd/rnbd-clt.c +++ b/drivers/block/rnbd/rnbd-clt.c @@ -1440,7 +1440,7 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess, goto out_alloc; } - ret = ida_alloc_max(&index_ida, 1 << (MINORBITS - RNBD_PART_BITS), + ret = ida_alloc_max(&index_ida, (1 << (MINORBITS - RNBD_PART_BITS)) - 1, GFP_KERNEL); if (ret < 0) { pr_err("Failed to initialize device '%s' from session %s, allocating idr failed, err: %d\n", From 7746564793978fe2f43b18a302b22dca0ad3a0e8 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 17 Jan 2023 11:42:15 +0000 Subject: [PATCH 6/9] block: fix hctx checks for batch allocation When there are no read queues read requests will be assigned a default queue on allocation. However, blk_mq_get_cached_request() is not prepared for that and will fail all attempts to grab read requests from the cache. Worst case it doubles the number of requests allocated, roughly half of which will be returned by blk_mq_free_plug_rqs(). It only affects batched allocations and so is io_uring specific. For reference, QD8 t/io_uring benchmark improves by 20-35%. 
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/80d4511011d7d4751b4cf6375c4e38f237d935e3.1673955390.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- block/blk-mq.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 2c49b4151da1..9d463f7563bc 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2890,6 +2890,7 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q, struct blk_plug *plug, struct bio **bio, unsigned int nsegs) { struct request *rq; + enum hctx_type type, hctx_type; if (!plug) return NULL; @@ -2902,7 +2903,10 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q, return NULL; } - if (blk_mq_get_hctx_type((*bio)->bi_opf) != rq->mq_hctx->type) + type = blk_mq_get_hctx_type((*bio)->bi_opf); + hctx_type = rq->mq_hctx->type; + if (type != hctx_type && + !(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT)) return NULL; if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf)) return NULL; From c06ba7b892a50b48522ad441a40053f483dfee9e Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Tue, 17 Jan 2023 19:25:00 +0100 Subject: [PATCH 7/9] nvme-apple: reset controller during shutdown This is a functional revert of c76b8308e4c9 ("nvme-apple: fix controller shutdown in apple_nvme_disable"). The commit broke suspend/resume since apple_nvme_reset_work() tries to disable the controller on resume. This does not work for the apple NVMe controller since register access only works while the co-processor firmware is running. Disabling the NVMe controller in the shutdown path is also required for shutting the co-processor down. The original code was appropriate for this hardware. Add a comment to prevent similar breaking changes in the future. 
Fixes: c76b8308e4c9 ("nvme-apple: fix controller shutdown in apple_nvme_disable") Reported-by: Janne Grunau Link: https://lore.kernel.org/all/20230110174745.GA3576@jannau.net/ Signed-off-by: Janne Grunau [hch: updated with a more descriptive comment from Hector Martin] Signed-off-by: Christoph Hellwig --- drivers/nvme/host/apple.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index bf1c60edb7f9..146c9e63ce77 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -829,7 +829,23 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown) apple_nvme_remove_cq(anv); } - nvme_disable_ctrl(&anv->ctrl, shutdown); + /* + * Always disable the NVMe controller after shutdown. + * We need to do this to bring it back up later anyway, and we + * can't do it while the firmware is not running (e.g. in the + * resume reset path before RTKit is initialized), so for Apple + * controllers it makes sense to unconditionally do it here. + * Additionally, this sequence of events is reliable, while + * others (like disabling after bringing back the firmware on + * resume) seem to run into trouble under some circumstances. + * + * Both U-Boot and m1n1 also use this convention (i.e. an ANS + * NVMe controller is handed off with firmware shut down, in an + * NVMe disabled state, after a clean shutdown). + */ + if (shutdown) + nvme_disable_ctrl(&anv->ctrl, shutdown); + nvme_disable_ctrl(&anv->ctrl, false); } WRITE_ONCE(anv->ioq.enabled, false); From c0a4a1eafbd48e02829045bba3e6163c03037276 Mon Sep 17 00:00:00 2001 From: Janne Grunau Date: Tue, 17 Jan 2023 19:25:01 +0100 Subject: [PATCH 8/9] nvme-apple: only reset the controller when RTKit is running NVMe controller register access hangs indefinitely when the co-processor is not running. A missed reset is preferable over a hanging thread since it could be recoverable. 
Signed-off-by: Janne Grunau Signed-off-by: Christoph Hellwig --- drivers/nvme/host/apple.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c index 146c9e63ce77..b317ce6c4ec3 100644 --- a/drivers/nvme/host/apple.c +++ b/drivers/nvme/host/apple.c @@ -1001,11 +1001,11 @@ static void apple_nvme_reset_work(struct work_struct *work) goto out; } - if (anv->ctrl.ctrl_config & NVME_CC_ENABLE) - apple_nvme_disable(anv, false); - /* RTKit must be shut down cleanly for the (soft)-reset to work */ if (apple_rtkit_is_running(anv->rtk)) { + /* reset the controller if it is enabled */ + if (anv->ctrl.ctrl_config & NVME_CC_ENABLE) + apple_nvme_disable(anv, false); dev_dbg(anv->dev, "Trying to shut down RTKit before reset."); ret = apple_rtkit_shutdown(anv->rtk); if (ret) From 1c5842085851f786eba24a39ecd02650ad892064 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 18 Jan 2023 08:44:16 -0800 Subject: [PATCH 9/9] nvme-pci: fix timeout request state check Polling the completion can progress the request state to IDLE, either inline with the completion, or through softirq. Either way, the state may not be COMPLETED, so don't check for that. We only care if the state isn't IN_FLIGHT. This is fixing an issue where the driver aborts an IO that we just completed. Seeing the "aborting" message instead of "polled" is very misleading as to where the timeout problem resides. 
Fixes: bf392a5dc02a9b ("nvme-pci: Remove tag from process cq") Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a2553b7d9bb8..1ff8843bc4b3 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1362,7 +1362,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req) else nvme_poll_irqdisable(nvmeq); - if (blk_mq_request_completed(req)) { + if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT) { dev_warn(dev->ctrl.device, "I/O %d QID %d timeout, completion polled\n", req->tag, nvmeq->qid);