From b0907cadabcae6f1248f37a32a6e777f9ff6d4aa Mon Sep 17 00:00:00 2001
From: Adrian Huang <ahuang12@lenovo.com>
Date: Tue, 10 Jan 2023 09:45:12 +0800
Subject: [PATCH 1/9] md: fix incorrect declaration about claim_rdev in
 md_import_device

Commit fb541ca4c365 ("md: remove lock_bdev / unlock_bdev") removes
wrappers for blkdev_get/blkdev_put. However, the uninitialized local
static variable of pointer type 'claim_rdev' in md_import_device()
is NULL, which leads to the following warning call trace:

  WARNING: CPU: 22 PID: 1037 at block/bdev.c:577 bd_prepare_to_claim+0x131/0x150
  CPU: 22 PID: 1037 Comm: mdadm Not tainted 6.2.0-rc3+ #69
  ..
  RIP: 0010:bd_prepare_to_claim+0x131/0x150
  ..
  Call Trace:
   <TASK>
   ? _raw_spin_unlock+0x15/0x30
   ? iput+0x6a/0x220
   blkdev_get_by_dev.part.0+0x4b/0x300
   md_import_device+0x126/0x1d0
   new_dev_store+0x184/0x240
   md_attr_store+0x80/0xf0
   kernfs_fop_write_iter+0x128/0x1c0
   vfs_write+0x2be/0x3c0
   ksys_write+0x5f/0xe0
   do_syscall_64+0x38/0x90
   entry_SYSCALL_64_after_hwframe+0x72/0xdc

It turns out the md device cannot be used:

  md: could not open device unknown-block(259,0).
  md: md127 stopped.

Fix the issue by declaring the local static variable of struct type
and passing the pointer of the variable to blkdev_get_by_dev().

Fixes: fb541ca4c365 ("md: remove lock_bdev / unlock_bdev")
Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Adrian Huang <ahuang12@lenovo.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Song Liu <song@kernel.org>
---
 drivers/md/md.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 8af639296b3c..02b0240e7c71 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -3644,7 +3644,7 @@ EXPORT_SYMBOL_GPL(md_rdev_init);
  */
 static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
 {
-	static struct md_rdev *claim_rdev; /* just for claiming the bdev */
+	static struct md_rdev claim_rdev; /* just for claiming the bdev */
 	struct md_rdev *rdev;
 	sector_t size;
 	int err;
@@ -3662,7 +3662,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe
 
 	rdev->bdev = blkdev_get_by_dev(newdev,
 			FMODE_READ | FMODE_WRITE | FMODE_EXCL,
-			super_format == -2 ? claim_rdev : rdev);
+			super_format == -2 ? &claim_rdev : rdev);
 	if (IS_ERR(rdev->bdev)) {
 		pr_warn("md: could not open device unknown-block(%u,%u).\n",
 			MAJOR(newdev), MINOR(newdev));

From 216f764716f34fe68cedc7296ae2043a7727e640 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Tue, 3 Jan 2023 16:47:55 +0800
Subject: [PATCH 2/9] block, bfq: switch 'bfqg->ref' to use atomic refcount
 apis

The updating of 'bfqg->ref' should be protected by 'bfqd->lock', however,
during code review, we found that bfq_pd_free() update 'bfqg->ref'
without holding the lock, which is problematic:

1) bfq_pd_free() triggered by removing cgroup is called asynchronously;
2) bfqq will grab bfqg reference, and exit bfqq will drop the reference,
which can concurrent with 1).

Unfortunately, 'bfqd->lock' can't be held here because 'bfqd' might already
be freed in bfq_pd_free(). Fix the problem by using atomic refcount apis.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20230103084755.1256479-1-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bfq-cgroup.c  | 8 +++-----
 block/bfq-iosched.h | 2 +-
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 1b2829e99dad..7d9b15f0dbd5 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -316,14 +316,12 @@ struct bfq_group *bfqq_group(struct bfq_queue *bfqq)
 
 static void bfqg_get(struct bfq_group *bfqg)
 {
-	bfqg->ref++;
+	refcount_inc(&bfqg->ref);
 }
 
 static void bfqg_put(struct bfq_group *bfqg)
 {
-	bfqg->ref--;
-
-	if (bfqg->ref == 0)
+	if (refcount_dec_and_test(&bfqg->ref))
 		kfree(bfqg);
 }
 
@@ -530,7 +528,7 @@ static struct blkg_policy_data *bfq_pd_alloc(gfp_t gfp, struct request_queue *q,
 	}
 
 	/* see comments in bfq_bic_update_cgroup for why refcounting */
-	bfqg_get(bfqg);
+	refcount_set(&bfqg->ref, 1);
 	return &bfqg->pd;
 }
 
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index 41aa151ccc22..466e4865ace6 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -928,7 +928,7 @@ struct bfq_group {
 	char blkg_path[128];
 
 	/* reference counter (see comments in bfq_bic_update_cgroup) */
-	int ref;
+	refcount_t ref;
 	/* Is bfq_group still online? */
 	bool online;
 

From 3e9900f3bd7ba30d60f82b162b70a1dffe4e8e24 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Mon, 16 Jan 2023 08:51:05 -0700
Subject: [PATCH 3/9] pktcdvd: check for NULL returna fter calling
 bio_split_to_limits()

The revert of the removal of this driver happened after we fixed up
the split limits for NOWAIT issue, hence it got missed. Ensure that
we check for a NULL bio after splitting, in case it should be retried.

Marking this as fixing both commits, so that stable backport will do
this correctly.

Cc: stable@vger.kernel.org
Fixes: 9cea62b2cbab ("block: don't allow splitting of a REQ_NOWAIT bio")
Fixes: 4b83e99ee709 ("Revert "pktcdvd: remove driver."")
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/pktcdvd.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 4cea3b08087e..2f1a92509271 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2400,6 +2400,8 @@ static void pkt_submit_bio(struct bio *bio)
 	struct bio *split;
 
 	bio = bio_split_to_limits(bio);
+	if (!bio)
+		return;
 
 	pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
 		(unsigned long long)bio->bi_iter.bi_sector,

From e3ff8887e7db757360f97634e0d6f4b8e27a8c46 Mon Sep 17 00:00:00 2001
From: Yu Kuai <yukuai3@huawei.com>
Date: Tue, 3 Jan 2023 19:28:33 +0800
Subject: [PATCH 4/9] blk-cgroup: fix missing pd_online_fn() while activating
 policy

If the policy defines pd_online_fn(), it should be called after
pd_init_fn(), like blkg_create().

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/20230103112833.2013432-1-yukuai1@huaweicloud.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-cgroup.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index ce6a2b7d3dfb..4c94a6560f62 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -1455,6 +1455,10 @@ retry:
 		list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
 			pol->pd_init_fn(blkg->pd[pol->plid]);
 
+	if (pol->pd_online_fn)
+		list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
+			pol->pd_online_fn(blkg->pd[pol->plid]);
+
 	__set_bit(pol->plid, q->blkcg_pols);
 	ret = 0;
 

From 9d6033e350694a67885605674244d43c9559dc36 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <guoqing.jiang@linux.dev>
Date: Fri, 30 Dec 2022 09:09:26 +0800
Subject: [PATCH 5/9] block/rnbd-clt: fix wrong max ID in ida_alloc_max

We need to pass 'end - 1' to ida_alloc_max after switch from
ida_simple_get to ida_alloc_max.

Otherwise smatch warns.

drivers/block/rnbd/rnbd-clt.c:1460 init_dev() error: Calling ida_alloc_max() with a 'max' argument which is a power of 2. -1 missing?

Fixes: 24afc15dbe21 ("block/rnbd: Remove a useless mutex")
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Guoqing Jiang <guoqing.jiang@linux.dev>
Acked-by: Jack Wang <jinpu.wang@ionos.com>
Link: https://lore.kernel.org/r/20221230010926.32243-1-guoqing.jiang@linux.dev
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/rnbd/rnbd-clt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index 78334da74d8b..5eb8c7855970 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -1440,7 +1440,7 @@ static struct rnbd_clt_dev *init_dev(struct rnbd_clt_session *sess,
 		goto out_alloc;
 	}
 
-	ret = ida_alloc_max(&index_ida, 1 << (MINORBITS - RNBD_PART_BITS),
+	ret = ida_alloc_max(&index_ida, (1 << (MINORBITS - RNBD_PART_BITS)) - 1,
 			    GFP_KERNEL);
 	if (ret < 0) {
 		pr_err("Failed to initialize device '%s' from session %s, allocating idr failed, err: %d\n",

From 7746564793978fe2f43b18a302b22dca0ad3a0e8 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Tue, 17 Jan 2023 11:42:15 +0000
Subject: [PATCH 6/9] block: fix hctx checks for batch allocation

When there are no read queues read requests will be assigned a
default queue on allocation. However, blk_mq_get_cached_request() is not
prepared for that and will fail all attempts to grab read requests from
the cache. Worst case it doubles the number of requests allocated,
roughly half of which will be returned by blk_mq_free_plug_rqs().

It only affects batched allocations and so is io_uring specific.
For reference, QD8 t/io_uring benchmark improves by 20-35%.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/80d4511011d7d4751b4cf6375c4e38f237d935e3.1673955390.git.asml.silence@gmail.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 2c49b4151da1..9d463f7563bc 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2890,6 +2890,7 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
 		struct blk_plug *plug, struct bio **bio, unsigned int nsegs)
 {
 	struct request *rq;
+	enum hctx_type type, hctx_type;
 
 	if (!plug)
 		return NULL;
@@ -2902,7 +2903,10 @@ static inline struct request *blk_mq_get_cached_request(struct request_queue *q,
 		return NULL;
 	}
 
-	if (blk_mq_get_hctx_type((*bio)->bi_opf) != rq->mq_hctx->type)
+	type = blk_mq_get_hctx_type((*bio)->bi_opf);
+	hctx_type = rq->mq_hctx->type;
+	if (type != hctx_type &&
+	    !(type == HCTX_TYPE_READ && hctx_type == HCTX_TYPE_DEFAULT))
 		return NULL;
 	if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf))
 		return NULL;

From c06ba7b892a50b48522ad441a40053f483dfee9e Mon Sep 17 00:00:00 2001
From: Janne Grunau <j@jannau.net>
Date: Tue, 17 Jan 2023 19:25:00 +0100
Subject: [PATCH 7/9] nvme-apple: reset controller during shutdown

This is a functional revert of c76b8308e4c9 ("nvme-apple: fix controller
shutdown in apple_nvme_disable").

The commit broke suspend/resume since apple_nvme_reset_work() tries to
disable the controller on resume. This does not work for the apple NVMe
controller since register access only works while the co-processor
firmware is running.

Disabling the NVMe controller in the shutdown path is also required
for shutting the co-processor down. The original code was appropriate
for this hardware. Add a comment to prevent a similar breaking changes
in the future.

Fixes: c76b8308e4c9 ("nvme-apple: fix controller shutdown in apple_nvme_disable")
Reported-by: Janne Grunau <j@jannau.net>
Link: https://lore.kernel.org/all/20230110174745.GA3576@jannau.net/
Signed-off-by: Janne Grunau <j@jannau.net>
[hch: updated with a more descriptive comment from Hector Martin]
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/apple.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index bf1c60edb7f9..146c9e63ce77 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -829,7 +829,23 @@ static void apple_nvme_disable(struct apple_nvme *anv, bool shutdown)
 			apple_nvme_remove_cq(anv);
 		}
 
-		nvme_disable_ctrl(&anv->ctrl, shutdown);
+		/*
+		 * Always disable the NVMe controller after shutdown.
+		 * We need to do this to bring it back up later anyway, and we
+		 * can't do it while the firmware is not running (e.g. in the
+		 * resume reset path before RTKit is initialized), so for Apple
+		 * controllers it makes sense to unconditionally do it here.
+		 * Additionally, this sequence of events is reliable, while
+		 * others (like disabling after bringing back the firmware on
+		 * resume) seem to run into trouble under some circumstances.
+		 *
+		 * Both U-Boot and m1n1 also use this convention (i.e. an ANS
+		 * NVMe controller is handed off with firmware shut down, in an
+		 * NVMe disabled state, after a clean shutdown).
+		 */
+		if (shutdown)
+			nvme_disable_ctrl(&anv->ctrl, shutdown);
+		nvme_disable_ctrl(&anv->ctrl, false);
 	}
 
 	WRITE_ONCE(anv->ioq.enabled, false);

From c0a4a1eafbd48e02829045bba3e6163c03037276 Mon Sep 17 00:00:00 2001
From: Janne Grunau <j@jannau.net>
Date: Tue, 17 Jan 2023 19:25:01 +0100
Subject: [PATCH 8/9] nvme-apple: only reset the controller when RTKit is
 running

NVMe controller register access hangs indefinitely when the co-processor
is not running. A missed reset is preferable over a hanging thread since
it could be recoverable.

Signed-off-by: Janne Grunau <j@jannau.net>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/apple.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index 146c9e63ce77..b317ce6c4ec3 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -1001,11 +1001,11 @@ static void apple_nvme_reset_work(struct work_struct *work)
 		goto out;
 	}
 
-	if (anv->ctrl.ctrl_config & NVME_CC_ENABLE)
-		apple_nvme_disable(anv, false);
-
 	/* RTKit must be shut down cleanly for the (soft)-reset to work */
 	if (apple_rtkit_is_running(anv->rtk)) {
+		/* reset the controller if it is enabled */
+		if (anv->ctrl.ctrl_config & NVME_CC_ENABLE)
+			apple_nvme_disable(anv, false);
 		dev_dbg(anv->dev, "Trying to shut down RTKit before reset.");
 		ret = apple_rtkit_shutdown(anv->rtk);
 		if (ret)

From 1c5842085851f786eba24a39ecd02650ad892064 Mon Sep 17 00:00:00 2001
From: Keith Busch <kbusch@kernel.org>
Date: Wed, 18 Jan 2023 08:44:16 -0800
Subject: [PATCH 9/9] nvme-pci: fix timeout request state check

Polling the completion can progress the request state to IDLE, either
inline with the completion, or through softirq. Either way, the state
may not be COMPLETED, so don't check for that. We only care if the state
isn't IN_FLIGHT.

This is fixing an issue where the driver aborts an IO that we just
completed. Seeing the "aborting" message instead of "polled" is very
misleading as to where the timeout problem resides.

Fixes: bf392a5dc02a9b ("nvme-pci: Remove tag from process cq")
Signed-off-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index a2553b7d9bb8..1ff8843bc4b3 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1362,7 +1362,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
 	else
 		nvme_poll_irqdisable(nvmeq);
 
-	if (blk_mq_request_completed(req)) {
+	if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT) {
 		dev_warn(dev->ctrl.device,
 			 "I/O %d QID %d timeout, completion polled\n",
 			 req->tag, nvmeq->qid);