From 44f23dabdc08698b7a066300eb55db23895f5459 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 13 Jul 2023 06:50:09 +0200 Subject: [PATCH 1/7] nvdimm: Use kstrtobool() instead of strtobool() strtobool() is the same as kstrtobool(). However, the latter is more used within the kernel. In order to remove strtobool() and slightly simplify kstrtox.h, switch to the other function name. While at it, include the corresponding header file () Reviewed-by: Vishal Verma Signed-off-by: Christophe JAILLET Signed-off-by: Dave Jiang --- drivers/nvdimm/namespace_devs.c | 3 ++- drivers/nvdimm/pmem.c | 3 ++- drivers/nvdimm/region_devs.c | 5 +++-- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index c60ec0b373c5..07177eadc56e 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -2,6 +2,7 @@ /* * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. */ +#include #include #include #include @@ -1338,7 +1339,7 @@ static ssize_t force_raw_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { bool force_raw; - int rc = strtobool(buf, &force_raw); + int rc = kstrtobool(buf, &force_raw); if (rc) return rc; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 46e094e56159..4e8fdcb3f1c8 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -385,7 +386,7 @@ static ssize_t write_cache_store(struct device *dev, bool write_cache; int rc; - rc = strtobool(buf, &write_cache); + rc = kstrtobool(buf, &write_cache); if (rc) return rc; dax_write_cache(pmem->dax_dev, write_cache); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 8f134d63af13..0a81f87f6f6c 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -275,7 +276,7 @@ static ssize_t deep_flush_store(struct device *dev, struct device_attribute *att const char *buf, size_t len) { bool flush; - int rc = strtobool(buf, &flush); + int rc = kstrtobool(buf, &flush); struct nd_region *nd_region = to_nd_region(dev); if (rc) @@ -530,7 +531,7 @@ static ssize_t read_only_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { bool ro; - int rc = strtobool(buf, &ro); + int rc = kstrtobool(buf, &ro); struct nd_region *nd_region = to_nd_region(dev); if (rc) From c1dbd8a849183b9c12d257ad3043ecec50db50b3 Mon Sep 17 00:00:00 2001 From: Hou Tao Date: Thu, 13 Jul 2023 21:54:13 +0800 Subject: [PATCH 2/7] virtio_pmem: add the missing REQ_OP_WRITE for flush bio When doing mkfs.xfs on a pmem device, the following warning was reported: ------------[ cut here ]------------ WARNING: CPU: 2 PID: 384 at block/blk-core.c:751 submit_bio_noacct Modules linked in: CPU: 2 PID: 384 Comm: mkfs.xfs Not tainted 6.4.0-rc7+ #154 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996) RIP: 0010:submit_bio_noacct+0x340/0x520 ...... Call Trace: ? submit_bio_noacct+0xd5/0x520 submit_bio+0x37/0x60 async_pmem_flush+0x79/0xa0 nvdimm_flush+0x17/0x40 pmem_submit_bio+0x370/0x390 __submit_bio+0xbc/0x190 submit_bio_noacct_nocheck+0x14d/0x370 submit_bio_noacct+0x1ef/0x520 submit_bio+0x55/0x60 submit_bio_wait+0x5a/0xc0 blkdev_issue_flush+0x44/0x60 The root cause is that submit_bio_noacct() needs bio_op() is either WRITE or ZONE_APPEND for flush bio and async_pmem_flush() doesn't assign REQ_OP_WRITE when allocating flush bio, so submit_bio_noacct just fail the flush bio. Simply fix it by adding the missing REQ_OP_WRITE for flush bio. And we could fix the flush order issue and do flush optimization later. Cc: stable@vger.kernel.org # 6.3+ Fixes: b4a6bb3a67aa ("block: add a sanity check for non-write flush/fua bios") Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Pankaj Gupta Tested-by: Pankaj Gupta Signed-off-by: Hou Tao Signed-off-by: Dave Jiang --- drivers/nvdimm/nd_virtio.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c index c6a648fd8744..1f8c667c6f1e 100644 --- a/drivers/nvdimm/nd_virtio.c +++ b/drivers/nvdimm/nd_virtio.c @@ -105,7 +105,8 @@ int async_pmem_flush(struct nd_region *nd_region, struct bio *bio) * parent bio. Otherwise directly call nd_region flush. */ if (bio && bio->bi_iter.bi_sector != -1) { - struct bio *child = bio_alloc(bio->bi_bdev, 0, REQ_PREFLUSH, + struct bio *child = bio_alloc(bio->bi_bdev, 0, + REQ_OP_WRITE | REQ_PREFLUSH, GFP_ATOMIC); if (!child) From fd774e36fe873632b1e9cf067cea46d7d7df55ac Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 14 Jul 2023 11:48:13 -0600 Subject: [PATCH 3/7] nvdimm: Explicitly include correct DT includes The DT of_device.h and of_platform.h date back to the separate of_platform_bus_type before it as merged into the regular platform bus. As part of that merge prepping Arm DT support 13 years ago, they "temporarily" include each other. They also include platform_device.h and of.h. As a result, there's a pretty much random mix of those include files used throughout the tree. In order to detangle these headers and replace the implicit includes with struct declarations, users need to explicitly include the correct includes. Signed-off-by: Rob Herring Signed-off-by: Dave Jiang --- drivers/nvdimm/of_pmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c index 10dbdcdfb9ce..1b9f5b8a6167 100644 --- a/drivers/nvdimm/of_pmem.c +++ b/drivers/nvdimm/of_pmem.c @@ -2,11 +2,11 @@ #define pr_fmt(fmt) "of_pmem: " fmt -#include -#include +#include #include #include #include +#include #include struct of_pmem_private { From e96d9a938e8946e87daf456e0311020ca6747d99 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 9 Aug 2023 11:05:11 +0530 Subject: [PATCH 4/7] nvdimm/pfn_dev: Prevent the creation of zero-sized namespaces On architectures that have different page size values used for kernel direct mapping and userspace mappings, the user can end up creating zero-sized namespaces as shown below :/sys/bus/nd/devices/region1# cat align 0x1000000 /sys/bus/nd/devices/region1# echo 0x200000 > align /sys/bus/nd/devices/region1/dax1.0# cat supported_alignments 65536 16777216 $ ndctl create-namespace -r region1 -m devdax -s 18M --align 64K { "dev":"namespace1.0", "mode":"devdax", "map":"dev", "size":0, "uuid":"3094329a-0c66-4905-847e-357223e56ab0", "daxregion":{ "id":1, "size":0, "align":65536 }, "align":65536 } similarily for fsdax $ ndctl create-namespace -r region1 -m fsdax -s 18M --align 64K { "dev":"namespace1.0", "mode":"fsdax", "map":"dev", "size":0, "uuid":"45538a6f-dec7-405d-b1da-2a4075e06232", "sector_size":512, "align":65536, "blockdev":"pmem1" } In commit 9ffc1d19fc4a ("mm/memremap_pages: Introduce memremap_compat_align()") memremap_compat_align was added to make sure the kernel always creates namespaces with 16MB alignment. But the user can still override the region alignment and no input validation is done in the region alignment values to retain the flexibility user had before. However, the kernel ensures that only part of the namespace that can be mapped via kernel direct mapping page size is enabled. This is achieved by tracking the unmapped part of the namespace in pfn_sb->end_trunc. The kernel also ensures that the start address of the namespace is also aligned to the kernel direct mapping page size. Depending on the user request, the kernel implements userspace mapping alignment by updating pfn device alignment attribute and this value is used to adjust the start address for userspace mappings. This is tracked in pfn_sb->dataoff. Hence the available size for userspace mapping is: usermapping_size = size of the namespace - pfn_sb->end_trun - pfn_sb->dataoff If the kernel finds the user mapping size zero then don't allow the creation of namespace. After fix: $ ndctl create-namespace -f -r region1 -m devdax -s 18M --align 64K libndctl: ndctl_dax_enable: dax1.1: failed to enable Error: namespace1.2: failed to enable failed to create namespace: No such device or address And existing zero sized namespace will be marked disabled. root@ltczz75-lp2:/home/kvaneesh# ndctl list -N -r region1 -i [ { "dev":"namespace1.0", "mode":"raw", "size":18874368, "uuid":"94a90fb0-8e78-4fb6-a759-ffc62f9fa181", "sector_size":512, "state":"disabled" }, Signed-off-by: Aneesh Kumar K.V Reviewed-by: Jeff Moyer Link: https://lore.kernel.org/r/20230809053512.350660-1-aneesh.kumar@linux.ibm.com Signed-off-by: Dave Jiang --- drivers/nvdimm/pfn_devs.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index af7d9301520c..0777b1626f6c 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -452,8 +452,9 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) u64 checksum, offset; struct resource *res; enum nd_pfn_mode mode; + resource_size_t res_size; struct nd_namespace_io *nsio; - unsigned long align, start_pad; + unsigned long align, start_pad, end_trunc; struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; struct nd_namespace_common *ndns = nd_pfn->ndns; const uuid_t *parent_uuid = nd_dev_to_uuid(&ndns->dev); @@ -503,6 +504,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) align = le32_to_cpu(pfn_sb->align); offset = le64_to_cpu(pfn_sb->dataoff); start_pad = le32_to_cpu(pfn_sb->start_pad); + end_trunc = le32_to_cpu(pfn_sb->end_trunc); if (align == 0) align = 1UL << ilog2(offset); mode = le32_to_cpu(pfn_sb->mode); @@ -584,7 +586,8 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) */ nsio = to_nd_namespace_io(&ndns->dev); res = &nsio->res; - if (offset >= resource_size(res)) { + res_size = resource_size(res); + if (offset >= res_size) { dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n", dev_name(&ndns->dev)); return -EOPNOTSUPP; @@ -610,6 +613,10 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) return -EOPNOTSUPP; } + if (offset >= (res_size - start_pad - end_trunc)) { + dev_err(&nd_pfn->dev, "bad offset with small namespace\n"); + return -EOPNOTSUPP; + } return 0; } EXPORT_SYMBOL(nd_pfn_validate); @@ -810,7 +817,8 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) else return -ENXIO; - if (offset >= size) { + if (offset >= (size - end_trunc)) { + /* This results in zero size devices */ dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n", dev_name(&ndns->dev)); return -ENXIO; From feb72e9b20823419aaed57801018eeffc7be7e82 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 9 Aug 2023 11:05:12 +0530 Subject: [PATCH 5/7] nvdimm/pfn_dev: Avoid unnecessary endian conversion use the local variable that already have the converted values. No functional change in this patch. Reviewed-by: Jeff Moyer Signed-off-by: Aneesh Kumar K.V Link: https://lore.kernel.org/r/20230809053512.350660-2-aneesh.kumar@linux.ibm.com Signed-off-by: Dave Jiang --- drivers/nvdimm/pfn_devs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 0777b1626f6c..c662d8964802 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -601,14 +601,12 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) return -EOPNOTSUPP; } - if (!IS_ALIGNED(res->start + le32_to_cpu(pfn_sb->start_pad), - memremap_compat_align())) { + if (!IS_ALIGNED(res->start + start_pad, memremap_compat_align())) { dev_err(&nd_pfn->dev, "resource start misaligned\n"); return -EOPNOTSUPP; } - if (!IS_ALIGNED(res->end + 1 - le32_to_cpu(pfn_sb->end_trunc), - memremap_compat_align())) { + if (!IS_ALIGNED(res->end + 1 - end_trunc, memremap_compat_align())) { dev_err(&nd_pfn->dev, "resource end misaligned\n"); return -EOPNOTSUPP; } From 85ae42c72142346645e63c33835da947dfa008b3 Mon Sep 17 00:00:00 2001 From: Konstantin Meskhidze Date: Thu, 17 Aug 2023 19:59:45 +0800 Subject: [PATCH 6/7] nvdimm: Fix memleak of pmu attr_groups in unregister_nvdimm_pmu() Memory pointed by 'nd_pmu->pmu.attr_groups' is allocated in function 'register_nvdimm_pmu' and is lost after 'kfree(nd_pmu)' call in function 'unregister_nvdimm_pmu'. Fixes: 0fab1ba6ad6b ("drivers/nvdimm: Add perf interface to expose nvdimm performance stats") Co-developed-by: Ivanov Mikhail Signed-off-by: Konstantin Meskhidze Reviewed-by: Jeff Moyer Link: https://lore.kernel.org/r/20230817115945.771826-1-konstantin.meskhidze@huawei.com Signed-off-by: Dave Jiang --- drivers/nvdimm/nd_perf.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvdimm/nd_perf.c b/drivers/nvdimm/nd_perf.c index 433bbb68ae64..14881c4e03e6 100644 --- a/drivers/nvdimm/nd_perf.c +++ b/drivers/nvdimm/nd_perf.c @@ -324,6 +324,7 @@ void unregister_nvdimm_pmu(struct nvdimm_pmu *nd_pmu) { perf_pmu_unregister(&nd_pmu->pmu); nvdimm_pmu_free_hotplug_memory(nd_pmu); + kfree(nd_pmu->pmu.attr_groups); kfree(nd_pmu); } EXPORT_SYMBOL_GPL(unregister_nvdimm_pmu); From 08ca6906a4b7e48f8e93b7c1f49a742a415be6d5 Mon Sep 17 00:00:00 2001 From: Konstantin Meskhidze Date: Thu, 17 Aug 2023 19:41:03 +0800 Subject: [PATCH 7/7] nvdimm: Fix dereference after free in register_nvdimm_pmu() 'nd_pmu->pmu.attr_groups' is dereferenced in function 'nvdimm_pmu_free_hotplug_memory' call after it has been freed. Because in function 'nvdimm_pmu_free_hotplug_memory' memory pointed by the fields of 'nd_pmu->pmu.attr_groups' is deallocated it is necessary to call 'kfree' after 'nvdimm_pmu_free_hotplug_memory'. Fixes: 0fab1ba6ad6b ("drivers/nvdimm: Add perf interface to expose nvdimm performance stats") Co-developed-by: Ivanov Mikhail Signed-off-by: Konstantin Meskhidze Reviewed-by: Jeff Moyer Link: https://lore.kernel.org/r/20230817114103.754977-1-konstantin.meskhidze@huawei.com Signed-off-by: Dave Jiang --- drivers/nvdimm/nd_perf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvdimm/nd_perf.c b/drivers/nvdimm/nd_perf.c index 14881c4e03e6..2b6dc80d8fb5 100644 --- a/drivers/nvdimm/nd_perf.c +++ b/drivers/nvdimm/nd_perf.c @@ -308,8 +308,8 @@ int register_nvdimm_pmu(struct nvdimm_pmu *nd_pmu, struct platform_device *pdev) rc = perf_pmu_register(&nd_pmu->pmu, nd_pmu->pmu.name, -1); if (rc) { - kfree(nd_pmu->pmu.attr_groups); nvdimm_pmu_free_hotplug_memory(nd_pmu); + kfree(nd_pmu->pmu.attr_groups); return rc; }