linux/drivers/of/of_reserved_mem.c

516 lines
13 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0+
/*
* Device tree based initialization code for reserved memory.
*
* Copyright (c) 2013, 2015 The Linux Foundation. All Rights Reserved.
* Copyright (c) 2013,2014 Samsung Electronics Co., Ltd.
* http://www.samsung.com
* Author: Marek Szyprowski <m.szyprowski@samsung.com>
* Author: Josh Cartwright <joshc@codeaurora.org>
*/
#define pr_fmt(fmt) "OF: reserved mem: " fmt
#include <linux/err.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
#include <linux/of_platform.h>
#include <linux/mm.h>
#include <linux/sizes.h>
#include <linux/of_reserved_mem.h>
#include <linux/sort.h>
#include <linux/slab.h>
mm: remove CONFIG_HAVE_MEMBLOCK All architecures use memblock for early memory management. There is no need for the CONFIG_HAVE_MEMBLOCK configuration option. [rppt@linux.vnet.ibm.com: of/fdt: fixup #ifdefs] Link: http://lkml.kernel.org/r/20180919103457.GA20545@rapoport-lnx [rppt@linux.vnet.ibm.com: csky: fixups after bootmem removal] Link: http://lkml.kernel.org/r/20180926112744.GC4628@rapoport-lnx [rppt@linux.vnet.ibm.com: remove stale #else and the code it protects] Link: http://lkml.kernel.org/r/1538067825-24835-1-git-send-email-rppt@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1536927045-23536-4-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com> Acked-by: Michal Hocko <mhocko@suse.com> Tested-by: Jonathan Cameron <jonathan.cameron@huawei.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Chris Zankel <chris@zankel.net> Cc: "David S. Miller" <davem@davemloft.net> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Greentime Hu <green.hu@gmail.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Guan Xuetao <gxt@pku.edu.cn> Cc: Ingo Molnar <mingo@redhat.com> Cc: "James E.J. Bottomley" <jejb@parisc-linux.org> Cc: Jonas Bonn <jonas@southpole.se> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Ley Foon Tan <lftan@altera.com> Cc: Mark Salter <msalter@redhat.com> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Simek <monstr@monstr.eu> Cc: Palmer Dabbelt <palmer@sifive.com> Cc: Paul Burton <paul.burton@mips.com> Cc: Richard Kuo <rkuo@codeaurora.org> Cc: Richard Weinberger <richard@nod.at> Cc: Rich Felker <dalias@libc.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Serge Semin <fancer.lancer@gmail.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tony Luck <tony.luck@intel.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-10-30 15:07:44 -07:00
#include <linux/memblock.h>
memblock: exclude MEMBLOCK_NOMAP regions from kmemleak Vladimir Zapolskiy reports: Commit a7259df76702 ("memblock: make memblock_find_in_range method private") invokes a kernel panic while running kmemleak on OF platforms with nomaped regions: Unable to handle kernel paging request at virtual address fff000021e00000 [...] scan_block+0x64/0x170 scan_gray_list+0xe8/0x17c kmemleak_scan+0x270/0x514 kmemleak_write+0x34c/0x4ac The memory allocated from memblock is registered with kmemleak, but if it is marked MEMBLOCK_NOMAP it won't have linear map entries so an attempt to scan such areas will fault. Ideally, memblock_mark_nomap() would inform kmemleak to ignore MEMBLOCK_NOMAP memory, but it can be called before kmemleak interfaces operating on physical addresses can use __va() conversion. Make sure that functions that mark allocated memory as MEMBLOCK_NOMAP take care of informing kmemleak to ignore such memory. Link: https://lore.kernel.org/all/8ade5174-b143-d621-8c8e-dc6a1898c6fb@linaro.org Link: https://lore.kernel.org/all/c30ff0a2-d196-c50d-22f0-bd50696b1205@quicinc.com Fixes: a7259df76702 ("memblock: make memblock_find_in_range method private") Reported-by: Vladimir Zapolskiy <vladimir.zapolskiy@linaro.org> Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com> Tested-by: Vladimir Zapolskiy <vladimir.zapolskiy@linaro.org> Tested-by: Qian Cai <quic_qiancai@quicinc.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2021-10-21 10:09:29 +03:00
#include <linux/kmemleak.h>
cma: factor out minimum alignment requirement Patch series "mm: enforce pageblock_order < MAX_ORDER". Having pageblock_order >= MAX_ORDER seems to be able to happen in corner cases and some parts of the kernel are not prepared for it. For example, Aneesh has shown [1] that such kernels can be compiled on ppc64 with 64k base pages by setting FORCE_MAX_ZONEORDER=8, which will run into a WARN_ON_ONCE(order >= MAX_ORDER) in comapction code right during boot. We can get pageblock_order >= MAX_ORDER when the default hugetlb size is bigger than the maximum allocation granularity of the buddy, in which case we are no longer talking about huge pages but instead gigantic pages. Having pageblock_order >= MAX_ORDER can only make alloc_contig_range() of such gigantic pages more likely to succeed. Reliable use of gigantic pages either requires boot time allcoation or CMA, no need to overcomplicate some places in the kernel to optimize for corner cases that are broken in other areas of the kernel. This patch (of 2): Let's enforce pageblock_order < MAX_ORDER and simplify. Especially patch #1 can be regarded a cleanup before: [PATCH v5 0/6] Use pageblock_order for cma and alloc_contig_range alignment. [2] [1] https://lkml.kernel.org/r/87r189a2ks.fsf@linux.ibm.com [2] https://lkml.kernel.org/r/20220211164135.1803616-1-zi.yan@sent.com Link: https://lkml.kernel.org/r/20220214174132.219303-2-david@redhat.com Signed-off-by: David Hildenbrand <david@redhat.com> Reviewed-by: Zi Yan <ziy@nvidia.com> Acked-by: Rob Herring <robh@kernel.org> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Frank Rowand <frowand.list@gmail.com> Cc: Michael S. Tsirkin <mst@redhat.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Marek Szyprowski <m.szyprowski@samsung.com> Cc: Robin Murphy <robin.murphy@arm.com> Cc: Minchan Kim <minchan@kernel.org> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: John Garry via iommu <iommu@lists.linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2022-03-22 14:43:17 -07:00
#include <linux/cma.h>
#include "of_private.h"
#define MAX_RESERVED_REGIONS 64
static struct reserved_mem reserved_mem[MAX_RESERVED_REGIONS];
static int reserved_mem_count;
static int __init early_init_dt_alloc_reserved_memory_arch(phys_addr_t size,
phys_addr_t align, phys_addr_t start, phys_addr_t end, bool nomap,
phys_addr_t *res_base)
{
phys_addr_t base;
int err = 0;
memblock: drop __memblock_alloc_base() The __memblock_alloc_base() function tries to allocate a memory up to the limit specified by its max_addr parameter. Depending on the value of this parameter, the __memblock_alloc_base() can is replaced with the appropriate memblock_phys_alloc*() variant. Link: http://lkml.kernel.org/r/1548057848-15136-9-git-send-email-rppt@linux.ibm.com Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> Acked-by: Rob Herring <robh@kernel.org> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Christophe Leroy <christophe.leroy@c-s.fr> Cc: Christoph Hellwig <hch@lst.de> Cc: "David S. Miller" <davem@davemloft.net> Cc: Dennis Zhou <dennis@kernel.org> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Greentime Hu <green.hu@gmail.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Guan Xuetao <gxt@pku.edu.cn> Cc: Guo Ren <guoren@kernel.org> Cc: Guo Ren <ren_guo@c-sky.com> [c-sky] Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Juergen Gross <jgross@suse.com> [Xen] Cc: Mark Salter <msalter@redhat.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Max Filippov <jcmvbkbc@gmail.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Michal Simek <monstr@monstr.eu> Cc: Paul Burton <paul.burton@mips.com> Cc: Petr Mladek <pmladek@suse.com> Cc: Richard Weinberger <richard@nod.at> Cc: Rich Felker <dalias@libc.org> Cc: Rob Herring <robh+dt@kernel.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Stafford Horne <shorne@gmail.com> Cc: Tony Luck <tony.luck@intel.com> Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-03-11 23:29:31 -07:00
end = !end ? MEMBLOCK_ALLOC_ANYWHERE : end;
memblock: stop using implicit alignment to SMP_CACHE_BYTES When a memblock allocation APIs are called with align = 0, the alignment is implicitly set to SMP_CACHE_BYTES. Implicit alignment is done deep in the memblock allocator and it can come as a surprise. Not that such an alignment would be wrong even when used incorrectly but it is better to be explicit for the sake of clarity and the prinicple of the least surprise. Replace all such uses of memblock APIs with the 'align' parameter explicitly set to SMP_CACHE_BYTES and stop implicit alignment assignment in the memblock internal allocation functions. For the case when memblock APIs are used via helper functions, e.g. like iommu_arena_new_node() in Alpha, the helper functions were detected with Coccinelle's help and then manually examined and updated where appropriate. The direct memblock APIs users were updated using the semantic patch below: @@ expression size, min_addr, max_addr, nid; @@ ( | - memblock_alloc_try_nid_raw(size, 0, min_addr, max_addr, nid) + memblock_alloc_try_nid_raw(size, SMP_CACHE_BYTES, min_addr, max_addr, nid) | - memblock_alloc_try_nid_nopanic(size, 0, min_addr, max_addr, nid) + memblock_alloc_try_nid_nopanic(size, SMP_CACHE_BYTES, min_addr, max_addr, nid) | - memblock_alloc_try_nid(size, 0, min_addr, max_addr, nid) + memblock_alloc_try_nid(size, SMP_CACHE_BYTES, min_addr, max_addr, nid) | - memblock_alloc(size, 0) + memblock_alloc(size, SMP_CACHE_BYTES) | - memblock_alloc_raw(size, 0) + memblock_alloc_raw(size, SMP_CACHE_BYTES) | - memblock_alloc_from(size, 0, min_addr) + memblock_alloc_from(size, SMP_CACHE_BYTES, min_addr) | - memblock_alloc_nopanic(size, 0) + memblock_alloc_nopanic(size, SMP_CACHE_BYTES) | - memblock_alloc_low(size, 0) + memblock_alloc_low(size, SMP_CACHE_BYTES) | - memblock_alloc_low_nopanic(size, 0) + memblock_alloc_low_nopanic(size, SMP_CACHE_BYTES) | - memblock_alloc_from_nopanic(size, 0, min_addr) + memblock_alloc_from_nopanic(size, SMP_CACHE_BYTES, min_addr) | - memblock_alloc_node(size, 0, nid) + memblock_alloc_node(size, SMP_CACHE_BYTES, nid) ) [mhocko@suse.com: changelog update] [akpm@linux-foundation.org: coding-style fixes] [rppt@linux.ibm.com: fix missed uses of implicit alignment] Link: http://lkml.kernel.org/r/20181016133656.GA10925@rapoport-lnx Link: http://lkml.kernel.org/r/1538687224-17535-1-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport <rppt@linux.vnet.ibm.com> Suggested-by: Michal Hocko <mhocko@suse.com> Acked-by: Paul Burton <paul.burton@mips.com> [MIPS] Acked-by: Michael Ellerman <mpe@ellerman.id.au> [powerpc] Acked-by: Michal Hocko <mhocko@suse.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Chris Zankel <chris@zankel.net> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Guan Xuetao <gxt@pku.edu.cn> Cc: Ingo Molnar <mingo@redhat.com> Cc: Matt Turner <mattst88@gmail.com> Cc: Michal Simek <monstr@monstr.eu> Cc: Richard Weinberger <richard@nod.at> Cc: Russell King <linux@armlinux.org.uk> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Tony Luck <tony.luck@intel.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-10-30 15:09:57 -07:00
align = !align ? SMP_CACHE_BYTES : align;
base = memblock_phys_alloc_range(size, align, start, end);
if (!base)
return -ENOMEM;
*res_base = base;
if (nomap) {
err = memblock_mark_nomap(base, size);
if (err)
memblock_phys_free(base, size);
}
of: fix kmemleak crash caused by imbalance in early memory reservation Marc Gonzalez reported the following kmemleak crash: Unable to handle kernel paging request at virtual address ffffffc021e00000 Mem abort info: ESR = 0x96000006 Exception class = DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000006 CM = 0, WnR = 0 swapper pgtable: 4k pages, 39-bit VAs, pgdp = (____ptrval____) [ffffffc021e00000] pgd=000000017e3ba803, pud=000000017e3ba803, pmd=0000000000000000 Internal error: Oops: 96000006 [#1] PREEMPT SMP Modules linked in: CPU: 6 PID: 523 Comm: kmemleak Tainted: G S W 5.0.0-rc1 #13 Hardware name: Qualcomm Technologies, Inc. MSM8998 v1 MTP (DT) pstate: 80000085 (Nzcv daIf -PAN -UAO) pc : scan_block+0x70/0x190 lr : scan_block+0x6c/0x190 Process kmemleak (pid: 523, stack limit = 0x(____ptrval____)) Call trace: scan_block+0x70/0x190 scan_gray_list+0x108/0x1c0 kmemleak_scan+0x33c/0x7c0 kmemleak_scan_thread+0x98/0xf0 kthread+0x11c/0x120 ret_from_fork+0x10/0x1c Code: f9000fb4 d503201f 97ffffd2 35000580 (f9400260) The crash happens when a no-map area is allocated in early_init_dt_alloc_reserved_memory_arch(). The allocated region is registered with kmemleak, but it is then removed from memblock using memblock_remove() that is not kmemleak-aware. Replacing memblock_phys_alloc_range() with memblock_find_in_range() makes sure that the allocated memory is not added to kmemleak and then memblock_remove()'ing this memory is safe. As a bonus, since memblock_find_in_range() ensures the allocation in the specified range, the bounds check can be removed. [rppt@linux.ibm.com: of: fix parameters order for call to memblock_find_in_range()] Link: http://lkml.kernel.org/r/20190221112619.GC32004@rapoport-lnx Link: http://lkml.kernel.org/r/20190213181921.GB15270@rapoport-lnx Fixes: 3f0c820664483 ("drivers: of: add initialization code for dynamic reserved memory") Signed-off-by: Mike Rapoport <rppt@linux.ibm.com> Acked-by: Marek Szyprowski <m.szyprowski@samsung.com> Acked-by: Prateek Patel <prpatel@nvidia.com> Tested-by: Marc Gonzalez <marc.w.gonzalez@free.fr> Cc: Rob Herring <robh+dt@kernel.org> Cc: Frank Rowand <frowand.list@gmail.com> Cc: Catalin Marinas <catalin.marinas@arm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2019-03-11 23:30:58 -07:00
of: reserved_mem: Have kmemleak ignore dynamically allocated reserved mem Patch series "Fix kmemleak crashes when scanning CMA regions", v2. When trying to boot a device with an ARM64 kernel with the following config options enabled: CONFIG_DEBUG_PAGEALLOC=y CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT=y CONFIG_DEBUG_KMEMLEAK=y a crash is encountered when kmemleak starts to scan the list of gray or allocated objects that it maintains. Upon closer inspection, it was observed that these page-faults always occurred when kmemleak attempted to scan a CMA region. At the moment, kmemleak is made aware of CMA regions that are specified through the devicetree to be dynamically allocated within a range of addresses. However, kmemleak should not need to scan CMA regions or any reserved memory region, as those regions can be used for DMA transfers between drivers and peripherals, and thus wouldn't contain anything useful for kmemleak. Additionally, since CMA regions are unmapped from the kernel's address space when they are freed to the buddy allocator at boot when CONFIG_DEBUG_PAGEALLOC is enabled, kmemleak shouldn't attempt to access those memory regions, as that will trigger a crash. Thus, kmemleak should ignore all dynamically allocated reserved memory regions. This patch (of 1): Currently, kmemleak ignores dynamically allocated reserved memory regions that don't have a kernel mapping. However, regions that do retain a kernel mapping (e.g. CMA regions) do get scanned by kmemleak. This is not ideal for two reasons: 1 kmemleak works by scanning memory regions for pointers to allocated objects to determine if those objects have been leaked or not. However, reserved memory regions can be used between drivers and peripherals for DMA transfers, and thus, would not contain pointers to allocated objects, making it unnecessary for kmemleak to scan these reserved memory regions. 2 When CONFIG_DEBUG_PAGEALLOC is enabled, along with kmemleak, the CMA reserved memory regions are unmapped from the kernel's address space when they are freed to buddy at boot. These CMA reserved regions are still tracked by kmemleak, however, and when kmemleak attempts to scan them, a crash will happen, as accessing the CMA region will result in a page-fault, since the regions are unmapped. Thus, use kmemleak_ignore_phys() for all dynamically allocated reserved memory regions, instead of those that do not have a kernel mapping associated with them. Link: https://lkml.kernel.org/r/20230208232001.2052777-1-isaacmanjarres@google.com Link: https://lkml.kernel.org/r/20230208232001.2052777-2-isaacmanjarres@google.com Fixes: a7259df76702 ("memblock: make memblock_find_in_range method private") Signed-off-by: Isaac J. Manjarres <isaacmanjarres@google.com> Acked-by: Mike Rapoport (IBM) <rppt@kernel.org> Acked-by: Catalin Marinas <catalin.marinas@arm.com> Cc: Frank Rowand <frowand.list@gmail.com> Cc: Kirill A. Shutemov <kirill.shtuemov@linux.intel.com> Cc: Nick Kossifidis <mick@ics.forth.gr> Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Cc: Rob Herring <robh@kernel.org> Cc: Russell King (Oracle) <rmk+kernel@armlinux.org.uk> Cc: Saravana Kannan <saravanak@google.com> Cc: <stable@vger.kernel.org> [5.15+] Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2023-02-08 15:20:00 -08:00
kmemleak_ignore_phys(base);
return err;
}
of: of_reserved_mem: Demote kernel-doc abuses Fixes the following W=1 kernel build warning(s): drivers/of/of_reserved_mem.c:53: warning: Function parameter or member 'node' not described in 'fdt_reserved_mem_save_node' drivers/of/of_reserved_mem.c:53: warning: Function parameter or member 'uname' not described in 'fdt_reserved_mem_save_node' drivers/of/of_reserved_mem.c:53: warning: Function parameter or member 'base' not described in 'fdt_reserved_mem_save_node' drivers/of/of_reserved_mem.c:53: warning: Function parameter or member 'size' not described in 'fdt_reserved_mem_save_node' drivers/of/of_reserved_mem.c:76: warning: Function parameter or member 'node' not described in '__reserved_mem_alloc_size' drivers/of/of_reserved_mem.c:76: warning: Function parameter or member 'uname' not described in '__reserved_mem_alloc_size' drivers/of/of_reserved_mem.c:76: warning: Function parameter or member 'res_base' not described in '__reserved_mem_alloc_size' drivers/of/of_reserved_mem.c:76: warning: Function parameter or member 'res_size' not described in '__reserved_mem_alloc_size' drivers/of/of_reserved_mem.c:171: warning: Function parameter or member 'rmem' not described in '__reserved_mem_init_node' Cc: Rob Herring <robh+dt@kernel.org> Cc: Frank Rowand <frowand.list@gmail.com> Cc: Marek Szyprowski <m.szyprowski@samsung.com> Cc: Josh Cartwright <joshc@codeaurora.org> Cc: devicetree@vger.kernel.org Signed-off-by: Lee Jones <lee.jones@linaro.org> Signed-off-by: Rob Herring <robh@kernel.org> Link: https://lore.kernel.org/r/20210318104036.3175910-11-lee.jones@linaro.org
2021-03-18 10:40:36 +00:00
/*
* fdt_reserved_mem_save_node() - save fdt node for second pass initialization
*/
void __init fdt_reserved_mem_save_node(unsigned long node, const char *uname,
phys_addr_t base, phys_addr_t size)
{
struct reserved_mem *rmem = &reserved_mem[reserved_mem_count];
if (reserved_mem_count == ARRAY_SIZE(reserved_mem)) {
pr_err("not enough space for all defined regions.\n");
return;
}
rmem->fdt_node = node;
rmem->name = uname;
rmem->base = base;
rmem->size = size;
reserved_mem_count++;
return;
}
/*
* __reserved_mem_alloc_in_range() - allocate reserved memory described with
* 'alloc-ranges'. Choose bottom-up/top-down depending on nearby existing
* reserved regions to keep the reserved memory contiguous if possible.
*/
static int __init __reserved_mem_alloc_in_range(phys_addr_t size,
phys_addr_t align, phys_addr_t start, phys_addr_t end, bool nomap,
phys_addr_t *res_base)
{
bool prev_bottom_up = memblock_bottom_up();
bool bottom_up = false, top_down = false;
int ret, i;
for (i = 0; i < reserved_mem_count; i++) {
struct reserved_mem *rmem = &reserved_mem[i];
/* Skip regions that were not reserved yet */
if (rmem->size == 0)
continue;
/*
* If range starts next to an existing reservation, use bottom-up:
* |....RRRR................RRRRRRRR..............|
* --RRRR------
*/
if (start >= rmem->base && start <= (rmem->base + rmem->size))
bottom_up = true;
/*
* If range ends next to an existing reservation, use top-down:
* |....RRRR................RRRRRRRR..............|
* -------RRRR-----
*/
if (end >= rmem->base && end <= (rmem->base + rmem->size))
top_down = true;
}
/* Change setting only if either bottom-up or top-down was selected */
if (bottom_up != top_down)
memblock_set_bottom_up(bottom_up);
ret = early_init_dt_alloc_reserved_memory_arch(size, align,
start, end, nomap, res_base);
/* Restore old setting if needed */
if (bottom_up != top_down)
memblock_set_bottom_up(prev_bottom_up);
return ret;
}
of: of_reserved_mem: Demote kernel-doc abuses Fixes the following W=1 kernel build warning(s): drivers/of/of_reserved_mem.c:53: warning: Function parameter or member 'node' not described in 'fdt_reserved_mem_save_node' drivers/of/of_reserved_mem.c:53: warning: Function parameter or member 'uname' not described in 'fdt_reserved_mem_save_node' drivers/of/of_reserved_mem.c:53: warning: Function parameter or member 'base' not described in 'fdt_reserved_mem_save_node' drivers/of/of_reserved_mem.c:53: warning: Function parameter or member 'size' not described in 'fdt_reserved_mem_save_node' drivers/of/of_reserved_mem.c:76: warning: Function parameter or member 'node' not described in '__reserved_mem_alloc_size' drivers/of/of_reserved_mem.c:76: warning: Function parameter or member 'uname' not described in '__reserved_mem_alloc_size' drivers/of/of_reserved_mem.c:76: warning: Function parameter or member 'res_base' not described in '__reserved_mem_alloc_size' drivers/of/of_reserved_mem.c:76: warning: Function parameter or member 'res_size' not described in '__reserved_mem_alloc_size' drivers/of/of_reserved_mem.c:171: warning: Function parameter or member 'rmem' not described in '__reserved_mem_init_node' Cc: Rob Herring <robh+dt@kernel.org> Cc: Frank Rowand <frowand.list@gmail.com> Cc: Marek Szyprowski <m.szyprowski@samsung.com> Cc: Josh Cartwright <joshc@codeaurora.org> Cc: devicetree@vger.kernel.org Signed-off-by: Lee Jones <lee.jones@linaro.org> Signed-off-by: Rob Herring <robh@kernel.org> Link: https://lore.kernel.org/r/20210318104036.3175910-11-lee.jones@linaro.org
2021-03-18 10:40:36 +00:00
/*
* __reserved_mem_alloc_size() - allocate reserved memory described by
* 'size', 'alignment' and 'alloc-ranges' properties.
*/
static int __init __reserved_mem_alloc_size(unsigned long node,
const char *uname, phys_addr_t *res_base, phys_addr_t *res_size)
{
int t_len = (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32);
phys_addr_t start = 0, end = 0;
phys_addr_t base = 0, align = 0, size;
int len;
const __be32 *prop;
bool nomap;
int ret;
prop = of_get_flat_dt_prop(node, "size", &len);
if (!prop)
return -EINVAL;
if (len != dt_root_size_cells * sizeof(__be32)) {
pr_err("invalid size property in '%s' node.\n", uname);
return -EINVAL;
}
size = dt_mem_next_cell(dt_root_size_cells, &prop);
prop = of_get_flat_dt_prop(node, "alignment", &len);
if (prop) {
if (len != dt_root_addr_cells * sizeof(__be32)) {
pr_err("invalid alignment property in '%s' node.\n",
uname);
return -EINVAL;
}
align = dt_mem_next_cell(dt_root_addr_cells, &prop);
}
nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL;
/* Need adjust the alignment to satisfy the CMA requirement */
if (IS_ENABLED(CONFIG_CMA)
&& of_flat_dt_is_compatible(node, "shared-dma-pool")
&& of_get_flat_dt_prop(node, "reusable", NULL)
cma: factor out minimum alignment requirement Patch series "mm: enforce pageblock_order < MAX_ORDER". Having pageblock_order >= MAX_ORDER seems to be able to happen in corner cases and some parts of the kernel are not prepared for it. For example, Aneesh has shown [1] that such kernels can be compiled on ppc64 with 64k base pages by setting FORCE_MAX_ZONEORDER=8, which will run into a WARN_ON_ONCE(order >= MAX_ORDER) in comapction code right during boot. We can get pageblock_order >= MAX_ORDER when the default hugetlb size is bigger than the maximum allocation granularity of the buddy, in which case we are no longer talking about huge pages but instead gigantic pages. Having pageblock_order >= MAX_ORDER can only make alloc_contig_range() of such gigantic pages more likely to succeed. Reliable use of gigantic pages either requires boot time allcoation or CMA, no need to overcomplicate some places in the kernel to optimize for corner cases that are broken in other areas of the kernel. This patch (of 2): Let's enforce pageblock_order < MAX_ORDER and simplify. Especially patch #1 can be regarded a cleanup before: [PATCH v5 0/6] Use pageblock_order for cma and alloc_contig_range alignment. [2] [1] https://lkml.kernel.org/r/87r189a2ks.fsf@linux.ibm.com [2] https://lkml.kernel.org/r/20220211164135.1803616-1-zi.yan@sent.com Link: https://lkml.kernel.org/r/20220214174132.219303-2-david@redhat.com Signed-off-by: David Hildenbrand <david@redhat.com> Reviewed-by: Zi Yan <ziy@nvidia.com> Acked-by: Rob Herring <robh@kernel.org> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Frank Rowand <frowand.list@gmail.com> Cc: Michael S. Tsirkin <mst@redhat.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Marek Szyprowski <m.szyprowski@samsung.com> Cc: Robin Murphy <robin.murphy@arm.com> Cc: Minchan Kim <minchan@kernel.org> Cc: Vlastimil Babka <vbabka@suse.cz> Cc: John Garry via iommu <iommu@lists.linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2022-03-22 14:43:17 -07:00
&& !nomap)
align = max_t(phys_addr_t, align, CMA_MIN_ALIGNMENT_BYTES);
prop = of_get_flat_dt_prop(node, "alloc-ranges", &len);
if (prop) {
if (len % t_len != 0) {
pr_err("invalid alloc-ranges property in '%s', skipping node.\n",
uname);
return -EINVAL;
}
base = 0;
while (len > 0) {
start = dt_mem_next_cell(dt_root_addr_cells, &prop);
end = start + dt_mem_next_cell(dt_root_size_cells,
&prop);
ret = __reserved_mem_alloc_in_range(size, align,
start, end, nomap, &base);
if (ret == 0) {
pr_debug("allocated memory for '%s' node: base %pa, size %lu MiB\n",
uname, &base,
(unsigned long)(size / SZ_1M));
break;
}
len -= t_len;
}
} else {
ret = early_init_dt_alloc_reserved_memory_arch(size, align,
0, 0, nomap, &base);
if (ret == 0)
pr_debug("allocated memory for '%s' node: base %pa, size %lu MiB\n",
uname, &base, (unsigned long)(size / SZ_1M));
}
if (base == 0) {
pr_err("failed to allocate memory for node '%s': size %lu MiB\n",
uname, (unsigned long)(size / SZ_1M));
return -ENOMEM;
}
*res_base = base;
*res_size = size;
return 0;
}
static const struct of_device_id __rmem_of_table_sentinel
__used __section("__reservedmem_of_table_end");
of: of_reserved_mem: Demote kernel-doc abuses Fixes the following W=1 kernel build warning(s): drivers/of/of_reserved_mem.c:53: warning: Function parameter or member 'node' not described in 'fdt_reserved_mem_save_node' drivers/of/of_reserved_mem.c:53: warning: Function parameter or member 'uname' not described in 'fdt_reserved_mem_save_node' drivers/of/of_reserved_mem.c:53: warning: Function parameter or member 'base' not described in 'fdt_reserved_mem_save_node' drivers/of/of_reserved_mem.c:53: warning: Function parameter or member 'size' not described in 'fdt_reserved_mem_save_node' drivers/of/of_reserved_mem.c:76: warning: Function parameter or member 'node' not described in '__reserved_mem_alloc_size' drivers/of/of_reserved_mem.c:76: warning: Function parameter or member 'uname' not described in '__reserved_mem_alloc_size' drivers/of/of_reserved_mem.c:76: warning: Function parameter or member 'res_base' not described in '__reserved_mem_alloc_size' drivers/of/of_reserved_mem.c:76: warning: Function parameter or member 'res_size' not described in '__reserved_mem_alloc_size' drivers/of/of_reserved_mem.c:171: warning: Function parameter or member 'rmem' not described in '__reserved_mem_init_node' Cc: Rob Herring <robh+dt@kernel.org> Cc: Frank Rowand <frowand.list@gmail.com> Cc: Marek Szyprowski <m.szyprowski@samsung.com> Cc: Josh Cartwright <joshc@codeaurora.org> Cc: devicetree@vger.kernel.org Signed-off-by: Lee Jones <lee.jones@linaro.org> Signed-off-by: Rob Herring <robh@kernel.org> Link: https://lore.kernel.org/r/20210318104036.3175910-11-lee.jones@linaro.org
2021-03-18 10:40:36 +00:00
/*
* __reserved_mem_init_node() - call region specific reserved memory init code
*/
static int __init __reserved_mem_init_node(struct reserved_mem *rmem)
{
extern const struct of_device_id __reservedmem_of_table[];
const struct of_device_id *i;
int ret = -ENOENT;
for (i = __reservedmem_of_table; i < &__rmem_of_table_sentinel; i++) {
reservedmem_of_init_fn initfn = i->data;
const char *compat = i->compatible;
if (!of_flat_dt_is_compatible(rmem->fdt_node, compat))
continue;
ret = initfn(rmem);
if (ret == 0) {
pr_info("initialized node %s, compatible id %s\n",
rmem->name, compat);
break;
}
}
return ret;
}
static int __init __rmem_cmp(const void *a, const void *b)
{
const struct reserved_mem *ra = a, *rb = b;
if (ra->base < rb->base)
return -1;
if (ra->base > rb->base)
return 1;
/*
* Put the dynamic allocations (address == 0, size == 0) before static
* allocations at address 0x0 so that overlap detection works
* correctly.
*/
if (ra->size < rb->size)
return -1;
if (ra->size > rb->size)
return 1;
if (ra->fdt_node < rb->fdt_node)
return -1;
if (ra->fdt_node > rb->fdt_node)
return 1;
return 0;
}
static void __init __rmem_check_for_overlap(void)
{
int i;
if (reserved_mem_count < 2)
return;
sort(reserved_mem, reserved_mem_count, sizeof(reserved_mem[0]),
__rmem_cmp, NULL);
for (i = 0; i < reserved_mem_count - 1; i++) {
struct reserved_mem *this, *next;
this = &reserved_mem[i];
next = &reserved_mem[i + 1];
if (this->base + this->size > next->base) {
phys_addr_t this_end, next_end;
this_end = this->base + this->size;
next_end = next->base + next->size;
pr_err("OVERLAP DETECTED!\n%s (%pa--%pa) overlaps with %s (%pa--%pa)\n",
this->name, &this->base, &this_end,
next->name, &next->base, &next_end);
}
}
}
/**
* fdt_init_reserved_mem() - allocate and init all saved reserved memory regions
*/
void __init fdt_init_reserved_mem(void)
{
int i;
/* check for overlapping reserved regions */
__rmem_check_for_overlap();
for (i = 0; i < reserved_mem_count; i++) {
struct reserved_mem *rmem = &reserved_mem[i];
unsigned long node = rmem->fdt_node;
int len;
const __be32 *prop;
int err = 0;
bool nomap;
nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL;
prop = of_get_flat_dt_prop(node, "phandle", &len);
if (!prop)
prop = of_get_flat_dt_prop(node, "linux,phandle", &len);
if (prop)
rmem->phandle = of_read_number(prop, len/4);
if (rmem->size == 0)
err = __reserved_mem_alloc_size(node, rmem->name,
&rmem->base, &rmem->size);
if (err == 0) {
err = __reserved_mem_init_node(rmem);
if (err != 0 && err != -ENOENT) {
pr_info("node %s compatible matching fail\n",
rmem->name);
if (nomap)
memblock_clear_nomap(rmem->base, rmem->size);
else
memblock_phys_free(rmem->base,
rmem->size);
} else {
phys_addr_t end = rmem->base + rmem->size - 1;
bool reusable =
(of_get_flat_dt_prop(node, "reusable", NULL)) != NULL;
pr_info("%pa..%pa (%lu KiB) %s %s %s\n",
&rmem->base, &end, (unsigned long)(rmem->size / SZ_1K),
nomap ? "nomap" : "map",
reusable ? "reusable" : "non-reusable",
rmem->name ? rmem->name : "unknown");
}
}
}
}
static inline struct reserved_mem *__find_rmem(struct device_node *node)
{
unsigned int i;
if (!node->phandle)
return NULL;
for (i = 0; i < reserved_mem_count; i++)
if (reserved_mem[i].phandle == node->phandle)
return &reserved_mem[i];
return NULL;
}
struct rmem_assigned_device {
struct device *dev;
struct reserved_mem *rmem;
struct list_head list;
};
static LIST_HEAD(of_rmem_assigned_device_list);
static DEFINE_MUTEX(of_rmem_assigned_device_mutex);
/**
* of_reserved_mem_device_init_by_idx() - assign reserved memory region to
* given device
* @dev: Pointer to the device to configure
* @np: Pointer to the device_node with 'reserved-memory' property
* @idx: Index of selected region
*
* This function assigns respective DMA-mapping operations based on reserved
* memory region specified by 'memory-region' property in @np node to the @dev
* device. When driver needs to use more than one reserved memory region, it
* should allocate child devices and initialize regions by name for each of
* child device.
*
* Returns error code or zero on success.
*/
int of_reserved_mem_device_init_by_idx(struct device *dev,
struct device_node *np, int idx)
{
struct rmem_assigned_device *rd;
struct device_node *target;
struct reserved_mem *rmem;
int ret;
if (!np || !dev)
return -EINVAL;
target = of_parse_phandle(np, "memory-region", idx);
if (!target)
return -ENODEV;
if (!of_device_is_available(target)) {
of_node_put(target);
return 0;
}
rmem = __find_rmem(target);
of_node_put(target);
if (!rmem || !rmem->ops || !rmem->ops->device_init)
return -EINVAL;
rd = kmalloc(sizeof(struct rmem_assigned_device), GFP_KERNEL);
if (!rd)
return -ENOMEM;
ret = rmem->ops->device_init(rmem, dev);
if (ret == 0) {
rd->dev = dev;
rd->rmem = rmem;
mutex_lock(&of_rmem_assigned_device_mutex);
list_add(&rd->list, &of_rmem_assigned_device_list);
mutex_unlock(&of_rmem_assigned_device_mutex);
dev_info(dev, "assigned reserved memory node %s\n", rmem->name);
} else {
kfree(rd);
}
return ret;
}
EXPORT_SYMBOL_GPL(of_reserved_mem_device_init_by_idx);
/**
* of_reserved_mem_device_init_by_name() - assign named reserved memory region
* to given device
* @dev: pointer to the device to configure
* @np: pointer to the device node with 'memory-region' property
* @name: name of the selected memory region
*
* Returns: 0 on success or a negative error-code on failure.
*/
int of_reserved_mem_device_init_by_name(struct device *dev,
struct device_node *np,
const char *name)
{
int idx = of_property_match_string(np, "memory-region-names", name);
return of_reserved_mem_device_init_by_idx(dev, np, idx);
}
EXPORT_SYMBOL_GPL(of_reserved_mem_device_init_by_name);
/**
* of_reserved_mem_device_release() - release reserved memory device structures
* @dev: Pointer to the device to deconfigure
*
* This function releases structures allocated for memory region handling for
* the given device.
*/
void of_reserved_mem_device_release(struct device *dev)
{
struct rmem_assigned_device *rd, *tmp;
LIST_HEAD(release_list);
mutex_lock(&of_rmem_assigned_device_mutex);
list_for_each_entry_safe(rd, tmp, &of_rmem_assigned_device_list, list) {
if (rd->dev == dev)
list_move_tail(&rd->list, &release_list);
}
mutex_unlock(&of_rmem_assigned_device_mutex);
list_for_each_entry_safe(rd, tmp, &release_list, list) {
if (rd->rmem && rd->rmem->ops && rd->rmem->ops->device_release)
rd->rmem->ops->device_release(rd->rmem, dev);
kfree(rd);
}
}
EXPORT_SYMBOL_GPL(of_reserved_mem_device_release);
/**
* of_reserved_mem_lookup() - acquire reserved_mem from a device node
* @np: node pointer of the desired reserved-memory region
*
* This function allows drivers to acquire a reference to the reserved_mem
* struct based on a device node handle.
*
* Returns a reserved_mem reference, or NULL on error.
*/
struct reserved_mem *of_reserved_mem_lookup(struct device_node *np)
{
const char *name;
int i;
if (!np->full_name)
return NULL;
name = kbasename(np->full_name);
for (i = 0; i < reserved_mem_count; i++)
if (!strcmp(reserved_mem[i].name, name))
return &reserved_mem[i];
return NULL;
}
EXPORT_SYMBOL_GPL(of_reserved_mem_lookup);