8a725e4694
Currently, when adding memory, we create entries in /sys/firmware/memmap/ as "System RAM". This will lead to kexec-tools to add that memory to the fixed-up initial memmap for a kexec kernel (loaded via kexec_load()). The memory will be considered initial System RAM by the kexec'd kernel and can no longer be reconfigured. This is not what happens during a real reboot. Let's add our memory via add_memory_driver_managed() now, so we won't create entries in /sys/firmware/memmap/ and indicate the memory as "System RAM (kmem)" in /proc/iomem. This allows everybody (especially kexec-tools) to identify that this memory is special and has to be treated differently than ordinary (hotplugged) System RAM. Before configuring the namespace: [root@localhost ~]# cat /proc/iomem ... 140000000-33fffffff : Persistent Memory 140000000-33fffffff : namespace0.0 3280000000-32ffffffff : PCI Bus 0000:00 After configuring the namespace: [root@localhost ~]# cat /proc/iomem ... 140000000-33fffffff : Persistent Memory 140000000-1481fffff : namespace0.0 148200000-33fffffff : dax0.0 3280000000-32ffffffff : PCI Bus 0000:00 After loading kmem before this change: [root@localhost ~]# cat /proc/iomem ... 140000000-33fffffff : Persistent Memory 140000000-1481fffff : namespace0.0 150000000-33fffffff : dax0.0 150000000-33fffffff : System RAM 3280000000-32ffffffff : PCI Bus 0000:00 After loading kmem after this change: [root@localhost ~]# cat /proc/iomem ... 140000000-33fffffff : Persistent Memory 140000000-1481fffff : namespace0.0 150000000-33fffffff : dax0.0 150000000-33fffffff : System RAM (kmem) 3280000000-32ffffffff : PCI Bus 0000:00 After a proper reboot: [root@localhost ~]# cat /proc/iomem ... 140000000-33fffffff : Persistent Memory 140000000-1481fffff : namespace0.0 148200000-33fffffff : dax0.0 3280000000-32ffffffff : PCI Bus 0000:00 Within the kexec kernel before this change: [root@localhost ~]# cat /proc/iomem ... 140000000-33fffffff : Persistent Memory 140000000-1481fffff : namespace0.0 150000000-33fffffff : System RAM 3280000000-32ffffffff : PCI Bus 0000:00 Within the kexec kernel after this change: [root@localhost ~]# cat /proc/iomem ... 140000000-33fffffff : Persistent Memory 140000000-1481fffff : namespace0.0 148200000-33fffffff : dax0.0 3280000000-32ffffffff : PCI Bus 0000:00 /sys/firmware/memmap/ before this change: 0000000000000000-000000000009fc00 (System RAM) 000000000009fc00-00000000000a0000 (Reserved) 00000000000f0000-0000000000100000 (Reserved) 0000000000100000-00000000bffdf000 (System RAM) 00000000bffdf000-00000000c0000000 (Reserved) 00000000feffc000-00000000ff000000 (Reserved) 00000000fffc0000-0000000100000000 (Reserved) 0000000100000000-0000000140000000 (System RAM) 0000000150000000-0000000340000000 (System RAM) /sys/firmware/memmap/ after a proper reboot: 0000000000000000-000000000009fc00 (System RAM) 000000000009fc00-00000000000a0000 (Reserved) 00000000000f0000-0000000000100000 (Reserved) 0000000000100000-00000000bffdf000 (System RAM) 00000000bffdf000-00000000c0000000 (Reserved) 00000000feffc000-00000000ff000000 (Reserved) 00000000fffc0000-0000000100000000 (Reserved) 0000000100000000-0000000140000000 (System RAM) /sys/firmware/memmap/ after this change: 0000000000000000-000000000009fc00 (System RAM) 000000000009fc00-00000000000a0000 (Reserved) 00000000000f0000-0000000000100000 (Reserved) 0000000000100000-00000000bffdf000 (System RAM) 00000000bffdf000-00000000c0000000 (Reserved) 00000000feffc000-00000000ff000000 (Reserved) 00000000fffc0000-0000000100000000 (Reserved) 0000000100000000-0000000140000000 (System RAM) kexec-tools already seem to basically ignore any System RAM that's not on top level when searching for areas to place kexec images - but also for determining crash areas to dump via kdump. Changing the resource name won't have an impact. Handle unloading of the driver after memory hotremove failed properly, by duplicating the string if necessary. Signed-off-by: David Hildenbrand <david@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Acked-by: Pankaj Gupta <pankaj.gupta.linux@gmail.com> Cc: Michal Hocko <mhocko@suse.com> Cc: Pankaj Gupta <pankaj.gupta.linux@gmail.com> Cc: Wei Yang <richard.weiyang@gmail.com> Cc: Baoquan He <bhe@redhat.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: Eric Biederman <ebiederm@xmission.com> Cc: Pavel Tatashin <pasha.tatashin@soleen.com> Cc: Dan Williams <dan.j.williams@intel.com> Link: http://lkml.kernel.org/r/20200508084217.9160-5-david@redhat.com Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
63 lines
1.8 KiB
C
63 lines
1.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/*
|
|
* Copyright(c) 2016 Intel Corporation. All rights reserved.
|
|
*/
|
|
#ifndef __DAX_PRIVATE_H__
|
|
#define __DAX_PRIVATE_H__
|
|
|
|
#include <linux/device.h>
|
|
#include <linux/cdev.h>
|
|
|
|
/* private routines between core files */
|
|
struct dax_device;
|
|
struct dax_device *inode_dax(struct inode *inode);
|
|
struct inode *dax_inode(struct dax_device *dax_dev);
|
|
int dax_bus_init(void);
|
|
void dax_bus_exit(void);
|
|
|
|
/**
|
|
* struct dax_region - mapping infrastructure for dax devices
|
|
* @id: kernel-wide unique region for a memory range
|
|
* @target_node: effective numa node if this memory range is onlined
|
|
* @kref: to pin while other agents have a need to do lookups
|
|
* @dev: parent device backing this region
|
|
* @align: allocation and mapping alignment for child dax devices
|
|
* @res: physical address range of the region
|
|
* @pfn_flags: identify whether the pfns are paged back or not
|
|
*/
|
|
struct dax_region {
|
|
int id;
|
|
int target_node;
|
|
struct kref kref;
|
|
struct device *dev;
|
|
unsigned int align;
|
|
struct resource res;
|
|
unsigned long long pfn_flags;
|
|
};
|
|
|
|
/**
|
|
* struct dev_dax - instance data for a subdivision of a dax region, and
|
|
* data while the device is activated in the driver.
|
|
* @region - parent region
|
|
* @dax_dev - core dax functionality
|
|
* @target_node: effective numa node if dev_dax memory range is onlined
|
|
* @dev - device core
|
|
* @pgmap - pgmap for memmap setup / lifetime (driver owned)
|
|
* @dax_mem_res: physical address range of hotadded DAX memory
|
|
* @dax_mem_name: name for hotadded DAX memory via add_memory_driver_managed()
|
|
*/
|
|
struct dev_dax {
|
|
struct dax_region *region;
|
|
struct dax_device *dax_dev;
|
|
int target_node;
|
|
struct device dev;
|
|
struct dev_pagemap pgmap;
|
|
struct resource *dax_kmem_res;
|
|
};
|
|
|
|
static inline struct dev_dax *to_dev_dax(struct device *dev)
|
|
{
|
|
return container_of(dev, struct dev_dax, dev);
|
|
}
|
|
#endif
|