proc/vmcore: convert oldmem_pfn_is_ram callback to more generic vmcore callbacks
Let's support multiple registered callbacks, making sure that registering vmcore callbacks cannot fail. Make the callback return a bool instead of an int, handling how to deal with errors internally. Drop unused HAVE_OLDMEM_PFN_IS_RAM.

We soon want to make use of this infrastructure from other drivers: virtio-mem, registering one callback for each virtio-mem device, to prevent reading unplugged virtio-mem memory.

Handle it via a generic vmcore_cb structure, prepared for future extensions: for example, once we support virtio-mem on s390x where the vmcore is completely constructed in the second kernel, we want to detect and add plugged virtio-mem memory ranges to the vmcore in order for them to get dumped properly.

Handle corner cases that are unexpected and shouldn't happen in sane setups: registering a callback after the vmcore has already been opened (warn only) and unregistering a callback after the vmcore has already been opened (warn and essentially read only zeroes from that point on).

Link: https://lkml.kernel.org/r/20211005121430.30136-6-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
2c9feeaedf
commit
cc5f2704c9
@ -73,12 +73,23 @@ static int gart_mem_pfn_is_ram(unsigned long pfn)
|
||||
(pfn >= aperture_pfn_start + aperture_page_count));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PROC_VMCORE
|
||||
static bool gart_oldmem_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn)
|
||||
{
|
||||
return !!gart_mem_pfn_is_ram(pfn);
|
||||
}
|
||||
|
||||
static struct vmcore_cb gart_vmcore_cb = {
|
||||
.pfn_is_ram = gart_oldmem_pfn_is_ram,
|
||||
};
|
||||
#endif
|
||||
|
||||
static void __init exclude_from_core(u64 aper_base, u32 aper_order)
|
||||
{
|
||||
aperture_pfn_start = aper_base >> PAGE_SHIFT;
|
||||
aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT;
|
||||
#ifdef CONFIG_PROC_VMCORE
|
||||
WARN_ON(register_oldmem_pfn_is_ram(&gart_mem_pfn_is_ram));
|
||||
register_vmcore_cb(&gart_vmcore_cb);
|
||||
#endif
|
||||
#ifdef CONFIG_PROC_KCORE
|
||||
WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram));
|
||||
|
@ -12,10 +12,10 @@
|
||||
* The kdump kernel has to check whether a pfn of the crashed kernel
|
||||
* was a ballooned page. vmcore is using this function to decide
|
||||
* whether to access a pfn of the crashed kernel.
|
||||
* Returns 0 if the pfn is not backed by a RAM page, the caller may
|
||||
* Returns "false" if the pfn is not backed by a RAM page, the caller may
|
||||
* handle the pfn special in this case.
|
||||
*/
|
||||
static int xen_oldmem_pfn_is_ram(unsigned long pfn)
|
||||
static bool xen_vmcore_pfn_is_ram(struct vmcore_cb *cb, unsigned long pfn)
|
||||
{
|
||||
struct xen_hvm_get_mem_type a = {
|
||||
.domid = DOMID_SELF,
|
||||
@ -24,10 +24,13 @@ static int xen_oldmem_pfn_is_ram(unsigned long pfn)
|
||||
|
||||
if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a)) {
|
||||
pr_warn_once("Unexpected HVMOP_get_mem_type failure\n");
|
||||
return -ENXIO;
|
||||
return true;
|
||||
}
|
||||
return a.mem_type != HVMMEM_mmio_dm;
|
||||
}
|
||||
static struct vmcore_cb xen_vmcore_cb = {
|
||||
.pfn_is_ram = xen_vmcore_pfn_is_ram,
|
||||
};
|
||||
#endif
|
||||
|
||||
static void xen_hvm_exit_mmap(struct mm_struct *mm)
|
||||
@ -61,6 +64,6 @@ void __init xen_hvm_init_mmu_ops(void)
|
||||
if (is_pagetable_dying_supported())
|
||||
pv_ops.mmu.exit_mmap = xen_hvm_exit_mmap;
|
||||
#ifdef CONFIG_PROC_VMCORE
|
||||
WARN_ON(register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram));
|
||||
register_vmcore_cb(&xen_vmcore_cb);
|
||||
#endif
|
||||
}
|
||||
|
@ -62,46 +62,75 @@ core_param(novmcoredd, vmcoredd_disabled, bool, 0);
|
||||
/* Device Dump Size */
|
||||
static size_t vmcoredd_orig_sz;
|
||||
|
||||
/*
|
||||
* Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error
|
||||
* The called function has to take care of module refcounting.
|
||||
*/
|
||||
static int (*oldmem_pfn_is_ram)(unsigned long pfn);
|
||||
static DECLARE_RWSEM(vmcore_cb_rwsem);
|
||||
/* List of registered vmcore callbacks. */
|
||||
static LIST_HEAD(vmcore_cb_list);
|
||||
/* Whether we had a surprise unregistration of a callback. */
|
||||
static bool vmcore_cb_unstable;
|
||||
/* Whether the vmcore has been opened once. */
|
||||
static bool vmcore_opened;
|
||||
|
||||
int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn))
|
||||
void register_vmcore_cb(struct vmcore_cb *cb)
|
||||
{
|
||||
if (oldmem_pfn_is_ram)
|
||||
return -EBUSY;
|
||||
oldmem_pfn_is_ram = fn;
|
||||
return 0;
|
||||
down_write(&vmcore_cb_rwsem);
|
||||
INIT_LIST_HEAD(&cb->next);
|
||||
list_add_tail(&cb->next, &vmcore_cb_list);
|
||||
/*
|
||||
* Registering a vmcore callback after the vmcore was opened is
|
||||
* very unusual (e.g., manual driver loading).
|
||||
*/
|
||||
if (vmcore_opened)
|
||||
pr_warn_once("Unexpected vmcore callback registration\n");
|
||||
up_write(&vmcore_cb_rwsem);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_oldmem_pfn_is_ram);
|
||||
EXPORT_SYMBOL_GPL(register_vmcore_cb);
|
||||
|
||||
void unregister_oldmem_pfn_is_ram(void)
|
||||
void unregister_vmcore_cb(struct vmcore_cb *cb)
|
||||
{
|
||||
oldmem_pfn_is_ram = NULL;
|
||||
wmb();
|
||||
down_write(&vmcore_cb_rwsem);
|
||||
list_del(&cb->next);
|
||||
/*
|
||||
* Unregistering a vmcore callback after the vmcore was opened is
|
||||
* very unusual (e.g., forced driver removal), but we cannot stop
|
||||
* unregistering.
|
||||
*/
|
||||
if (vmcore_opened) {
|
||||
pr_warn_once("Unexpected vmcore callback unregistration\n");
|
||||
vmcore_cb_unstable = true;
|
||||
}
|
||||
up_write(&vmcore_cb_rwsem);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(unregister_oldmem_pfn_is_ram);
|
||||
EXPORT_SYMBOL_GPL(unregister_vmcore_cb);
|
||||
|
||||
static bool pfn_is_ram(unsigned long pfn)
|
||||
{
|
||||
int (*fn)(unsigned long pfn);
|
||||
/* pfn is ram unless fn() checks pagetype */
|
||||
struct vmcore_cb *cb;
|
||||
bool ret = true;
|
||||
|
||||
/*
|
||||
* Ask hypervisor if the pfn is really ram.
|
||||
* A ballooned page contains no data and reading from such a page
|
||||
* will cause high load in the hypervisor.
|
||||
*/
|
||||
fn = oldmem_pfn_is_ram;
|
||||
if (fn)
|
||||
ret = !!fn(pfn);
|
||||
lockdep_assert_held_read(&vmcore_cb_rwsem);
|
||||
if (unlikely(vmcore_cb_unstable))
|
||||
return false;
|
||||
|
||||
list_for_each_entry(cb, &vmcore_cb_list, next) {
|
||||
if (unlikely(!cb->pfn_is_ram))
|
||||
continue;
|
||||
ret = cb->pfn_is_ram(cb, pfn);
|
||||
if (!ret)
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int open_vmcore(struct inode *inode, struct file *file)
|
||||
{
|
||||
down_read(&vmcore_cb_rwsem);
|
||||
vmcore_opened = true;
|
||||
up_read(&vmcore_cb_rwsem);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Reads a page from the oldmem device from given offset. */
|
||||
ssize_t read_from_oldmem(char *buf, size_t count,
|
||||
u64 *ppos, int userbuf,
|
||||
@ -117,6 +146,7 @@ ssize_t read_from_oldmem(char *buf, size_t count,
|
||||
offset = (unsigned long)(*ppos % PAGE_SIZE);
|
||||
pfn = (unsigned long)(*ppos / PAGE_SIZE);
|
||||
|
||||
down_read(&vmcore_cb_rwsem);
|
||||
do {
|
||||
if (count > (PAGE_SIZE - offset))
|
||||
nr_bytes = PAGE_SIZE - offset;
|
||||
@ -136,8 +166,10 @@ ssize_t read_from_oldmem(char *buf, size_t count,
|
||||
tmp = copy_oldmem_page(pfn, buf, nr_bytes,
|
||||
offset, userbuf);
|
||||
|
||||
if (tmp < 0)
|
||||
if (tmp < 0) {
|
||||
up_read(&vmcore_cb_rwsem);
|
||||
return tmp;
|
||||
}
|
||||
}
|
||||
*ppos += nr_bytes;
|
||||
count -= nr_bytes;
|
||||
@ -147,6 +179,7 @@ ssize_t read_from_oldmem(char *buf, size_t count,
|
||||
offset = 0;
|
||||
} while (count);
|
||||
|
||||
up_read(&vmcore_cb_rwsem);
|
||||
return read;
|
||||
}
|
||||
|
||||
@ -537,14 +570,19 @@ static int vmcore_remap_oldmem_pfn(struct vm_area_struct *vma,
|
||||
unsigned long from, unsigned long pfn,
|
||||
unsigned long size, pgprot_t prot)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Check if oldmem_pfn_is_ram was registered to avoid
|
||||
* looping over all pages without a reason.
|
||||
*/
|
||||
if (oldmem_pfn_is_ram)
|
||||
return remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
|
||||
down_read(&vmcore_cb_rwsem);
|
||||
if (!list_empty(&vmcore_cb_list) || vmcore_cb_unstable)
|
||||
ret = remap_oldmem_pfn_checked(vma, from, pfn, size, prot);
|
||||
else
|
||||
return remap_oldmem_pfn_range(vma, from, pfn, size, prot);
|
||||
ret = remap_oldmem_pfn_range(vma, from, pfn, size, prot);
|
||||
up_read(&vmcore_cb_rwsem);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
|
||||
@ -668,6 +706,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
|
||||
#endif
|
||||
|
||||
static const struct proc_ops vmcore_proc_ops = {
|
||||
.proc_open = open_vmcore,
|
||||
.proc_read = read_vmcore,
|
||||
.proc_lseek = default_llseek,
|
||||
.proc_mmap = mmap_vmcore,
|
||||
|
@ -91,9 +91,29 @@ static inline void vmcore_unusable(void)
|
||||
elfcorehdr_addr = ELFCORE_ADDR_ERR;
|
||||
}
|
||||
|
||||
#define HAVE_OLDMEM_PFN_IS_RAM 1
|
||||
extern int register_oldmem_pfn_is_ram(int (*fn)(unsigned long pfn));
|
||||
extern void unregister_oldmem_pfn_is_ram(void);
|
||||
/**
|
||||
* struct vmcore_cb - driver callbacks for /proc/vmcore handling
|
||||
* @pfn_is_ram: check whether a PFN really is RAM and should be accessed when
|
||||
* reading the vmcore. Will return "true" if it is RAM or if the
|
||||
* callback cannot tell. If any callback returns "false", it's not
|
||||
* RAM and the page must not be accessed; zeroes should be
|
||||
* indicated in the vmcore instead. For example, a ballooned page
|
||||
* contains no data and reading from such a page will cause high
|
||||
* load in the hypervisor.
|
||||
* @next: List head to manage registered callbacks internally; initialized by
|
||||
* register_vmcore_cb().
|
||||
*
|
||||
* vmcore callbacks allow drivers managing physical memory ranges to
|
||||
* coordinate with vmcore handling code, for example, to prevent accessing
|
||||
* physical memory ranges that should not be accessed when reading the vmcore,
|
||||
* although included in the vmcore header as memory ranges to dump.
|
||||
*/
|
||||
struct vmcore_cb {
|
||||
bool (*pfn_is_ram)(struct vmcore_cb *cb, unsigned long pfn);
|
||||
struct list_head next;
|
||||
};
|
||||
extern void register_vmcore_cb(struct vmcore_cb *cb);
|
||||
extern void unregister_vmcore_cb(struct vmcore_cb *cb);
|
||||
|
||||
#else /* !CONFIG_CRASH_DUMP */
|
||||
static inline bool is_kdump_kernel(void) { return 0; }
|
||||
|
Loading…
x
Reference in New Issue
Block a user