dcdfdd40fa
UEFI Specification version 2.9 introduces the concept of memory acceptance. Some Virtual Machine platforms, such as Intel TDX or AMD SEV-SNP, require memory to be accepted before it can be used by the guest. Accepting happens via a protocol specific to the Virtual Machine platform. There are several ways the kernel can deal with unaccepted memory: 1. Accept all the memory during boot. It is easy to implement and it doesn't have runtime cost once the system is booted. The downside is very long boot time. Accept can be parallelized to multiple CPUs to keep it manageable (i.e. via DEFERRED_STRUCT_PAGE_INIT), but it tends to saturate memory bandwidth and does not scale beyond the point. 2. Accept a block of memory on the first use. It requires more infrastructure and changes in page allocator to make it work, but it provides good boot time. On-demand memory accept means latency spikes every time kernel steps onto a new memory block. The spikes will go away once workload data set size gets stabilized or all memory gets accepted. 3. Accept all memory in background. Introduce a thread (or multiple) that gets memory accepted proactively. It will minimize time the system experience latency spikes on memory allocation while keeping low boot time. This approach cannot function on its own. It is an extension of #2: background memory acceptance requires functional scheduler, but the page allocator may need to tap into unaccepted memory before that. The downside of the approach is that these threads also steal CPU cycles and memory bandwidth from the user's workload and may hurt user experience. Implement #1 and #2 for now. #2 is the default. Some workloads may want to use #1 with accept_memory=eager in kernel command line. #3 can be implemented later based on user's demands. Support of unaccepted memory requires a few changes in core-mm code: - memblock accepts memory on allocation. It serves early boot memory allocations and doesn't limit them to pre-accepted pool of memory. - page allocator accepts memory on the first allocation of the page. When kernel runs out of accepted memory, it accepts memory until the high watermark is reached. It helps to minimize fragmentation. EFI code will provide two helpers if the platform supports unaccepted memory: - accept_memory() makes a range of physical addresses accepted. - range_contains_unaccepted_memory() checks anything within the range of physical addresses requires acceptance. Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de> Reviewed-by: Vlastimil Babka <vbabka@suse.cz> Acked-by: Mike Rapoport <rppt@linux.ibm.com> # memblock Link: https://lore.kernel.org/r/20230606142637.5171-2-kirill.shutemov@linux.intel.com
192 lines
6.1 KiB
C
192 lines
6.1 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#include <linux/fs.h>
|
|
#include <linux/init.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/hugetlb.h>
|
|
#include <linux/mman.h>
|
|
#include <linux/mmzone.h>
|
|
#include <linux/memblock.h>
|
|
#include <linux/proc_fs.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/seq_file.h>
|
|
#include <linux/swap.h>
|
|
#include <linux/vmstat.h>
|
|
#include <linux/atomic.h>
|
|
#include <linux/vmalloc.h>
|
|
#ifdef CONFIG_CMA
|
|
#include <linux/cma.h>
|
|
#endif
|
|
#include <asm/page.h>
|
|
#include "internal.h"
|
|
|
|
void __attribute__((weak)) arch_report_meminfo(struct seq_file *m)
|
|
{
|
|
}
|
|
|
|
static void show_val_kb(struct seq_file *m, const char *s, unsigned long num)
|
|
{
|
|
seq_put_decimal_ull_width(m, s, num << (PAGE_SHIFT - 10), 8);
|
|
seq_write(m, " kB\n", 4);
|
|
}
|
|
|
|
static int meminfo_proc_show(struct seq_file *m, void *v)
|
|
{
|
|
struct sysinfo i;
|
|
unsigned long committed;
|
|
long cached;
|
|
long available;
|
|
unsigned long pages[NR_LRU_LISTS];
|
|
unsigned long sreclaimable, sunreclaim;
|
|
int lru;
|
|
|
|
si_meminfo(&i);
|
|
si_swapinfo(&i);
|
|
committed = vm_memory_committed();
|
|
|
|
cached = global_node_page_state(NR_FILE_PAGES) -
|
|
total_swapcache_pages() - i.bufferram;
|
|
if (cached < 0)
|
|
cached = 0;
|
|
|
|
for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
|
|
pages[lru] = global_node_page_state(NR_LRU_BASE + lru);
|
|
|
|
available = si_mem_available();
|
|
sreclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B);
|
|
sunreclaim = global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B);
|
|
|
|
show_val_kb(m, "MemTotal: ", i.totalram);
|
|
show_val_kb(m, "MemFree: ", i.freeram);
|
|
show_val_kb(m, "MemAvailable: ", available);
|
|
show_val_kb(m, "Buffers: ", i.bufferram);
|
|
show_val_kb(m, "Cached: ", cached);
|
|
show_val_kb(m, "SwapCached: ", total_swapcache_pages());
|
|
show_val_kb(m, "Active: ", pages[LRU_ACTIVE_ANON] +
|
|
pages[LRU_ACTIVE_FILE]);
|
|
show_val_kb(m, "Inactive: ", pages[LRU_INACTIVE_ANON] +
|
|
pages[LRU_INACTIVE_FILE]);
|
|
show_val_kb(m, "Active(anon): ", pages[LRU_ACTIVE_ANON]);
|
|
show_val_kb(m, "Inactive(anon): ", pages[LRU_INACTIVE_ANON]);
|
|
show_val_kb(m, "Active(file): ", pages[LRU_ACTIVE_FILE]);
|
|
show_val_kb(m, "Inactive(file): ", pages[LRU_INACTIVE_FILE]);
|
|
show_val_kb(m, "Unevictable: ", pages[LRU_UNEVICTABLE]);
|
|
show_val_kb(m, "Mlocked: ", global_zone_page_state(NR_MLOCK));
|
|
|
|
#ifdef CONFIG_HIGHMEM
|
|
show_val_kb(m, "HighTotal: ", i.totalhigh);
|
|
show_val_kb(m, "HighFree: ", i.freehigh);
|
|
show_val_kb(m, "LowTotal: ", i.totalram - i.totalhigh);
|
|
show_val_kb(m, "LowFree: ", i.freeram - i.freehigh);
|
|
#endif
|
|
|
|
#ifndef CONFIG_MMU
|
|
show_val_kb(m, "MmapCopy: ",
|
|
(unsigned long)atomic_long_read(&mmap_pages_allocated));
|
|
#endif
|
|
|
|
show_val_kb(m, "SwapTotal: ", i.totalswap);
|
|
show_val_kb(m, "SwapFree: ", i.freeswap);
|
|
#ifdef CONFIG_ZSWAP
|
|
seq_printf(m, "Zswap: %8lu kB\n",
|
|
(unsigned long)(zswap_pool_total_size >> 10));
|
|
seq_printf(m, "Zswapped: %8lu kB\n",
|
|
(unsigned long)atomic_read(&zswap_stored_pages) <<
|
|
(PAGE_SHIFT - 10));
|
|
#endif
|
|
show_val_kb(m, "Dirty: ",
|
|
global_node_page_state(NR_FILE_DIRTY));
|
|
show_val_kb(m, "Writeback: ",
|
|
global_node_page_state(NR_WRITEBACK));
|
|
show_val_kb(m, "AnonPages: ",
|
|
global_node_page_state(NR_ANON_MAPPED));
|
|
show_val_kb(m, "Mapped: ",
|
|
global_node_page_state(NR_FILE_MAPPED));
|
|
show_val_kb(m, "Shmem: ", i.sharedram);
|
|
show_val_kb(m, "KReclaimable: ", sreclaimable +
|
|
global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE));
|
|
show_val_kb(m, "Slab: ", sreclaimable + sunreclaim);
|
|
show_val_kb(m, "SReclaimable: ", sreclaimable);
|
|
show_val_kb(m, "SUnreclaim: ", sunreclaim);
|
|
seq_printf(m, "KernelStack: %8lu kB\n",
|
|
global_node_page_state(NR_KERNEL_STACK_KB));
|
|
#ifdef CONFIG_SHADOW_CALL_STACK
|
|
seq_printf(m, "ShadowCallStack:%8lu kB\n",
|
|
global_node_page_state(NR_KERNEL_SCS_KB));
|
|
#endif
|
|
show_val_kb(m, "PageTables: ",
|
|
global_node_page_state(NR_PAGETABLE));
|
|
show_val_kb(m, "SecPageTables: ",
|
|
global_node_page_state(NR_SECONDARY_PAGETABLE));
|
|
|
|
show_val_kb(m, "NFS_Unstable: ", 0);
|
|
show_val_kb(m, "Bounce: ",
|
|
global_zone_page_state(NR_BOUNCE));
|
|
show_val_kb(m, "WritebackTmp: ",
|
|
global_node_page_state(NR_WRITEBACK_TEMP));
|
|
show_val_kb(m, "CommitLimit: ", vm_commit_limit());
|
|
show_val_kb(m, "Committed_AS: ", committed);
|
|
seq_printf(m, "VmallocTotal: %8lu kB\n",
|
|
(unsigned long)VMALLOC_TOTAL >> 10);
|
|
show_val_kb(m, "VmallocUsed: ", vmalloc_nr_pages());
|
|
show_val_kb(m, "VmallocChunk: ", 0ul);
|
|
show_val_kb(m, "Percpu: ", pcpu_nr_pages());
|
|
|
|
#ifdef CONFIG_MEMTEST
|
|
if (early_memtest_done) {
|
|
unsigned long early_memtest_bad_size_kb;
|
|
|
|
early_memtest_bad_size_kb = early_memtest_bad_size>>10;
|
|
if (early_memtest_bad_size && !early_memtest_bad_size_kb)
|
|
early_memtest_bad_size_kb = 1;
|
|
/* When 0 is reported, it means there actually was a successful test */
|
|
seq_printf(m, "EarlyMemtestBad: %5lu kB\n", early_memtest_bad_size_kb);
|
|
}
|
|
#endif
|
|
|
|
#ifdef CONFIG_MEMORY_FAILURE
|
|
seq_printf(m, "HardwareCorrupted: %5lu kB\n",
|
|
atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10));
|
|
#endif
|
|
|
|
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
|
show_val_kb(m, "AnonHugePages: ",
|
|
global_node_page_state(NR_ANON_THPS));
|
|
show_val_kb(m, "ShmemHugePages: ",
|
|
global_node_page_state(NR_SHMEM_THPS));
|
|
show_val_kb(m, "ShmemPmdMapped: ",
|
|
global_node_page_state(NR_SHMEM_PMDMAPPED));
|
|
show_val_kb(m, "FileHugePages: ",
|
|
global_node_page_state(NR_FILE_THPS));
|
|
show_val_kb(m, "FilePmdMapped: ",
|
|
global_node_page_state(NR_FILE_PMDMAPPED));
|
|
#endif
|
|
|
|
#ifdef CONFIG_CMA
|
|
show_val_kb(m, "CmaTotal: ", totalcma_pages);
|
|
show_val_kb(m, "CmaFree: ",
|
|
global_zone_page_state(NR_FREE_CMA_PAGES));
|
|
#endif
|
|
|
|
#ifdef CONFIG_UNACCEPTED_MEMORY
|
|
show_val_kb(m, "Unaccepted: ",
|
|
global_zone_page_state(NR_UNACCEPTED));
|
|
#endif
|
|
|
|
hugetlb_report_meminfo(m);
|
|
|
|
arch_report_meminfo(m);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int __init proc_meminfo_init(void)
|
|
{
|
|
struct proc_dir_entry *pde;
|
|
|
|
pde = proc_create_single("meminfo", 0, NULL, meminfo_proc_show);
|
|
pde_make_permanent(pde);
|
|
return 0;
|
|
}
|
|
fs_initcall(proc_meminfo_init);
|