Merge branch 'akpm' (patches from Andrew)
Merge misc fixes from Andrew Morton: "28 patches. Subsystems affected by this series: mm (memblock, pagealloc, hugetlb, highmem, kfence, oom-kill, madvise, kasan, userfaultfd, memcg, and zram), core-kernel, kconfig, fork, binfmt, MAINTAINERS, kbuild, and ia64" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (28 commits) zram: fix broken page writeback zram: fix return value on writeback_store mm/memcg: set memcg when splitting page mm/memcg: rename mem_cgroup_split_huge_fixup to split_page_memcg and add nr_pages argument ia64: fix ptrace(PTRACE_SYSCALL_INFO_EXIT) sign ia64: fix ia64_syscall_get_set_arguments() for break-based syscalls mm/userfaultfd: fix memory corruption due to writeprotect kasan: fix KASAN_STACK dependency for HW_TAGS kasan, mm: fix crash with HW_TAGS and DEBUG_PAGEALLOC mm/madvise: replace ptrace attach requirement for process_madvise include/linux/sched/mm.h: use rcu_dereference in in_vfork() kfence: fix reports if constant function prefixes exist kfence, slab: fix cache_alloc_debugcheck_after() for bulk allocations kfence: fix printk format for ptrdiff_t linux/compiler-clang.h: define HAVE_BUILTIN_BSWAP* MAINTAINERS: exclude uapi directories in API/ABI section binfmt_misc: fix possible deadlock in bm_register_write mm/highmem.c: fix zero_user_segments() with start > end hugetlb: do early cow when page pinned on src mm mm: use is_cow_mapping() across tree where proper ...
This commit is contained in:
commit
50eb842fe5
@ -261,8 +261,8 @@ ABI/API
|
||||
L: linux-api@vger.kernel.org
|
||||
F: include/linux/syscalls.h
|
||||
F: kernel/sys_ni.c
|
||||
F: include/uapi/
|
||||
F: arch/*/include/uapi/
|
||||
X: include/uapi/
|
||||
X: arch/*/include/uapi/
|
||||
|
||||
ABIT UGURU 1,2 HARDWARE MONITOR DRIVER
|
||||
M: Hans de Goede <hdegoede@redhat.com>
|
||||
|
@ -32,7 +32,7 @@ static inline void syscall_rollback(struct task_struct *task,
|
||||
static inline long syscall_get_error(struct task_struct *task,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
return regs->r10 == -1 ? regs->r8:0;
|
||||
return regs->r10 == -1 ? -regs->r8:0;
|
||||
}
|
||||
|
||||
static inline long syscall_get_return_value(struct task_struct *task,
|
||||
|
@ -2013,27 +2013,39 @@ static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data)
|
||||
{
|
||||
struct syscall_get_set_args *args = data;
|
||||
struct pt_regs *pt = args->regs;
|
||||
unsigned long *krbs, cfm, ndirty;
|
||||
unsigned long *krbs, cfm, ndirty, nlocals, nouts;
|
||||
int i, count;
|
||||
|
||||
if (unw_unwind_to_user(info) < 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* We get here via a few paths:
|
||||
* - break instruction: cfm is shared with caller.
|
||||
* syscall args are in out= regs, locals are non-empty.
|
||||
* - epc instruction: cfm is set by br.call
|
||||
* locals don't exist.
|
||||
*
|
||||
* In both cases the arguments are reachable in cfm.sof - cfm.sol.
|
||||
* CFM: [ ... | sor: 17..14 | sol : 13..7 | sof : 6..0 ]
|
||||
*/
|
||||
cfm = pt->cr_ifs;
|
||||
nlocals = (cfm >> 7) & 0x7f; /* aka sol */
|
||||
nouts = (cfm & 0x7f) - nlocals; /* aka sof - sol */
|
||||
krbs = (unsigned long *)info->task + IA64_RBS_OFFSET/8;
|
||||
ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19));
|
||||
|
||||
count = 0;
|
||||
if (in_syscall(pt))
|
||||
count = min_t(int, args->n, cfm & 0x7f);
|
||||
count = min_t(int, args->n, nouts);
|
||||
|
||||
/* Iterate over outs. */
|
||||
for (i = 0; i < count; i++) {
|
||||
int j = ndirty + nlocals + i + args->i;
|
||||
if (args->rw)
|
||||
*ia64_rse_skip_regs(krbs, ndirty + i + args->i) =
|
||||
args->args[i];
|
||||
*ia64_rse_skip_regs(krbs, j) = args->args[i];
|
||||
else
|
||||
args->args[i] = *ia64_rse_skip_regs(krbs,
|
||||
ndirty + i + args->i);
|
||||
args->args[i] = *ia64_rse_skip_regs(krbs, j);
|
||||
}
|
||||
|
||||
if (!args->rw) {
|
||||
|
@ -627,7 +627,7 @@ static ssize_t writeback_store(struct device *dev,
|
||||
struct bio_vec bio_vec;
|
||||
struct page *page;
|
||||
ssize_t ret = len;
|
||||
int mode;
|
||||
int mode, err;
|
||||
unsigned long blk_idx = 0;
|
||||
|
||||
if (sysfs_streq(buf, "idle"))
|
||||
@ -638,8 +638,8 @@ static ssize_t writeback_store(struct device *dev,
|
||||
if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
|
||||
return -EINVAL;
|
||||
|
||||
ret = kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index);
|
||||
if (ret || index >= nr_pages)
|
||||
if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) ||
|
||||
index >= nr_pages)
|
||||
return -EINVAL;
|
||||
|
||||
nr_pages = 1;
|
||||
@ -663,7 +663,7 @@ static ssize_t writeback_store(struct device *dev,
|
||||
goto release_init_lock;
|
||||
}
|
||||
|
||||
while (nr_pages--) {
|
||||
for (; nr_pages != 0; index++, nr_pages--) {
|
||||
struct bio_vec bvec;
|
||||
|
||||
bvec.bv_page = page;
|
||||
@ -728,12 +728,17 @@ static ssize_t writeback_store(struct device *dev,
|
||||
* XXX: A single page IO would be inefficient for write
|
||||
* but it would be not bad as starter.
|
||||
*/
|
||||
ret = submit_bio_wait(&bio);
|
||||
if (ret) {
|
||||
err = submit_bio_wait(&bio);
|
||||
if (err) {
|
||||
zram_slot_lock(zram, index);
|
||||
zram_clear_flag(zram, index, ZRAM_UNDER_WB);
|
||||
zram_clear_flag(zram, index, ZRAM_IDLE);
|
||||
zram_slot_unlock(zram, index);
|
||||
/*
|
||||
* Return the last IO error unless every IO
|
||||
* succeeded.
|
||||
*/
|
||||
ret = err;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -500,8 +500,6 @@ vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
|
||||
vm_fault_t ret;
|
||||
pgoff_t fault_page_size;
|
||||
bool write = vmf->flags & FAULT_FLAG_WRITE;
|
||||
bool is_cow_mapping =
|
||||
(vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
|
||||
|
||||
switch (pe_size) {
|
||||
case PE_SIZE_PMD:
|
||||
@ -518,7 +516,7 @@ vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
|
||||
}
|
||||
|
||||
/* Always do write dirty-tracking and COW on PTE level. */
|
||||
if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping))
|
||||
if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping(vma->vm_flags)))
|
||||
return VM_FAULT_FALLBACK;
|
||||
|
||||
ret = ttm_bo_vm_reserve(bo, vmf);
|
||||
|
@ -49,7 +49,7 @@ int vmw_mmap(struct file *filp, struct vm_area_struct *vma)
|
||||
vma->vm_ops = &vmw_vm_ops;
|
||||
|
||||
/* Use VM_PFNMAP rather than VM_MIXEDMAP if not a COW mapping */
|
||||
if ((vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) != VM_MAYWRITE)
|
||||
if (!is_cow_mapping(vma->vm_flags))
|
||||
vma->vm_flags = (vma->vm_flags & ~VM_MIXEDMAP) | VM_PFNMAP;
|
||||
|
||||
return 0;
|
||||
|
@ -649,12 +649,24 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
|
||||
struct super_block *sb = file_inode(file)->i_sb;
|
||||
struct dentry *root = sb->s_root, *dentry;
|
||||
int err = 0;
|
||||
struct file *f = NULL;
|
||||
|
||||
e = create_entry(buffer, count);
|
||||
|
||||
if (IS_ERR(e))
|
||||
return PTR_ERR(e);
|
||||
|
||||
if (e->flags & MISC_FMT_OPEN_FILE) {
|
||||
f = open_exec(e->interpreter);
|
||||
if (IS_ERR(f)) {
|
||||
pr_notice("register: failed to install interpreter file %s\n",
|
||||
e->interpreter);
|
||||
kfree(e);
|
||||
return PTR_ERR(f);
|
||||
}
|
||||
e->interp_file = f;
|
||||
}
|
||||
|
||||
inode_lock(d_inode(root));
|
||||
dentry = lookup_one_len(e->name, root, strlen(e->name));
|
||||
err = PTR_ERR(dentry);
|
||||
@ -678,21 +690,6 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
|
||||
goto out2;
|
||||
}
|
||||
|
||||
if (e->flags & MISC_FMT_OPEN_FILE) {
|
||||
struct file *f;
|
||||
|
||||
f = open_exec(e->interpreter);
|
||||
if (IS_ERR(f)) {
|
||||
err = PTR_ERR(f);
|
||||
pr_notice("register: failed to install interpreter file %s\n", e->interpreter);
|
||||
simple_release_fs(&bm_mnt, &entry_count);
|
||||
iput(inode);
|
||||
inode = NULL;
|
||||
goto out2;
|
||||
}
|
||||
e->interp_file = f;
|
||||
}
|
||||
|
||||
e->dentry = dget(dentry);
|
||||
inode->i_private = e;
|
||||
inode->i_fop = &bm_entry_operations;
|
||||
@ -709,6 +706,8 @@ out:
|
||||
inode_unlock(d_inode(root));
|
||||
|
||||
if (err) {
|
||||
if (f)
|
||||
filp_close(f, NULL);
|
||||
kfree(e);
|
||||
return err;
|
||||
}
|
||||
|
@ -1036,8 +1036,6 @@ struct clear_refs_private {
|
||||
|
||||
#ifdef CONFIG_MEM_SOFT_DIRTY
|
||||
|
||||
#define is_cow_mapping(flags) (((flags) & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE)
|
||||
|
||||
static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
|
||||
{
|
||||
struct page *page;
|
||||
|
@ -31,6 +31,12 @@
|
||||
#define __no_sanitize_thread
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP)
|
||||
#define __HAVE_BUILTIN_BSWAP32__
|
||||
#define __HAVE_BUILTIN_BSWAP64__
|
||||
#define __HAVE_BUILTIN_BSWAP16__
|
||||
#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */
|
||||
|
||||
#if __has_feature(undefined_behavior_sanitizer)
|
||||
/* GCC does not have __SANITIZE_UNDEFINED__ */
|
||||
#define __no_sanitize_undefined \
|
||||
|
@ -460,7 +460,7 @@ static inline void memblock_free_late(phys_addr_t base, phys_addr_t size)
|
||||
/*
|
||||
* Set the allocation direction to bottom-up or top-down.
|
||||
*/
|
||||
static inline void memblock_set_bottom_up(bool enable)
|
||||
static inline __init void memblock_set_bottom_up(bool enable)
|
||||
{
|
||||
memblock.bottom_up = enable;
|
||||
}
|
||||
@ -470,7 +470,7 @@ static inline void memblock_set_bottom_up(bool enable)
|
||||
* if this is true, that said, memblock will allocate memory
|
||||
* in bottom-up direction.
|
||||
*/
|
||||
static inline bool memblock_bottom_up(void)
|
||||
static inline __init bool memblock_bottom_up(void)
|
||||
{
|
||||
return memblock.bottom_up;
|
||||
}
|
||||
|
@ -1061,9 +1061,7 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
void mem_cgroup_split_huge_fixup(struct page *head);
|
||||
#endif
|
||||
void split_page_memcg(struct page *head, unsigned int nr);
|
||||
|
||||
#else /* CONFIG_MEMCG */
|
||||
|
||||
@ -1400,7 +1398,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void mem_cgroup_split_huge_fixup(struct page *head)
|
||||
static inline void split_page_memcg(struct page *head, unsigned int nr)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -1300,6 +1300,27 @@ static inline bool page_maybe_dma_pinned(struct page *page)
|
||||
GUP_PIN_COUNTING_BIAS;
|
||||
}
|
||||
|
||||
static inline bool is_cow_mapping(vm_flags_t flags)
|
||||
{
|
||||
return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
|
||||
}
|
||||
|
||||
/*
|
||||
* This should most likely only be called during fork() to see whether we
|
||||
* should break the cow immediately for a page on the src mm.
|
||||
*/
|
||||
static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma,
|
||||
struct page *page)
|
||||
{
|
||||
if (!is_cow_mapping(vma->vm_flags))
|
||||
return false;
|
||||
|
||||
if (!atomic_read(&vma->vm_mm->has_pinned))
|
||||
return false;
|
||||
|
||||
return page_maybe_dma_pinned(page);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
|
||||
#define SECTION_IN_PAGE_FLAGS
|
||||
#endif
|
||||
|
@ -23,6 +23,7 @@
|
||||
#endif
|
||||
#define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1))
|
||||
|
||||
#define INIT_PASID 0
|
||||
|
||||
struct address_space;
|
||||
struct mem_cgroup;
|
||||
|
@ -140,7 +140,8 @@ static inline bool in_vfork(struct task_struct *tsk)
|
||||
* another oom-unkillable task does this it should blame itself.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm;
|
||||
ret = tsk->vfork_done &&
|
||||
rcu_dereference(tsk->real_parent)->mm == tsk->mm;
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
|
@ -128,7 +128,7 @@ int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
|
||||
const struct cpumask *cpus);
|
||||
#else /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
|
||||
|
||||
static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
|
||||
static __always_inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
|
||||
const struct cpumask *cpus)
|
||||
{
|
||||
unsigned long flags;
|
||||
@ -139,14 +139,15 @@ static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int stop_machine(cpu_stop_fn_t fn, void *data,
|
||||
const struct cpumask *cpus)
|
||||
static __always_inline int
|
||||
stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
|
||||
{
|
||||
return stop_machine_cpuslocked(fn, data, cpus);
|
||||
}
|
||||
|
||||
static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
|
||||
const struct cpumask *cpus)
|
||||
static __always_inline int
|
||||
stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
|
||||
const struct cpumask *cpus)
|
||||
{
|
||||
return stop_machine(fn, data, cpus);
|
||||
}
|
||||
|
@ -119,8 +119,7 @@ config INIT_ENV_ARG_LIMIT
|
||||
|
||||
config COMPILE_TEST
|
||||
bool "Compile also drivers which will not load"
|
||||
depends on !UML && !S390
|
||||
default n
|
||||
depends on HAS_IOMEM
|
||||
help
|
||||
Some drivers can be compiled on a different platform than they are
|
||||
intended to be run on. Despite they cannot be loaded there (or even
|
||||
|
@ -994,6 +994,13 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
|
||||
#endif
|
||||
}
|
||||
|
||||
static void mm_init_pasid(struct mm_struct *mm)
|
||||
{
|
||||
#ifdef CONFIG_IOMMU_SUPPORT
|
||||
mm->pasid = INIT_PASID;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void mm_init_uprobes_state(struct mm_struct *mm)
|
||||
{
|
||||
#ifdef CONFIG_UPROBES
|
||||
@ -1024,6 +1031,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
|
||||
mm_init_cpumask(mm);
|
||||
mm_init_aio(mm);
|
||||
mm_init_owner(mm, p);
|
||||
mm_init_pasid(mm);
|
||||
RCU_INIT_POINTER(mm->exe_file, NULL);
|
||||
mmu_notifier_subscriptions_init(mm);
|
||||
init_tlb_flush_pending(mm);
|
||||
|
@ -156,6 +156,7 @@ config KASAN_STACK_ENABLE
|
||||
|
||||
config KASAN_STACK
|
||||
int
|
||||
depends on KASAN_GENERIC || KASAN_SW_TAGS
|
||||
default 1 if KASAN_STACK_ENABLE || CC_IS_GCC
|
||||
default 0
|
||||
|
||||
|
17
mm/highmem.c
17
mm/highmem.c
@ -368,20 +368,24 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
|
||||
|
||||
BUG_ON(end1 > page_size(page) || end2 > page_size(page));
|
||||
|
||||
if (start1 >= end1)
|
||||
start1 = end1 = 0;
|
||||
if (start2 >= end2)
|
||||
start2 = end2 = 0;
|
||||
|
||||
for (i = 0; i < compound_nr(page); i++) {
|
||||
void *kaddr = NULL;
|
||||
|
||||
if (start1 < PAGE_SIZE || start2 < PAGE_SIZE)
|
||||
kaddr = kmap_atomic(page + i);
|
||||
|
||||
if (start1 >= PAGE_SIZE) {
|
||||
start1 -= PAGE_SIZE;
|
||||
end1 -= PAGE_SIZE;
|
||||
} else {
|
||||
unsigned this_end = min_t(unsigned, end1, PAGE_SIZE);
|
||||
|
||||
if (end1 > start1)
|
||||
if (end1 > start1) {
|
||||
kaddr = kmap_atomic(page + i);
|
||||
memset(kaddr + start1, 0, this_end - start1);
|
||||
}
|
||||
end1 -= this_end;
|
||||
start1 = 0;
|
||||
}
|
||||
@ -392,8 +396,11 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
|
||||
} else {
|
||||
unsigned this_end = min_t(unsigned, end2, PAGE_SIZE);
|
||||
|
||||
if (end2 > start2)
|
||||
if (end2 > start2) {
|
||||
if (!kaddr)
|
||||
kaddr = kmap_atomic(page + i);
|
||||
memset(kaddr + start2, 0, this_end - start2);
|
||||
}
|
||||
end2 -= this_end;
|
||||
start2 = 0;
|
||||
}
|
||||
|
@ -1100,9 +1100,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
|
||||
* best effort that the pinned pages won't be replaced by another
|
||||
* random page during the coming copy-on-write.
|
||||
*/
|
||||
if (unlikely(is_cow_mapping(vma->vm_flags) &&
|
||||
atomic_read(&src_mm->has_pinned) &&
|
||||
page_maybe_dma_pinned(src_page))) {
|
||||
if (unlikely(page_needs_cow_for_dma(vma, src_page))) {
|
||||
pte_free(dst_mm, pgtable);
|
||||
spin_unlock(src_ptl);
|
||||
spin_unlock(dst_ptl);
|
||||
@ -1214,9 +1212,7 @@ int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
|
||||
}
|
||||
|
||||
/* Please refer to comments in copy_huge_pmd() */
|
||||
if (unlikely(is_cow_mapping(vma->vm_flags) &&
|
||||
atomic_read(&src_mm->has_pinned) &&
|
||||
page_maybe_dma_pinned(pud_page(pud)))) {
|
||||
if (unlikely(page_needs_cow_for_dma(vma, pud_page(pud)))) {
|
||||
spin_unlock(src_ptl);
|
||||
spin_unlock(dst_ptl);
|
||||
__split_huge_pud(vma, src_pud, addr);
|
||||
@ -2471,7 +2467,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
|
||||
int i;
|
||||
|
||||
/* complete memcg works before add pages to LRU */
|
||||
mem_cgroup_split_huge_fixup(head);
|
||||
split_page_memcg(head, nr);
|
||||
|
||||
if (PageAnon(head) && PageSwapCache(head)) {
|
||||
swp_entry_t entry = { .val = page_private(head) };
|
||||
|
123
mm/hugetlb.c
123
mm/hugetlb.c
@ -331,6 +331,24 @@ static void coalesce_file_region(struct resv_map *resv, struct file_region *rg)
|
||||
}
|
||||
}
|
||||
|
||||
static inline long
|
||||
hugetlb_resv_map_add(struct resv_map *map, struct file_region *rg, long from,
|
||||
long to, struct hstate *h, struct hugetlb_cgroup *cg,
|
||||
long *regions_needed)
|
||||
{
|
||||
struct file_region *nrg;
|
||||
|
||||
if (!regions_needed) {
|
||||
nrg = get_file_region_entry_from_cache(map, from, to);
|
||||
record_hugetlb_cgroup_uncharge_info(cg, h, map, nrg);
|
||||
list_add(&nrg->link, rg->link.prev);
|
||||
coalesce_file_region(map, nrg);
|
||||
} else
|
||||
*regions_needed += 1;
|
||||
|
||||
return to - from;
|
||||
}
|
||||
|
||||
/*
|
||||
* Must be called with resv->lock held.
|
||||
*
|
||||
@ -346,7 +364,7 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
|
||||
long add = 0;
|
||||
struct list_head *head = &resv->regions;
|
||||
long last_accounted_offset = f;
|
||||
struct file_region *rg = NULL, *trg = NULL, *nrg = NULL;
|
||||
struct file_region *rg = NULL, *trg = NULL;
|
||||
|
||||
if (regions_needed)
|
||||
*regions_needed = 0;
|
||||
@ -369,24 +387,17 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
|
||||
/* When we find a region that starts beyond our range, we've
|
||||
* finished.
|
||||
*/
|
||||
if (rg->from > t)
|
||||
if (rg->from >= t)
|
||||
break;
|
||||
|
||||
/* Add an entry for last_accounted_offset -> rg->from, and
|
||||
* update last_accounted_offset.
|
||||
*/
|
||||
if (rg->from > last_accounted_offset) {
|
||||
add += rg->from - last_accounted_offset;
|
||||
if (!regions_needed) {
|
||||
nrg = get_file_region_entry_from_cache(
|
||||
resv, last_accounted_offset, rg->from);
|
||||
record_hugetlb_cgroup_uncharge_info(h_cg, h,
|
||||
resv, nrg);
|
||||
list_add(&nrg->link, rg->link.prev);
|
||||
coalesce_file_region(resv, nrg);
|
||||
} else
|
||||
*regions_needed += 1;
|
||||
}
|
||||
if (rg->from > last_accounted_offset)
|
||||
add += hugetlb_resv_map_add(resv, rg,
|
||||
last_accounted_offset,
|
||||
rg->from, h, h_cg,
|
||||
regions_needed);
|
||||
|
||||
last_accounted_offset = rg->to;
|
||||
}
|
||||
@ -394,17 +405,9 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
|
||||
/* Handle the case where our range extends beyond
|
||||
* last_accounted_offset.
|
||||
*/
|
||||
if (last_accounted_offset < t) {
|
||||
add += t - last_accounted_offset;
|
||||
if (!regions_needed) {
|
||||
nrg = get_file_region_entry_from_cache(
|
||||
resv, last_accounted_offset, t);
|
||||
record_hugetlb_cgroup_uncharge_info(h_cg, h, resv, nrg);
|
||||
list_add(&nrg->link, rg->link.prev);
|
||||
coalesce_file_region(resv, nrg);
|
||||
} else
|
||||
*regions_needed += 1;
|
||||
}
|
||||
if (last_accounted_offset < t)
|
||||
add += hugetlb_resv_map_add(resv, rg, last_accounted_offset,
|
||||
t, h, h_cg, regions_needed);
|
||||
|
||||
VM_BUG_ON(add < 0);
|
||||
return add;
|
||||
@ -3725,21 +3728,32 @@ static bool is_hugetlb_entry_hwpoisoned(pte_t pte)
|
||||
return false;
|
||||
}
|
||||
|
||||
static void
|
||||
hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr,
|
||||
struct page *new_page)
|
||||
{
|
||||
__SetPageUptodate(new_page);
|
||||
set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1));
|
||||
hugepage_add_new_anon_rmap(new_page, vma, addr);
|
||||
hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
|
||||
ClearHPageRestoreReserve(new_page);
|
||||
SetHPageMigratable(new_page);
|
||||
}
|
||||
|
||||
int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
|
||||
struct vm_area_struct *vma)
|
||||
{
|
||||
pte_t *src_pte, *dst_pte, entry, dst_entry;
|
||||
struct page *ptepage;
|
||||
unsigned long addr;
|
||||
int cow;
|
||||
bool cow = is_cow_mapping(vma->vm_flags);
|
||||
struct hstate *h = hstate_vma(vma);
|
||||
unsigned long sz = huge_page_size(h);
|
||||
unsigned long npages = pages_per_huge_page(h);
|
||||
struct address_space *mapping = vma->vm_file->f_mapping;
|
||||
struct mmu_notifier_range range;
|
||||
int ret = 0;
|
||||
|
||||
cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
|
||||
|
||||
if (cow) {
|
||||
mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, src,
|
||||
vma->vm_start,
|
||||
@ -3784,6 +3798,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
|
||||
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
|
||||
entry = huge_ptep_get(src_pte);
|
||||
dst_entry = huge_ptep_get(dst_pte);
|
||||
again:
|
||||
if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
|
||||
/*
|
||||
* Skip if src entry none. Also, skip in the
|
||||
@ -3807,6 +3822,52 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
|
||||
}
|
||||
set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
|
||||
} else {
|
||||
entry = huge_ptep_get(src_pte);
|
||||
ptepage = pte_page(entry);
|
||||
get_page(ptepage);
|
||||
|
||||
/*
|
||||
* This is a rare case where we see pinned hugetlb
|
||||
* pages while they're prone to COW. We need to do the
|
||||
* COW earlier during fork.
|
||||
*
|
||||
* When pre-allocating the page or copying data, we
|
||||
* need to be without the pgtable locks since we could
|
||||
* sleep during the process.
|
||||
*/
|
||||
if (unlikely(page_needs_cow_for_dma(vma, ptepage))) {
|
||||
pte_t src_pte_old = entry;
|
||||
struct page *new;
|
||||
|
||||
spin_unlock(src_ptl);
|
||||
spin_unlock(dst_ptl);
|
||||
/* Do not use reserve as it's privately owned */
|
||||
new = alloc_huge_page(vma, addr, 1);
|
||||
if (IS_ERR(new)) {
|
||||
put_page(ptepage);
|
||||
ret = PTR_ERR(new);
|
||||
break;
|
||||
}
|
||||
copy_user_huge_page(new, ptepage, addr, vma,
|
||||
npages);
|
||||
put_page(ptepage);
|
||||
|
||||
/* Install the new huge page if src pte stable */
|
||||
dst_ptl = huge_pte_lock(h, dst, dst_pte);
|
||||
src_ptl = huge_pte_lockptr(h, src, src_pte);
|
||||
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
|
||||
entry = huge_ptep_get(src_pte);
|
||||
if (!pte_same(src_pte_old, entry)) {
|
||||
put_page(new);
|
||||
/* dst_entry won't change as in child */
|
||||
goto again;
|
||||
}
|
||||
hugetlb_install_page(vma, dst_pte, addr, new);
|
||||
spin_unlock(src_ptl);
|
||||
spin_unlock(dst_ptl);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (cow) {
|
||||
/*
|
||||
* No need to notify as we are downgrading page
|
||||
@ -3817,12 +3878,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
|
||||
*/
|
||||
huge_ptep_set_wrprotect(src, addr, src_pte);
|
||||
}
|
||||
entry = huge_ptep_get(src_pte);
|
||||
ptepage = pte_page(entry);
|
||||
get_page(ptepage);
|
||||
|
||||
page_dup_rmap(ptepage, true);
|
||||
set_huge_pte_at(dst, addr, dst_pte, entry);
|
||||
hugetlb_count_add(pages_per_huge_page(h), dst);
|
||||
hugetlb_count_add(npages, dst);
|
||||
}
|
||||
spin_unlock(src_ptl);
|
||||
spin_unlock(dst_ptl);
|
||||
|
@ -296,11 +296,6 @@ static inline unsigned int buddy_order(struct page *page)
|
||||
*/
|
||||
#define buddy_order_unsafe(page) READ_ONCE(page_private(page))
|
||||
|
||||
static inline bool is_cow_mapping(vm_flags_t flags)
|
||||
{
|
||||
return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
|
||||
}
|
||||
|
||||
/*
|
||||
* These three helpers classify VMAs for virtual memory accounting.
|
||||
*/
|
||||
|
@ -20,6 +20,11 @@
|
||||
|
||||
#include "kfence.h"
|
||||
|
||||
/* May be overridden by <asm/kfence.h>. */
|
||||
#ifndef ARCH_FUNC_PREFIX
|
||||
#define ARCH_FUNC_PREFIX ""
|
||||
#endif
|
||||
|
||||
extern bool no_hash_pointers;
|
||||
|
||||
/* Helper function to either print to a seq_file or to console. */
|
||||
@ -67,8 +72,9 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries
|
||||
for (skipnr = 0; skipnr < num_entries; skipnr++) {
|
||||
int len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skipnr]);
|
||||
|
||||
if (str_has_prefix(buf, "kfence_") || str_has_prefix(buf, "__kfence_") ||
|
||||
!strncmp(buf, "__slab_free", len)) {
|
||||
if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfence_") ||
|
||||
str_has_prefix(buf, ARCH_FUNC_PREFIX "__kfence_") ||
|
||||
!strncmp(buf, ARCH_FUNC_PREFIX "__slab_free", len)) {
|
||||
/*
|
||||
* In case of tail calls from any of the below
|
||||
* to any of the above.
|
||||
@ -77,10 +83,10 @@ static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries
|
||||
}
|
||||
|
||||
/* Also the *_bulk() variants by only checking prefixes. */
|
||||
if (str_has_prefix(buf, "kfree") ||
|
||||
str_has_prefix(buf, "kmem_cache_free") ||
|
||||
str_has_prefix(buf, "__kmalloc") ||
|
||||
str_has_prefix(buf, "kmem_cache_alloc"))
|
||||
if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfree") ||
|
||||
str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_free") ||
|
||||
str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmalloc") ||
|
||||
str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_alloc"))
|
||||
goto found;
|
||||
}
|
||||
if (fallback < num_entries)
|
||||
@ -116,12 +122,12 @@ void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *met
|
||||
lockdep_assert_held(&meta->lock);
|
||||
|
||||
if (meta->state == KFENCE_OBJECT_UNUSED) {
|
||||
seq_con_printf(seq, "kfence-#%zd unused\n", meta - kfence_metadata);
|
||||
seq_con_printf(seq, "kfence-#%td unused\n", meta - kfence_metadata);
|
||||
return;
|
||||
}
|
||||
|
||||
seq_con_printf(seq,
|
||||
"kfence-#%zd [0x%p-0x%p"
|
||||
"kfence-#%td [0x%p-0x%p"
|
||||
", size=%d, cache=%s] allocated by task %d:\n",
|
||||
meta - kfence_metadata, (void *)start, (void *)(start + size - 1), size,
|
||||
(cache && cache->name) ? cache->name : "<destroyed>", meta->alloc_track.pid);
|
||||
@ -204,7 +210,7 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r
|
||||
|
||||
pr_err("BUG: KFENCE: out-of-bounds %s in %pS\n\n", get_access_type(is_write),
|
||||
(void *)stack_entries[skipnr]);
|
||||
pr_err("Out-of-bounds %s at 0x%p (%luB %s of kfence-#%zd):\n",
|
||||
pr_err("Out-of-bounds %s at 0x%p (%luB %s of kfence-#%td):\n",
|
||||
get_access_type(is_write), (void *)address,
|
||||
left_of_object ? meta->addr - address : address - meta->addr,
|
||||
left_of_object ? "left" : "right", object_index);
|
||||
@ -213,14 +219,14 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r
|
||||
case KFENCE_ERROR_UAF:
|
||||
pr_err("BUG: KFENCE: use-after-free %s in %pS\n\n", get_access_type(is_write),
|
||||
(void *)stack_entries[skipnr]);
|
||||
pr_err("Use-after-free %s at 0x%p (in kfence-#%zd):\n",
|
||||
pr_err("Use-after-free %s at 0x%p (in kfence-#%td):\n",
|
||||
get_access_type(is_write), (void *)address, object_index);
|
||||
break;
|
||||
case KFENCE_ERROR_CORRUPTION:
|
||||
pr_err("BUG: KFENCE: memory corruption in %pS\n\n", (void *)stack_entries[skipnr]);
|
||||
pr_err("Corrupted memory at 0x%p ", (void *)address);
|
||||
print_diff_canary(address, 16, meta);
|
||||
pr_cont(" (in kfence-#%zd):\n", object_index);
|
||||
pr_cont(" (in kfence-#%td):\n", object_index);
|
||||
break;
|
||||
case KFENCE_ERROR_INVALID:
|
||||
pr_err("BUG: KFENCE: invalid %s in %pS\n\n", get_access_type(is_write),
|
||||
@ -230,7 +236,7 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r
|
||||
break;
|
||||
case KFENCE_ERROR_INVALID_FREE:
|
||||
pr_err("BUG: KFENCE: invalid free in %pS\n\n", (void *)stack_entries[skipnr]);
|
||||
pr_err("Invalid free of 0x%p (in kfence-#%zd):\n", (void *)address,
|
||||
pr_err("Invalid free of 0x%p (in kfence-#%td):\n", (void *)address,
|
||||
object_index);
|
||||
break;
|
||||
}
|
||||
|
13
mm/madvise.c
13
mm/madvise.c
@ -1198,12 +1198,22 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
|
||||
goto release_task;
|
||||
}
|
||||
|
||||
mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS);
|
||||
/* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */
|
||||
mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
|
||||
if (IS_ERR_OR_NULL(mm)) {
|
||||
ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
|
||||
goto release_task;
|
||||
}
|
||||
|
||||
/*
|
||||
* Require CAP_SYS_NICE for influencing process performance. Note that
|
||||
* only non-destructive hints are currently supported.
|
||||
*/
|
||||
if (!capable(CAP_SYS_NICE)) {
|
||||
ret = -EPERM;
|
||||
goto release_mm;
|
||||
}
|
||||
|
||||
total_len = iov_iter_count(&iter);
|
||||
|
||||
while (iov_iter_count(&iter)) {
|
||||
@ -1218,6 +1228,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
|
||||
if (ret == 0)
|
||||
ret = total_len - iov_iter_count(&iter);
|
||||
|
||||
release_mm:
|
||||
mmput(mm);
|
||||
release_task:
|
||||
put_task_struct(task);
|
||||
|
@ -3287,24 +3287,21 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
|
||||
|
||||
#endif /* CONFIG_MEMCG_KMEM */
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
/*
|
||||
* Because page_memcg(head) is not set on compound tails, set it now.
|
||||
* Because page_memcg(head) is not set on tails, set it now.
|
||||
*/
|
||||
void mem_cgroup_split_huge_fixup(struct page *head)
|
||||
void split_page_memcg(struct page *head, unsigned int nr)
|
||||
{
|
||||
struct mem_cgroup *memcg = page_memcg(head);
|
||||
int i;
|
||||
|
||||
if (mem_cgroup_disabled())
|
||||
if (mem_cgroup_disabled() || !memcg)
|
||||
return;
|
||||
|
||||
for (i = 1; i < HPAGE_PMD_NR; i++) {
|
||||
css_get(&memcg->css);
|
||||
head[i].memcg_data = (unsigned long)memcg;
|
||||
}
|
||||
for (i = 1; i < nr; i++)
|
||||
head[i].memcg_data = head->memcg_data;
|
||||
css_get_many(&memcg->css, nr - 1);
|
||||
}
|
||||
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
||||
|
||||
#ifdef CONFIG_MEMCG_SWAP
|
||||
/**
|
||||
|
16
mm/memory.c
16
mm/memory.c
@ -809,12 +809,8 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
|
||||
pte_t *dst_pte, pte_t *src_pte, unsigned long addr, int *rss,
|
||||
struct page **prealloc, pte_t pte, struct page *page)
|
||||
{
|
||||
struct mm_struct *src_mm = src_vma->vm_mm;
|
||||
struct page *new_page;
|
||||
|
||||
if (!is_cow_mapping(src_vma->vm_flags))
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* What we want to do is to check whether this page may
|
||||
* have been pinned by the parent process. If so,
|
||||
@ -828,9 +824,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
|
||||
* the page count. That might give false positives for
|
||||
* pinning, but it will work correctly.
|
||||
*/
|
||||
if (likely(!atomic_read(&src_mm->has_pinned)))
|
||||
return 1;
|
||||
if (likely(!page_maybe_dma_pinned(page)))
|
||||
if (likely(!page_needs_cow_for_dma(src_vma, page)))
|
||||
return 1;
|
||||
|
||||
new_page = *prealloc;
|
||||
@ -3103,6 +3097,14 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
|
||||
return handle_userfault(vmf, VM_UFFD_WP);
|
||||
}
|
||||
|
||||
/*
|
||||
* Userfaultfd write-protect can defer flushes. Ensure the TLB
|
||||
* is flushed in this case before copying.
|
||||
*/
|
||||
if (unlikely(userfaultfd_wp(vmf->vma) &&
|
||||
mm_tlb_flush_pending(vmf->vma->vm_mm)))
|
||||
flush_tlb_page(vmf->vma, vmf->address);
|
||||
|
||||
vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte);
|
||||
if (!vmf->page) {
|
||||
/*
|
||||
|
167
mm/page_alloc.c
167
mm/page_alloc.c
@ -1281,6 +1281,12 @@ static __always_inline bool free_pages_prepare(struct page *page,
|
||||
|
||||
kernel_poison_pages(page, 1 << order);
|
||||
|
||||
/*
|
||||
* With hardware tag-based KASAN, memory tags must be set before the
|
||||
* page becomes unavailable via debug_pagealloc or arch_free_page.
|
||||
*/
|
||||
kasan_free_nondeferred_pages(page, order);
|
||||
|
||||
/*
|
||||
* arch_free_page() can make the page's contents inaccessible. s390
|
||||
* does this. So nothing which can access the page's contents should
|
||||
@ -1290,8 +1296,6 @@ static __always_inline bool free_pages_prepare(struct page *page,
|
||||
|
||||
debug_pagealloc_unmap_pages(page, 1 << order);
|
||||
|
||||
kasan_free_nondeferred_pages(page, order);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -3310,6 +3314,7 @@ void split_page(struct page *page, unsigned int order)
|
||||
for (i = 1; i < (1 << order); i++)
|
||||
set_page_refcounted(page + i);
|
||||
split_page_owner(page, 1 << order);
|
||||
split_page_memcg(page, 1 << order);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(split_page);
|
||||
|
||||
@ -6259,12 +6264,65 @@ static void __meminit zone_init_free_lists(struct zone *zone)
|
||||
}
|
||||
}
|
||||
|
||||
#if !defined(CONFIG_FLAT_NODE_MEM_MAP)
|
||||
/*
|
||||
* Only struct pages that correspond to ranges defined by memblock.memory
|
||||
* are zeroed and initialized by going through __init_single_page() during
|
||||
* memmap_init_zone().
|
||||
*
|
||||
* But, there could be struct pages that correspond to holes in
|
||||
* memblock.memory. This can happen because of the following reasons:
|
||||
* - physical memory bank size is not necessarily the exact multiple of the
|
||||
* arbitrary section size
|
||||
* - early reserved memory may not be listed in memblock.memory
|
||||
* - memory layouts defined with memmap= kernel parameter may not align
|
||||
* nicely with memmap sections
|
||||
*
|
||||
* Explicitly initialize those struct pages so that:
|
||||
* - PG_reserved is set
|
||||
* - zone and node links point to zone and node that span the page if the
|
||||
* hole is in the middle of a zone
|
||||
* - zone and node links point to adjacent zone/node if the hole falls on
|
||||
* the zone boundary; the pages in such holes will be prepended to the
|
||||
* zone/node above the hole except for the trailing pages in the last
|
||||
* section that will be appended to the zone/node below.
|
||||
*/
|
||||
static u64 __meminit init_unavailable_range(unsigned long spfn,
					    unsigned long epfn,
					    int zone, int node)
{
	unsigned long pfn = spfn;
	u64 nr_init = 0;

	/*
	 * Visit every pfn in [spfn, epfn).  A pfn is initialized only when the
	 * first pfn of its pageblock passes pfn_valid(); otherwise the rest of
	 * that pageblock is skipped in one step.
	 */
	while (pfn < epfn) {
		unsigned long block_start = ALIGN_DOWN(pfn, pageblock_nr_pages);
		struct page *page;

		if (!pfn_valid(block_start)) {
			/* Resume at the first pfn of the next pageblock. */
			pfn = block_start + pageblock_nr_pages;
			continue;
		}

		/* Tie the page to the caller's zone/node and mark it reserved. */
		page = pfn_to_page(pfn);
		__init_single_page(page, pfn, zone, node);
		__SetPageReserved(page);
		nr_init++;
		pfn++;
	}

	/* Number of struct pages initialized above. */
	return nr_init;
}
|
||||
#else
|
||||
/*
 * No-op variant for the #else side of !defined(CONFIG_FLAT_NODE_MEM_MAP):
 * initializes no struct pages and always reports 0 pages.
 */
static inline u64 init_unavailable_range(unsigned long spfn, unsigned long epfn,
|
||||
int zone, int node)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
void __meminit __weak memmap_init_zone(struct zone *zone)
|
||||
{
|
||||
unsigned long zone_start_pfn = zone->zone_start_pfn;
|
||||
unsigned long zone_end_pfn = zone_start_pfn + zone->spanned_pages;
|
||||
int i, nid = zone_to_nid(zone), zone_id = zone_idx(zone);
|
||||
static unsigned long hole_pfn;
|
||||
unsigned long start_pfn, end_pfn;
|
||||
u64 pgcnt = 0;
|
||||
|
||||
for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
|
||||
start_pfn = clamp(start_pfn, zone_start_pfn, zone_end_pfn);
|
||||
@ -6274,7 +6332,29 @@ void __meminit __weak memmap_init_zone(struct zone *zone)
|
||||
memmap_init_range(end_pfn - start_pfn, nid,
|
||||
zone_id, start_pfn, zone_end_pfn,
|
||||
MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
|
||||
|
||||
if (hole_pfn < start_pfn)
|
||||
pgcnt += init_unavailable_range(hole_pfn, start_pfn,
|
||||
zone_id, nid);
|
||||
hole_pfn = end_pfn;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_SPARSEMEM
|
||||
/*
|
||||
* Initialize the hole in the range [zone_end_pfn, section_end].
|
||||
* If zone boundary falls in the middle of a section, this hole
|
||||
* will be re-initialized during the call to this function for the
|
||||
* higher zone.
|
||||
*/
|
||||
end_pfn = round_up(zone_end_pfn, PAGES_PER_SECTION);
|
||||
if (hole_pfn < end_pfn)
|
||||
pgcnt += init_unavailable_range(hole_pfn, end_pfn,
|
||||
zone_id, nid);
|
||||
#endif
|
||||
|
||||
if (pgcnt)
|
||||
pr_info(" %s zone: %llu pages in unavailable ranges\n",
|
||||
zone->name, pgcnt);
|
||||
}
|
||||
|
||||
static int zone_batchsize(struct zone *zone)
|
||||
@ -7071,88 +7151,6 @@ void __init free_area_init_memoryless_node(int nid)
|
||||
free_area_init_node(nid);
|
||||
}
|
||||
|
||||
#if !defined(CONFIG_FLAT_NODE_MEM_MAP)
|
||||
/*
|
||||
* Initialize all valid struct pages in the range [spfn, epfn) and mark them
|
||||
* PageReserved(). Return the number of struct pages that were initialized.
|
||||
*/
|
||||
static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn)
{
	u64 nr_init = 0;
	unsigned long pfn;

	for (pfn = spfn; pfn < epfn; pfn++) {
		unsigned long block_start = ALIGN_DOWN(pfn, pageblock_nr_pages);
		struct page *page;

		if (!pfn_valid(block_start)) {
			/*
			 * Land on the last pfn of this pageblock; the loop
			 * increment then moves on to the next pageblock.
			 */
			pfn = block_start + pageblock_nr_pages - 1;
			continue;
		}

		/*
		 * Zone and node are both passed as 0 as placeholders for now.
		 * Some of these pages (in memblock.reserved but not in
		 * memblock.memory) will get re-initialized via
		 * reserve_bootmem_region() later.
		 */
		page = pfn_to_page(pfn);
		__init_single_page(page, pfn, 0, 0);
		__SetPageReserved(page);
		nr_init++;
	}

	return nr_init;
}
|
||||
|
||||
/*
|
||||
* Only struct pages that are backed by physical memory are zeroed and
|
||||
* initialized by going through __init_single_page(). But, there are some
|
||||
* struct pages which are reserved in memblock allocator and their fields
|
||||
* may be accessed (for example page_to_pfn() on some configuration accesses
|
||||
* flags). We must explicitly initialize those struct pages.
|
||||
*
|
||||
* This function also addresses a similar issue where struct pages are left
|
||||
* uninitialized because the physical address range is not covered by
|
||||
* memblock.memory or memblock.reserved. That could happen when memblock
|
||||
* layout is manually configured via memmap=, or when the highest physical
|
||||
* address (max_pfn) does not end on a section boundary.
|
||||
*/
|
||||
static void __init init_unavailable_mem(void)
{
	phys_addr_t start, end;
	phys_addr_t next = 0;
	u64 pgcnt = 0;
	u64 i;

	/*
	 * Loop through unavailable ranges not covered by memblock.memory:
	 * each gap between the end of one memory range ('next') and the start
	 * of the following one is handed to init_unavailable_range().
	 */
	for_each_mem_range(i, &start, &end) {
		if (next < start)
			pgcnt += init_unavailable_range(PFN_DOWN(next),
							PFN_UP(start));
		next = end;
	}

	/*
	 * Early sections always have a fully populated memmap for the whole
	 * section - see pfn_valid(). If the last section has holes at the
	 * end and that section is marked "online", the memmap will be
	 * considered initialized. Make sure that memmap has a well defined
	 * state.
	 */
	pgcnt += init_unavailable_range(PFN_DOWN(next),
					round_up(max_pfn, PAGES_PER_SECTION));

	/*
	 * Struct pages that do not have backing memory. This could be because
	 * firmware is using some of this memory, or for some other reasons.
	 *
	 * pgcnt is a u64, so it is printed with %llu, and the message is
	 * newline-terminated so it forms a complete log line.
	 */
	if (pgcnt)
		pr_info("Zeroed struct page in unavailable ranges: %llu pages\n",
			pgcnt);
}
|
||||
#else
|
||||
/*
 * Empty variant for the #else side of !CONFIG_FLAT_NODE_MEM_MAP: does nothing.
 */
static inline void __init init_unavailable_mem(void)
|
||||
{
|
||||
}
|
||||
#endif /* !CONFIG_FLAT_NODE_MEM_MAP */
|
||||
|
||||
#if MAX_NUMNODES > 1
|
||||
/*
|
||||
* Figure out the number of possible node ids.
|
||||
@ -7576,7 +7574,6 @@ void __init free_area_init(unsigned long *max_zone_pfn)
|
||||
/* Initialise every node */
|
||||
mminit_verify_pageflags_layout();
|
||||
setup_nr_node_ids();
|
||||
init_unavailable_mem();
|
||||
for_each_online_node(nid) {
|
||||
pg_data_t *pgdat = NODE_DATA(nid);
|
||||
free_area_init_node(nid);
|
||||
|
@ -2992,7 +2992,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
|
||||
gfp_t flags, void *objp, unsigned long caller)
|
||||
{
|
||||
WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO));
|
||||
if (!objp)
|
||||
if (!objp || is_kfence_address(objp))
|
||||
return objp;
|
||||
if (cachep->flags & SLAB_POISON) {
|
||||
check_poison_obj(cachep, objp);
|
||||
|
Loading…
x
Reference in New Issue
Block a user