Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:
 "155 patches.

  Subsystems affected by this patch series: mm (dax, debug, thp,
  readahead, page-poison, util, memory-hotplug, zram, cleanups), misc,
  core-kernel, get_maintainer, MAINTAINERS, lib, bitops, checkpatch,
  binfmt, ramfs, autofs, nilfs, rapidio, panic, relay, kgdb, ubsan,
  romfs, and fault-injection"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (155 commits)
  lib, uaccess: add failure injection to usercopy functions
  lib, include/linux: add usercopy failure capability
  ROMFS: support inode blocks calculation
  ubsan: introduce CONFIG_UBSAN_LOCAL_BOUNDS for Clang
  sched.h: drop in_ubsan field when UBSAN is in trap mode
  scripts/gdb/tasks: add headers and improve spacing format
  scripts/gdb/proc: add struct mount & struct super_block addr in lx-mounts command
  kernel/relay.c: drop unneeded initialization
  panic: dump registers on panic_on_warn
  rapidio: fix the missed put_device() for rio_mport_add_riodev
  rapidio: fix error handling path
  nilfs2: fix some kernel-doc warnings for nilfs2
  autofs: harden ioctl table
  ramfs: fix nommu mmap with gaps in the page cache
  mm: remove the now-unnecessary mmget_still_valid() hack
  mm/gup: take mmap_lock in get_dump_page()
  binfmt_elf, binfmt_elf_fdpic: use a VMA list snapshot
  coredump: rework elf/elf_fdpic vma_dump_size() into common helper
  coredump: refactor page range dumping into common helper
  coredump: let dump_emit() bail out on short writes
  ...
commit c4cf498dc0

.mailmap
@@ -133,6 +133,7 @@ James Ketrenos <jketreno@io.(none)>
 Jan Glauber <jan.glauber@gmail.com> <jang@de.ibm.com>
 Jan Glauber <jan.glauber@gmail.com> <jang@linux.vnet.ibm.com>
 Jan Glauber <jan.glauber@gmail.com> <jglauber@cavium.com>
+Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@linux.intel.com>
 Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
 Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com>
 Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>
@@ -1343,6 +1343,7 @@
			current integrity status.

	failslab=
+	fail_usercopy=
	fail_page_alloc=
	fail_make_request=[KNL]
			General fault injection mechanism.
@@ -475,13 +475,15 @@ or iterations will move the index to the first index in the range.
 Each entry will only be returned once, no matter how many indices it
 occupies.

-Using xas_next() or xas_prev() with a multi-index xa_state
-is not supported. Using either of these functions on a multi-index entry
-will reveal sibling entries; these should be skipped over by the caller.
+Using xas_next() or xas_prev() with a multi-index xa_state is not
+supported. Using either of these functions on a multi-index entry will
+reveal sibling entries; these should be skipped over by the caller.

-Storing ``NULL`` into any index of a multi-index entry will set the entry
-at every index to ``NULL`` and dissolve the tie. Splitting a multi-index
-entry into entries occupying smaller ranges is not yet supported.
+Storing ``NULL`` into any index of a multi-index entry will set the
+entry at every index to ``NULL`` and dissolve the tie. A multi-index
+entry can be split into entries occupying smaller ranges by calling
+xas_split_alloc() without the xa_lock held, followed by taking the lock
+and calling xas_split().

 Functions and structures
 ========================
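A minimal sketch of the multi-index behaviour documented above, using the public xa_store_range()/xa_load()/xa_store() calls (requires CONFIG_XARRAY_MULTI); the array name, indices, and helper are illustrative only:

#include <linux/xarray.h>

static DEFINE_XARRAY(example_xa);

/* Tie indices 64..127 to one entry, then dissolve the tie by storing NULL. */
static void multi_index_example(void *item)
{
	xa_store_range(&example_xa, 64, 127, item, GFP_KERNEL);

	/* Every index in the range maps to the same entry. */
	WARN_ON(xa_load(&example_xa, 100) != item);

	/* Storing NULL at any index clears the whole range and dissolves the tie. */
	xa_store(&example_xa, 70, NULL, GFP_KERNEL);
	WARN_ON(xa_load(&example_xa, 64) != NULL);
}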
@@ -16,6 +16,10 @@ Available fault injection capabilities

   injects page allocation failures. (alloc_pages(), get_free_pages(), ...)

+- fail_usercopy
+
+  injects failures in user memory access functions. (copy_from_user(), get_user(), ...)
+
 - fail_futex

   injects futex deadlock and uaddr fault errors.

@@ -177,6 +181,7 @@ use the boot option::

	failslab=
	fail_page_alloc=
+	fail_usercopy=
	fail_make_request=
	fail_futex=
	mmc_core.fail_request=<interval>,<probability>,<space>,<times>

@@ -222,7 +227,7 @@ How to add new fault injection capability

 - debugfs entries

-  failslab, fail_page_alloc, and fail_make_request use this way.
+  failslab, fail_page_alloc, fail_usercopy, and fail_make_request use this way.
   Helper functions:

	fault_create_debugfs_attr(name, parent, attr);
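For context, a hypothetical sketch of the debugfs-based pattern this section describes, mirroring how failslab/fail_page_alloc/fail_usercopy are wired up; the capability name "fail_foo" and should_fail_foo() are made up for illustration and error handling is minimal:

#include <linux/fault-inject.h>
#include <linux/err.h>
#include <linux/init.h>

static DECLARE_FAULT_ATTR(fail_foo);

/* Allow "fail_foo=<interval>,<probability>,<space>,<times>" on the command line. */
static int __init setup_fail_foo(char *str)
{
	return setup_fault_attr(&fail_foo, str);
}
__setup("fail_foo=", setup_fail_foo);

/* Call sites ask this before doing the real work. */
bool should_fail_foo(size_t size)
{
	return should_fail(&fail_foo, size);
}

/* Expose /sys/kernel/debug/fail_foo/{probability,interval,times,...}. */
static int __init fail_foo_debugfs(void)
{
	struct dentry *dir = fault_create_debugfs_attr("fail_foo", NULL, &fail_foo);

	return PTR_ERR_OR_ZERO(dir);
}
late_initcall(fail_foo_debugfs);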
@@ -9715,7 +9715,7 @@ F:	security/keys/encrypted-keys/

 KEYS-TRUSTED
 M:	James Bottomley <jejb@linux.ibm.com>
-M:	Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
+M:	Jarkko Sakkinen <jarkko@kernel.org>
 M:	Mimi Zohar <zohar@linux.ibm.com>
 L:	linux-integrity@vger.kernel.org
 L:	keyrings@vger.kernel.org

@@ -9727,7 +9727,7 @@ F:	security/keys/trusted-keys/

 KEYS/KEYRINGS
 M:	David Howells <dhowells@redhat.com>
-M:	Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
+M:	Jarkko Sakkinen <jarkko@kernel.org>
 L:	keyrings@vger.kernel.org
 S:	Maintained
 F:	Documentation/security/keys/core.rst

@@ -17717,7 +17717,7 @@ F:	drivers/platform/x86/toshiba-wmi.c

 TPM DEVICE DRIVER
 M:	Peter Huewe <peterhuewe@gmx.de>
-M:	Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
+M:	Jarkko Sakkinen <jarkko@kernel.org>
 R:	Jason Gunthorpe <jgg@ziepe.ca>
 L:	linux-integrity@vger.kernel.org
 S:	Maintained
@ -537,7 +537,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg)
|
||||
if (map_start < map_end)
|
||||
memmap_init_zone((unsigned long)(map_end - map_start),
|
||||
args->nid, args->zone, page_to_pfn(map_start),
|
||||
MEMINIT_EARLY, NULL);
|
||||
MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -547,7 +547,7 @@ memmap_init (unsigned long size, int nid, unsigned long zone,
|
||||
{
|
||||
if (!vmem_map) {
|
||||
memmap_init_zone(size, nid, zone, start_pfn,
|
||||
MEMINIT_EARLY, NULL);
|
||||
MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
|
||||
} else {
|
||||
struct page *start;
|
||||
struct memmap_init_callback_data args;
|
||||
|
@ -615,7 +615,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
|
||||
VM_BUG_ON(pfn >> (64 - PAGE_SHIFT));
|
||||
VM_BUG_ON((pfn << PAGE_SHIFT) & ~PTE_RPN_MASK);
|
||||
|
||||
return __pte(((pte_basic_t)pfn << PAGE_SHIFT) | pgprot_val(pgprot));
|
||||
return __pte(((pte_basic_t)pfn << PAGE_SHIFT) | pgprot_val(pgprot) | _PAGE_PTE);
|
||||
}
|
||||
|
||||
static inline unsigned long pte_pfn(pte_t pte)
|
||||
@ -651,11 +651,6 @@ static inline pte_t pte_mkexec(pte_t pte)
|
||||
return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_EXEC));
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkpte(pte_t pte)
|
||||
{
|
||||
return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PTE));
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkwrite(pte_t pte)
|
||||
{
|
||||
/*
|
||||
@ -819,6 +814,14 @@ static inline int pte_none(pte_t pte)
|
||||
static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
|
||||
pte_t *ptep, pte_t pte, int percpu)
|
||||
{
|
||||
|
||||
VM_WARN_ON(!(pte_raw(pte) & cpu_to_be64(_PAGE_PTE)));
|
||||
/*
|
||||
* Keep the _PAGE_PTE added till we are sure we handle _PAGE_PTE
|
||||
* in all the callers.
|
||||
*/
|
||||
pte = __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PTE));
|
||||
|
||||
if (radix_enabled())
|
||||
return radix__set_pte_at(mm, addr, ptep, pte, percpu);
|
||||
return hash__set_pte_at(mm, addr, ptep, pte, percpu);
|
||||
@ -866,6 +869,13 @@ static inline bool pte_ci(pte_t pte)
|
||||
|
||||
static inline void pmd_clear(pmd_t *pmdp)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled()) {
|
||||
/*
|
||||
* Don't use this if we can possibly have a hash page table
|
||||
* entry mapping this.
|
||||
*/
|
||||
WARN_ON((pmd_val(*pmdp) & (H_PAGE_HASHPTE | _PAGE_PTE)) == (H_PAGE_HASHPTE | _PAGE_PTE));
|
||||
}
|
||||
*pmdp = __pmd(0);
|
||||
}
|
||||
|
||||
@ -914,6 +924,13 @@ static inline int pmd_bad(pmd_t pmd)
|
||||
|
||||
static inline void pud_clear(pud_t *pudp)
|
||||
{
|
||||
if (IS_ENABLED(CONFIG_DEBUG_VM) && !radix_enabled()) {
|
||||
/*
|
||||
* Don't use this if we can possibly have a hash page table
|
||||
* entry mapping this.
|
||||
*/
|
||||
WARN_ON((pud_val(*pudp) & (H_PAGE_HASHPTE | _PAGE_PTE)) == (H_PAGE_HASHPTE | _PAGE_PTE));
|
||||
}
|
||||
*pudp = __pud(0);
|
||||
}
|
||||
|
||||
|
@ -140,11 +140,6 @@ static inline pte_t pte_mkold(pte_t pte)
|
||||
return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkpte(pte_t pte)
|
||||
{
|
||||
return pte;
|
||||
}
|
||||
|
||||
static inline pte_t pte_mkspecial(pte_t pte)
|
||||
{
|
||||
return __pte(pte_val(pte) | _PAGE_SPECIAL);
|
||||
|
@ -184,9 +184,6 @@ void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
|
||||
*/
|
||||
VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
|
||||
|
||||
/* Add the pte bit when trying to set a pte */
|
||||
pte = pte_mkpte(pte);
|
||||
|
||||
/* Note: mm->context.id might not yet have been assigned as
|
||||
* this context might not have been activated yet when this
|
||||
* is called.
|
||||
@ -275,8 +272,6 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_
|
||||
*/
|
||||
VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
|
||||
|
||||
pte = pte_mkpte(pte);
|
||||
|
||||
pte = set_pte_filter(pte);
|
||||
|
||||
val = pte_val(pte);
|
||||
|
@ -224,7 +224,7 @@ static int memtrace_online(void)
|
||||
ent->mem = 0;
|
||||
}
|
||||
|
||||
if (add_memory(ent->nid, ent->start, ent->size)) {
|
||||
if (add_memory(ent->nid, ent->start, ent->size, MHP_NONE)) {
|
||||
pr_err("Failed to add trace memory to node %d\n",
|
||||
ent->nid);
|
||||
ret += 1;
|
||||
|
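The hunks above and below show callers of add_memory()/__add_memory() gaining a new mhp_t flags argument; a minimal sketch of the updated calling convention (nid/start/size and the helper name are placeholders):

#include <linux/memory_hotplug.h>

/* Hypothetical helper: hotplug a range with no special behaviour requested. */
static int example_hotplug_range(int nid, u64 start, u64 size)
{
	return add_memory(nid, start, size, MHP_NONE);
}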
@ -606,7 +606,7 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
|
||||
block_sz = memory_block_size_bytes();
|
||||
|
||||
/* Add the memory */
|
||||
rc = __add_memory(lmb->nid, lmb->base_addr, block_sz);
|
||||
rc = __add_memory(lmb->nid, lmb->base_addr, block_sz, MHP_NONE);
|
||||
if (rc) {
|
||||
invalidate_lmb_associativity_index(lmb);
|
||||
return rc;
|
||||
|
@ -194,7 +194,8 @@ static int acpi_memory_enable_device(struct acpi_memory_device *mem_device)
|
||||
if (node < 0)
|
||||
node = memory_add_physaddr_to_nid(info->start_addr);
|
||||
|
||||
result = __add_memory(node, info->start_addr, info->length);
|
||||
result = __add_memory(node, info->start_addr, info->length,
|
||||
MHP_NONE);
|
||||
|
||||
/*
|
||||
* If the memory block has been used by the kernel, add_memory()
|
||||
|
@ -432,7 +432,8 @@ static ssize_t probe_store(struct device *dev, struct device_attribute *attr,
|
||||
|
||||
nid = memory_add_physaddr_to_nid(phys_addr);
|
||||
ret = __add_memory(nid, phys_addr,
|
||||
MIN_MEMORY_BLOCK_SIZE * sections_per_block);
|
||||
MIN_MEMORY_BLOCK_SIZE * sections_per_block,
|
||||
MHP_NONE);
|
||||
|
||||
if (ret)
|
||||
goto out;
|
||||
|
@ -772,8 +772,8 @@ static int __ref get_nid_for_pfn(unsigned long pfn)
|
||||
return pfn_to_nid(pfn);
|
||||
}
|
||||
|
||||
static int do_register_memory_block_under_node(int nid,
|
||||
struct memory_block *mem_blk)
|
||||
static void do_register_memory_block_under_node(int nid,
|
||||
struct memory_block *mem_blk)
|
||||
{
|
||||
int ret;
|
||||
|
||||
@ -786,12 +786,19 @@ static int do_register_memory_block_under_node(int nid,
|
||||
ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj,
|
||||
&mem_blk->dev.kobj,
|
||||
kobject_name(&mem_blk->dev.kobj));
|
||||
if (ret)
|
||||
return ret;
|
||||
if (ret && ret != -EEXIST)
|
||||
dev_err_ratelimited(&node_devices[nid]->dev,
|
||||
"can't create link to %s in sysfs (%d)\n",
|
||||
kobject_name(&mem_blk->dev.kobj), ret);
|
||||
|
||||
return sysfs_create_link_nowarn(&mem_blk->dev.kobj,
|
||||
ret = sysfs_create_link_nowarn(&mem_blk->dev.kobj,
|
||||
&node_devices[nid]->dev.kobj,
|
||||
kobject_name(&node_devices[nid]->dev.kobj));
|
||||
if (ret && ret != -EEXIST)
|
||||
dev_err_ratelimited(&mem_blk->dev,
|
||||
"can't create link to %s in sysfs (%d)\n",
|
||||
kobject_name(&node_devices[nid]->dev.kobj),
|
||||
ret);
|
||||
}
|
||||
|
||||
/* register memory section under specified node if it spans that node */
|
||||
@ -827,7 +834,8 @@ static int register_mem_block_under_node_early(struct memory_block *mem_blk,
|
||||
if (page_nid != nid)
|
||||
continue;
|
||||
|
||||
return do_register_memory_block_under_node(nid, mem_blk);
|
||||
do_register_memory_block_under_node(nid, mem_blk);
|
||||
return 0;
|
||||
}
|
||||
/* mem section does not span the specified node */
|
||||
return 0;
|
||||
@ -842,7 +850,8 @@ static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk,
|
||||
{
|
||||
int nid = *(int *)arg;
|
||||
|
||||
return do_register_memory_block_under_node(nid, mem_blk);
|
||||
do_register_memory_block_under_node(nid, mem_blk);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -860,8 +869,8 @@ void unregister_memory_block_under_nodes(struct memory_block *mem_blk)
|
||||
kobject_name(&node_devices[mem_blk->nid]->dev.kobj));
|
||||
}
|
||||
|
||||
int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn,
|
||||
enum meminit_context context)
|
||||
void link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn,
|
||||
enum meminit_context context)
|
||||
{
|
||||
walk_memory_blocks_func_t func;
|
||||
|
||||
@ -870,9 +879,9 @@ int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn,
|
||||
else
|
||||
func = register_mem_block_under_node_early;
|
||||
|
||||
return walk_memory_blocks(PFN_PHYS(start_pfn),
|
||||
PFN_PHYS(end_pfn - start_pfn), (void *)&nid,
|
||||
func);
|
||||
walk_memory_blocks(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn),
|
||||
(void *)&nid, func);
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_HUGETLBFS
|
||||
|
@ -1270,7 +1270,7 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
|
||||
zram_slot_unlock(zram, index);
|
||||
|
||||
/* Should NEVER happen. Return bio error if it does. */
|
||||
if (unlikely(ret))
|
||||
if (WARN_ON(ret))
|
||||
pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
|
||||
|
||||
return ret;
|
||||
|
@ -35,11 +35,17 @@ static int dax_kmem_range(struct dev_dax *dev_dax, int i, struct range *r)
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct dax_kmem_data {
|
||||
const char *res_name;
|
||||
struct resource *res[];
|
||||
};
|
||||
|
||||
static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
|
||||
{
|
||||
struct device *dev = &dev_dax->dev;
|
||||
struct dax_kmem_data *data;
|
||||
int rc = -ENOMEM;
|
||||
int i, mapped = 0;
|
||||
char *res_name;
|
||||
int numa_node;
|
||||
|
||||
/*
|
||||
@ -55,14 +61,17 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
res_name = kstrdup(dev_name(dev), GFP_KERNEL);
|
||||
if (!res_name)
|
||||
data = kzalloc(sizeof(*data) + sizeof(struct resource *) * dev_dax->nr_range, GFP_KERNEL);
|
||||
if (!data)
|
||||
return -ENOMEM;
|
||||
|
||||
data->res_name = kstrdup(dev_name(dev), GFP_KERNEL);
|
||||
if (!data->res_name)
|
||||
goto err_res_name;
|
||||
|
||||
for (i = 0; i < dev_dax->nr_range; i++) {
|
||||
struct resource *res;
|
||||
struct range range;
|
||||
int rc;
|
||||
|
||||
rc = dax_kmem_range(dev_dax, i, &range);
|
||||
if (rc) {
|
||||
@ -72,7 +81,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
|
||||
}
|
||||
|
||||
/* Region is permanently reserved if hotremove fails. */
|
||||
res = request_mem_region(range.start, range_len(&range), res_name);
|
||||
res = request_mem_region(range.start, range_len(&range), data->res_name);
|
||||
if (!res) {
|
||||
dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve region\n",
|
||||
i, range.start, range.end);
|
||||
@ -82,9 +91,10 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
|
||||
*/
|
||||
if (mapped)
|
||||
continue;
|
||||
kfree(res_name);
|
||||
return -EBUSY;
|
||||
rc = -EBUSY;
|
||||
goto err_request_mem;
|
||||
}
|
||||
data->res[i] = res;
|
||||
|
||||
/*
|
||||
* Set flags appropriate for System RAM. Leave ..._BUSY clear
|
||||
@ -99,23 +109,30 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
|
||||
* this as RAM automatically.
|
||||
*/
|
||||
rc = add_memory_driver_managed(numa_node, range.start,
|
||||
range_len(&range), kmem_name);
|
||||
range_len(&range), kmem_name, MHP_NONE);
|
||||
|
||||
if (rc) {
|
||||
dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n",
|
||||
i, range.start, range.end);
|
||||
release_mem_region(range.start, range_len(&range));
|
||||
release_resource(res);
|
||||
kfree(res);
|
||||
data->res[i] = NULL;
|
||||
if (mapped)
|
||||
continue;
|
||||
kfree(res_name);
|
||||
return rc;
|
||||
goto err_request_mem;
|
||||
}
|
||||
mapped++;
|
||||
}
|
||||
|
||||
dev_set_drvdata(dev, res_name);
|
||||
dev_set_drvdata(dev, data);
|
||||
|
||||
return 0;
|
||||
|
||||
err_request_mem:
|
||||
kfree(data->res_name);
|
||||
err_res_name:
|
||||
kfree(data);
|
||||
return rc;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTREMOVE
|
||||
@ -123,7 +140,7 @@ static int dev_dax_kmem_remove(struct dev_dax *dev_dax)
|
||||
{
|
||||
int i, success = 0;
|
||||
struct device *dev = &dev_dax->dev;
|
||||
const char *res_name = dev_get_drvdata(dev);
|
||||
struct dax_kmem_data *data = dev_get_drvdata(dev);
|
||||
|
||||
/*
|
||||
* We have one shot for removing memory, if some memory blocks were not
|
||||
@ -142,7 +159,9 @@ static int dev_dax_kmem_remove(struct dev_dax *dev_dax)
|
||||
rc = remove_memory(dev_dax->target_node, range.start,
|
||||
range_len(&range));
|
||||
if (rc == 0) {
|
||||
release_mem_region(range.start, range_len(&range));
|
||||
release_resource(data->res[i]);
|
||||
kfree(data->res[i]);
|
||||
data->res[i] = NULL;
|
||||
success++;
|
||||
continue;
|
||||
}
|
||||
@ -153,7 +172,8 @@ static int dev_dax_kmem_remove(struct dev_dax *dev_dax)
|
||||
}
|
||||
|
||||
if (success >= dev_dax->nr_range) {
|
||||
kfree(res_name);
|
||||
kfree(data->res_name);
|
||||
kfree(data);
|
||||
dev_set_drvdata(dev, NULL);
|
||||
}
|
||||
|
||||
|
@ -726,7 +726,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
|
||||
|
||||
nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
|
||||
ret = add_memory(nid, PFN_PHYS((start_pfn)),
|
||||
(HA_CHUNK << PAGE_SHIFT));
|
||||
(HA_CHUNK << PAGE_SHIFT), MEMHP_MERGE_RESOURCE);
|
||||
|
||||
if (ret) {
|
||||
pr_err("hot_add memory failed error is %d\n", ret);
|
||||
|
@ -845,8 +845,6 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
|
||||
* will only be one mm, so no big deal.
|
||||
*/
|
||||
mmap_read_lock(mm);
|
||||
if (!mmget_still_valid(mm))
|
||||
goto skip_mm;
|
||||
mutex_lock(&ufile->umap_lock);
|
||||
list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
|
||||
list) {
|
||||
@ -865,7 +863,6 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
|
||||
}
|
||||
}
|
||||
mutex_unlock(&ufile->umap_lock);
|
||||
skip_mm:
|
||||
mmap_read_unlock(mm);
|
||||
mmput(mm);
|
||||
}
|
||||
|
@ -871,15 +871,16 @@ rio_dma_transfer(struct file *filp, u32 transfer_mode,
|
||||
rmcd_error("pin_user_pages_fast err=%ld",
|
||||
pinned);
|
||||
nr_pages = 0;
|
||||
} else
|
||||
} else {
|
||||
rmcd_error("pinned %ld out of %ld pages",
|
||||
pinned, nr_pages);
|
||||
/*
|
||||
* Set nr_pages up to mean "how many pages to unpin, in
|
||||
* the error handler:
|
||||
*/
|
||||
nr_pages = pinned;
|
||||
}
|
||||
ret = -EFAULT;
|
||||
/*
|
||||
* Set nr_pages up to mean "how many pages to unpin, in
|
||||
* the error handler:
|
||||
*/
|
||||
nr_pages = pinned;
|
||||
goto err_pg;
|
||||
}
|
||||
|
||||
@ -1679,6 +1680,7 @@ static int rio_mport_add_riodev(struct mport_cdev_priv *priv,
|
||||
struct rio_dev *rdev;
|
||||
struct rio_switch *rswitch = NULL;
|
||||
struct rio_mport *mport;
|
||||
struct device *dev;
|
||||
size_t size;
|
||||
u32 rval;
|
||||
u32 swpinfo = 0;
|
||||
@ -1693,8 +1695,10 @@ static int rio_mport_add_riodev(struct mport_cdev_priv *priv,
|
||||
rmcd_debug(RDEV, "name:%s ct:0x%x did:0x%x hc:0x%x", dev_info.name,
|
||||
dev_info.comptag, dev_info.destid, dev_info.hopcount);
|
||||
|
||||
if (bus_find_device_by_name(&rio_bus_type, NULL, dev_info.name)) {
|
||||
dev = bus_find_device_by_name(&rio_bus_type, NULL, dev_info.name);
|
||||
if (dev) {
|
||||
rmcd_debug(RDEV, "device %s already exists", dev_info.name);
|
||||
put_device(dev);
|
||||
return -EEXIST;
|
||||
}
|
||||
|
||||
|
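The rapidio fix above follows the usual driver-core rule that bus_find_device_by_name() returns a referenced struct device which must be released with put_device(); a small sketch of that pattern, with "example_bus_type" standing in for a real bus:

#include <linux/device.h>

extern struct bus_type example_bus_type;	/* hypothetical bus, for illustration */

static bool example_device_exists(const char *name)
{
	struct device *dev = bus_find_device_by_name(&example_bus_type, NULL, name);

	if (!dev)
		return false;

	put_device(dev);	/* drop the reference taken by the lookup */
	return true;
}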
@ -406,7 +406,7 @@ static void __init add_memory_merged(u16 rn)
|
||||
if (!size)
|
||||
goto skip_add;
|
||||
for (addr = start; addr < start + size; addr += block_size)
|
||||
add_memory(0, addr, block_size);
|
||||
add_memory(0, addr, block_size, MHP_NONE);
|
||||
skip_add:
|
||||
first_rn = rn;
|
||||
num = 1;
|
||||
|
@ -1480,31 +1480,29 @@ static int vfio_pci_zap_and_vma_lock(struct vfio_pci_device *vdev, bool try)
|
||||
} else {
|
||||
mmap_read_lock(mm);
|
||||
}
|
||||
if (mmget_still_valid(mm)) {
|
||||
if (try) {
|
||||
if (!mutex_trylock(&vdev->vma_lock)) {
|
||||
mmap_read_unlock(mm);
|
||||
mmput(mm);
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
mutex_lock(&vdev->vma_lock);
|
||||
if (try) {
|
||||
if (!mutex_trylock(&vdev->vma_lock)) {
|
||||
mmap_read_unlock(mm);
|
||||
mmput(mm);
|
||||
return 0;
|
||||
}
|
||||
list_for_each_entry_safe(mmap_vma, tmp,
|
||||
&vdev->vma_list, vma_next) {
|
||||
struct vm_area_struct *vma = mmap_vma->vma;
|
||||
|
||||
if (vma->vm_mm != mm)
|
||||
continue;
|
||||
|
||||
list_del(&mmap_vma->vma_next);
|
||||
kfree(mmap_vma);
|
||||
|
||||
zap_vma_ptes(vma, vma->vm_start,
|
||||
vma->vm_end - vma->vm_start);
|
||||
}
|
||||
mutex_unlock(&vdev->vma_lock);
|
||||
} else {
|
||||
mutex_lock(&vdev->vma_lock);
|
||||
}
|
||||
list_for_each_entry_safe(mmap_vma, tmp,
|
||||
&vdev->vma_list, vma_next) {
|
||||
struct vm_area_struct *vma = mmap_vma->vma;
|
||||
|
||||
if (vma->vm_mm != mm)
|
||||
continue;
|
||||
|
||||
list_del(&mmap_vma->vma_next);
|
||||
kfree(mmap_vma);
|
||||
|
||||
zap_vma_ptes(vma, vma->vm_start,
|
||||
vma->vm_end - vma->vm_start);
|
||||
}
|
||||
mutex_unlock(&vdev->vma_lock);
|
||||
mmap_read_unlock(mm);
|
||||
mmput(mm);
|
||||
}
|
||||
|
@ -424,7 +424,8 @@ static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id)
|
||||
|
||||
dev_dbg(&vm->vdev->dev, "adding memory block: %lu\n", mb_id);
|
||||
return add_memory_driver_managed(nid, addr, memory_block_size_bytes(),
|
||||
vm->resource_name);
|
||||
vm->resource_name,
|
||||
MEMHP_MERGE_RESOURCE);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -331,7 +331,7 @@ static enum bp_state reserve_additional_memory(void)
|
||||
mutex_unlock(&balloon_mutex);
|
||||
/* add_memory_resource() requires the device_hotplug lock */
|
||||
lock_device_hotplug();
|
||||
rc = add_memory_resource(nid, resource);
|
||||
rc = add_memory_resource(nid, resource, MEMHP_MERGE_RESOURCE);
|
||||
unlock_device_hotplug();
|
||||
mutex_lock(&balloon_mutex);
|
||||
|
||||
|
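The Hyper-V, virtio-mem, and Xen balloon hunks all pass MEMHP_MERGE_RESOURCE so newly added ranges are merged into an adjacent mergeable resource instead of creating many small /proc/iomem entries; a hedged sketch of such a caller (the function name is illustrative, and as the hunk above notes, add_memory_resource() must be called with the device hotplug lock held):

#include <linux/device.h>
#include <linux/memory_hotplug.h>

static int example_balloon_add(int nid, struct resource *res)
{
	int rc;

	lock_device_hotplug();
	rc = add_memory_resource(nid, res, MEMHP_MERGE_RESOURCE);
	unlock_device_hotplug();
	return rc;
}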
@ -8,6 +8,7 @@
|
||||
#include <linux/compat.h>
|
||||
#include <linux/syscalls.h>
|
||||
#include <linux/magic.h>
|
||||
#include <linux/nospec.h>
|
||||
|
||||
#include "autofs_i.h"
|
||||
|
||||
@ -563,7 +564,7 @@ out:
|
||||
|
||||
static ioctl_fn lookup_dev_ioctl(unsigned int cmd)
|
||||
{
|
||||
static ioctl_fn _ioctls[] = {
|
||||
static const ioctl_fn _ioctls[] = {
|
||||
autofs_dev_ioctl_version,
|
||||
autofs_dev_ioctl_protover,
|
||||
autofs_dev_ioctl_protosubver,
|
||||
@ -581,7 +582,10 @@ static ioctl_fn lookup_dev_ioctl(unsigned int cmd)
|
||||
};
|
||||
unsigned int idx = cmd_idx(cmd);
|
||||
|
||||
return (idx >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[idx];
|
||||
if (idx >= ARRAY_SIZE(_ioctls))
|
||||
return NULL;
|
||||
idx = array_index_nospec(idx, ARRAY_SIZE(_ioctls));
|
||||
return _ioctls[idx];
|
||||
}
|
||||
|
||||
/* ioctl dispatcher */
|
||||
|
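The autofs change above is an instance of the standard Spectre-v1 hardening pattern: bounds-check an untrusted index, then clamp it with array_index_nospec() before using it; a generic sketch (the table and handler type are illustrative):

#include <linux/kernel.h>
#include <linux/nospec.h>

typedef int (*handler_fn)(void);

static handler_fn lookup_handler(handler_fn *table, size_t nr, unsigned int idx)
{
	if (idx >= nr)
		return NULL;
	/* Keep the CPU from speculating past the bounds check above. */
	idx = array_index_nospec(idx, nr);
	return table[idx];
}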
fs/binfmt_elf.c
@ -13,6 +13,7 @@
|
||||
#include <linux/module.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/log2.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/mman.h>
|
||||
#include <linux/errno.h>
|
||||
@ -421,6 +422,26 @@ static int elf_read(struct file *file, void *buf, size_t len, loff_t pos)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static unsigned long maximum_alignment(struct elf_phdr *cmds, int nr)
|
||||
{
|
||||
unsigned long alignment = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
if (cmds[i].p_type == PT_LOAD) {
|
||||
unsigned long p_align = cmds[i].p_align;
|
||||
|
||||
/* skip non-power of two alignments as invalid */
|
||||
if (!is_power_of_2(p_align))
|
||||
continue;
|
||||
alignment = max(alignment, p_align);
|
||||
}
|
||||
}
|
||||
|
||||
/* ensure we align to at least one page */
|
||||
return ELF_PAGEALIGN(alignment);
|
||||
}
|
||||
|
||||
/**
|
||||
* load_elf_phdrs() - load ELF program headers
|
||||
* @elf_ex: ELF header of the binary whose program headers should be loaded
|
||||
@ -1008,6 +1029,7 @@ out_free_interp:
|
||||
int elf_prot, elf_flags;
|
||||
unsigned long k, vaddr;
|
||||
unsigned long total_size = 0;
|
||||
unsigned long alignment;
|
||||
|
||||
if (elf_ppnt->p_type != PT_LOAD)
|
||||
continue;
|
||||
@ -1086,6 +1108,9 @@ out_free_interp:
|
||||
load_bias = ELF_ET_DYN_BASE;
|
||||
if (current->flags & PF_RANDOMIZE)
|
||||
load_bias += arch_mmap_rnd();
|
||||
alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum);
|
||||
if (alignment)
|
||||
load_bias &= ~(alignment - 1);
|
||||
elf_flags |= MAP_FIXED;
|
||||
} else
|
||||
load_bias = 0;
|
||||
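A worked example of the masking just added, assuming a PIE whose largest PT_LOAD p_align is 2 MiB: ELF_PAGEALIGN() keeps the 2 MiB value, and clearing the low bits of the randomized base preserves that alignment for every segment (the constants below are illustrative):

/* e.g. load_bias 0x7f3a9c4d1000 with a 2 MiB alignment -> 0x7f3a9c400000 */
static unsigned long apply_alignment(unsigned long load_bias, unsigned long alignment)
{
	if (alignment)
		load_bias &= ~(alignment - 1);
	return load_bias;
}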
@ -1389,126 +1414,6 @@ out:
|
||||
* Jeremy Fitzhardinge <jeremy@sw.oz.au>
|
||||
*/
|
||||
|
||||
/*
|
||||
* The purpose of always_dump_vma() is to make sure that special kernel mappings
|
||||
* that are useful for post-mortem analysis are included in every core dump.
|
||||
* In that way we ensure that the core dump is fully interpretable later
|
||||
* without matching up the same kernel and hardware config to see what PC values
|
||||
* meant. These special mappings include - vDSO, vsyscall, and other
|
||||
* architecture specific mappings
|
||||
*/
|
||||
static bool always_dump_vma(struct vm_area_struct *vma)
|
||||
{
|
||||
/* Any vsyscall mappings? */
|
||||
if (vma == get_gate_vma(vma->vm_mm))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Assume that all vmas with a .name op should always be dumped.
|
||||
* If this changes, a new vm_ops field can easily be added.
|
||||
*/
|
||||
if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* arch_vma_name() returns non-NULL for special architecture mappings,
|
||||
* such as vDSO sections.
|
||||
*/
|
||||
if (arch_vma_name(vma))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decide what to dump of a segment, part, all or none.
|
||||
*/
|
||||
static unsigned long vma_dump_size(struct vm_area_struct *vma,
|
||||
unsigned long mm_flags)
|
||||
{
|
||||
#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
|
||||
|
||||
/* always dump the vdso and vsyscall sections */
|
||||
if (always_dump_vma(vma))
|
||||
goto whole;
|
||||
|
||||
if (vma->vm_flags & VM_DONTDUMP)
|
||||
return 0;
|
||||
|
||||
/* support for DAX */
|
||||
if (vma_is_dax(vma)) {
|
||||
if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
|
||||
goto whole;
|
||||
if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
|
||||
goto whole;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Hugetlb memory check */
|
||||
if (is_vm_hugetlb_page(vma)) {
|
||||
if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
|
||||
goto whole;
|
||||
if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
|
||||
goto whole;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Do not dump I/O mapped devices or special mappings */
|
||||
if (vma->vm_flags & VM_IO)
|
||||
return 0;
|
||||
|
||||
/* By default, dump shared memory if mapped from an anonymous file. */
|
||||
if (vma->vm_flags & VM_SHARED) {
|
||||
if (file_inode(vma->vm_file)->i_nlink == 0 ?
|
||||
FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
|
||||
goto whole;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Dump segments that have been written to. */
|
||||
if (vma->anon_vma && FILTER(ANON_PRIVATE))
|
||||
goto whole;
|
||||
if (vma->vm_file == NULL)
|
||||
return 0;
|
||||
|
||||
if (FILTER(MAPPED_PRIVATE))
|
||||
goto whole;
|
||||
|
||||
/*
|
||||
* If this looks like the beginning of a DSO or executable mapping,
|
||||
* check for an ELF header. If we find one, dump the first page to
|
||||
* aid in determining what was mapped here.
|
||||
*/
|
||||
if (FILTER(ELF_HEADERS) &&
|
||||
vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
|
||||
u32 __user *header = (u32 __user *) vma->vm_start;
|
||||
u32 word;
|
||||
/*
|
||||
* Doing it this way gets the constant folded by GCC.
|
||||
*/
|
||||
union {
|
||||
u32 cmp;
|
||||
char elfmag[SELFMAG];
|
||||
} magic;
|
||||
BUILD_BUG_ON(SELFMAG != sizeof word);
|
||||
magic.elfmag[EI_MAG0] = ELFMAG0;
|
||||
magic.elfmag[EI_MAG1] = ELFMAG1;
|
||||
magic.elfmag[EI_MAG2] = ELFMAG2;
|
||||
magic.elfmag[EI_MAG3] = ELFMAG3;
|
||||
if (unlikely(get_user(word, header)))
|
||||
word = 0;
|
||||
if (word == magic.cmp)
|
||||
return PAGE_SIZE;
|
||||
}
|
||||
|
||||
#undef FILTER
|
||||
|
||||
return 0;
|
||||
|
||||
whole:
|
||||
return vma->vm_end - vma->vm_start;
|
||||
}
|
||||
|
||||
/* An ELF note in memory */
|
||||
struct memelfnote
|
||||
{
|
||||
@ -2220,32 +2125,6 @@ static void free_note_info(struct elf_note_info *info)
|
||||
|
||||
#endif
|
||||
|
||||
static struct vm_area_struct *first_vma(struct task_struct *tsk,
|
||||
struct vm_area_struct *gate_vma)
|
||||
{
|
||||
struct vm_area_struct *ret = tsk->mm->mmap;
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
return gate_vma;
|
||||
}
|
||||
/*
|
||||
* Helper function for iterating across a vma list. It ensures that the caller
|
||||
* will visit `gate_vma' prior to terminating the search.
|
||||
*/
|
||||
static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
|
||||
struct vm_area_struct *gate_vma)
|
||||
{
|
||||
struct vm_area_struct *ret;
|
||||
|
||||
ret = this_vma->vm_next;
|
||||
if (ret)
|
||||
return ret;
|
||||
if (this_vma == gate_vma)
|
||||
return NULL;
|
||||
return gate_vma;
|
||||
}
|
||||
|
||||
static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
|
||||
elf_addr_t e_shoff, int segs)
|
||||
{
|
||||
@ -2272,9 +2151,8 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
|
||||
static int elf_core_dump(struct coredump_params *cprm)
|
||||
{
|
||||
int has_dumped = 0;
|
||||
int segs, i;
|
||||
size_t vma_data_size = 0;
|
||||
struct vm_area_struct *vma, *gate_vma;
|
||||
int vma_count, segs, i;
|
||||
size_t vma_data_size;
|
||||
struct elfhdr elf;
|
||||
loff_t offset = 0, dataoff;
|
||||
struct elf_note_info info = { };
|
||||
@ -2282,30 +2160,16 @@ static int elf_core_dump(struct coredump_params *cprm)
|
||||
struct elf_shdr *shdr4extnum = NULL;
|
||||
Elf_Half e_phnum;
|
||||
elf_addr_t e_shoff;
|
||||
elf_addr_t *vma_filesz = NULL;
|
||||
struct core_vma_metadata *vma_meta;
|
||||
|
||||
if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* We no longer stop all VM operations.
|
||||
*
|
||||
* This is because those proceses that could possibly change map_count
|
||||
* or the mmap / vma pages are now blocked in do_exit on current
|
||||
* finishing this core dump.
|
||||
*
|
||||
* Only ptrace can touch these memory addresses, but it doesn't change
|
||||
* the map_count or the pages allocated. So no possibility of crashing
|
||||
* exists while dumping the mm->vm_next areas to the core file.
|
||||
*/
|
||||
|
||||
/*
|
||||
* The number of segs are recored into ELF header as 16bit value.
|
||||
* Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
|
||||
*/
|
||||
segs = current->mm->map_count;
|
||||
segs += elf_core_extra_phdrs();
|
||||
|
||||
gate_vma = get_gate_vma(current->mm);
|
||||
if (gate_vma != NULL)
|
||||
segs++;
|
||||
segs = vma_count + elf_core_extra_phdrs();
|
||||
|
||||
/* for notes section */
|
||||
segs++;
|
||||
@ -2343,24 +2207,6 @@ static int elf_core_dump(struct coredump_params *cprm)
|
||||
|
||||
dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
|
||||
|
||||
/*
|
||||
* Zero vma process will get ZERO_SIZE_PTR here.
|
||||
* Let coredump continue for register state at least.
|
||||
*/
|
||||
vma_filesz = kvmalloc(array_size(sizeof(*vma_filesz), (segs - 1)),
|
||||
GFP_KERNEL);
|
||||
if (!vma_filesz)
|
||||
goto end_coredump;
|
||||
|
||||
for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
|
||||
vma = next_vma(vma, gate_vma)) {
|
||||
unsigned long dump_size;
|
||||
|
||||
dump_size = vma_dump_size(vma, cprm->mm_flags);
|
||||
vma_filesz[i++] = dump_size;
|
||||
vma_data_size += dump_size;
|
||||
}
|
||||
|
||||
offset += vma_data_size;
|
||||
offset += elf_core_extra_data_size();
|
||||
e_shoff = offset;
|
||||
@ -2381,21 +2227,23 @@ static int elf_core_dump(struct coredump_params *cprm)
|
||||
goto end_coredump;
|
||||
|
||||
/* Write program headers for segments dump */
|
||||
for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
|
||||
vma = next_vma(vma, gate_vma)) {
|
||||
for (i = 0; i < vma_count; i++) {
|
||||
struct core_vma_metadata *meta = vma_meta + i;
|
||||
struct elf_phdr phdr;
|
||||
|
||||
phdr.p_type = PT_LOAD;
|
||||
phdr.p_offset = offset;
|
||||
phdr.p_vaddr = vma->vm_start;
|
||||
phdr.p_vaddr = meta->start;
|
||||
phdr.p_paddr = 0;
|
||||
phdr.p_filesz = vma_filesz[i++];
|
||||
phdr.p_memsz = vma->vm_end - vma->vm_start;
|
||||
phdr.p_filesz = meta->dump_size;
|
||||
phdr.p_memsz = meta->end - meta->start;
|
||||
offset += phdr.p_filesz;
|
||||
phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
|
||||
if (vma->vm_flags & VM_WRITE)
|
||||
phdr.p_flags = 0;
|
||||
if (meta->flags & VM_READ)
|
||||
phdr.p_flags |= PF_R;
|
||||
if (meta->flags & VM_WRITE)
|
||||
phdr.p_flags |= PF_W;
|
||||
if (vma->vm_flags & VM_EXEC)
|
||||
if (meta->flags & VM_EXEC)
|
||||
phdr.p_flags |= PF_X;
|
||||
phdr.p_align = ELF_EXEC_PAGESIZE;
|
||||
|
||||
@ -2417,28 +2265,11 @@ static int elf_core_dump(struct coredump_params *cprm)
|
||||
if (!dump_skip(cprm, dataoff - cprm->pos))
|
||||
goto end_coredump;
|
||||
|
||||
for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
|
||||
vma = next_vma(vma, gate_vma)) {
|
||||
unsigned long addr;
|
||||
unsigned long end;
|
||||
for (i = 0; i < vma_count; i++) {
|
||||
struct core_vma_metadata *meta = vma_meta + i;
|
||||
|
||||
end = vma->vm_start + vma_filesz[i++];
|
||||
|
||||
for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
|
||||
struct page *page;
|
||||
int stop;
|
||||
|
||||
page = get_dump_page(addr);
|
||||
if (page) {
|
||||
void *kaddr = kmap(page);
|
||||
stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
|
||||
kunmap(page);
|
||||
put_page(page);
|
||||
} else
|
||||
stop = !dump_skip(cprm, PAGE_SIZE);
|
||||
if (stop)
|
||||
goto end_coredump;
|
||||
}
|
||||
if (!dump_user_range(cprm, meta->start, meta->dump_size))
|
||||
goto end_coredump;
|
||||
}
|
||||
dump_truncate(cprm);
|
||||
|
||||
@ -2453,7 +2284,7 @@ static int elf_core_dump(struct coredump_params *cprm)
|
||||
end_coredump:
|
||||
free_note_info(&info);
|
||||
kfree(shdr4extnum);
|
||||
kvfree(vma_filesz);
|
||||
kvfree(vma_meta);
|
||||
kfree(phdr4note);
|
||||
return has_dumped;
|
||||
}
|
||||
|
@ -1215,76 +1215,6 @@ struct elf_prstatus_fdpic
|
||||
int pr_fpvalid; /* True if math co-processor being used. */
|
||||
};
|
||||
|
||||
/*
|
||||
* Decide whether a segment is worth dumping; default is yes to be
|
||||
* sure (missing info is worse than too much; etc).
|
||||
* Personally I'd include everything, and use the coredump limit...
|
||||
*
|
||||
* I think we should skip something. But I am not sure how. H.J.
|
||||
*/
|
||||
static int maydump(struct vm_area_struct *vma, unsigned long mm_flags)
|
||||
{
|
||||
int dump_ok;
|
||||
|
||||
/* Do not dump I/O mapped devices or special mappings */
|
||||
if (vma->vm_flags & VM_IO) {
|
||||
kdcore("%08lx: %08lx: no (IO)", vma->vm_start, vma->vm_flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* If we may not read the contents, don't allow us to dump
|
||||
* them either. "dump_write()" can't handle it anyway.
|
||||
*/
|
||||
if (!(vma->vm_flags & VM_READ)) {
|
||||
kdcore("%08lx: %08lx: no (!read)", vma->vm_start, vma->vm_flags);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* support for DAX */
|
||||
if (vma_is_dax(vma)) {
|
||||
if (vma->vm_flags & VM_SHARED) {
|
||||
dump_ok = test_bit(MMF_DUMP_DAX_SHARED, &mm_flags);
|
||||
kdcore("%08lx: %08lx: %s (DAX shared)", vma->vm_start,
|
||||
vma->vm_flags, dump_ok ? "yes" : "no");
|
||||
} else {
|
||||
dump_ok = test_bit(MMF_DUMP_DAX_PRIVATE, &mm_flags);
|
||||
kdcore("%08lx: %08lx: %s (DAX private)", vma->vm_start,
|
||||
vma->vm_flags, dump_ok ? "yes" : "no");
|
||||
}
|
||||
return dump_ok;
|
||||
}
|
||||
|
||||
/* By default, dump shared memory if mapped from an anonymous file. */
|
||||
if (vma->vm_flags & VM_SHARED) {
|
||||
if (file_inode(vma->vm_file)->i_nlink == 0) {
|
||||
dump_ok = test_bit(MMF_DUMP_ANON_SHARED, &mm_flags);
|
||||
kdcore("%08lx: %08lx: %s (share)", vma->vm_start,
|
||||
vma->vm_flags, dump_ok ? "yes" : "no");
|
||||
return dump_ok;
|
||||
}
|
||||
|
||||
dump_ok = test_bit(MMF_DUMP_MAPPED_SHARED, &mm_flags);
|
||||
kdcore("%08lx: %08lx: %s (share)", vma->vm_start,
|
||||
vma->vm_flags, dump_ok ? "yes" : "no");
|
||||
return dump_ok;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
/* By default, if it hasn't been written to, don't write it out */
|
||||
if (!vma->anon_vma) {
|
||||
dump_ok = test_bit(MMF_DUMP_MAPPED_PRIVATE, &mm_flags);
|
||||
kdcore("%08lx: %08lx: %s (!anon)", vma->vm_start,
|
||||
vma->vm_flags, dump_ok ? "yes" : "no");
|
||||
return dump_ok;
|
||||
}
|
||||
#endif
|
||||
|
||||
dump_ok = test_bit(MMF_DUMP_ANON_PRIVATE, &mm_flags);
|
||||
kdcore("%08lx: %08lx: %s", vma->vm_start, vma->vm_flags,
|
||||
dump_ok ? "yes" : "no");
|
||||
return dump_ok;
|
||||
}
|
||||
|
||||
/* An ELF note in memory */
|
||||
struct memelfnote
|
||||
{
|
||||
@ -1524,54 +1454,21 @@ static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
|
||||
/*
|
||||
* dump the segments for an MMU process
|
||||
*/
|
||||
static bool elf_fdpic_dump_segments(struct coredump_params *cprm)
|
||||
static bool elf_fdpic_dump_segments(struct coredump_params *cprm,
|
||||
struct core_vma_metadata *vma_meta,
|
||||
int vma_count)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
int i;
|
||||
|
||||
for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
|
||||
#ifdef CONFIG_MMU
|
||||
unsigned long addr;
|
||||
#endif
|
||||
for (i = 0; i < vma_count; i++) {
|
||||
struct core_vma_metadata *meta = vma_meta + i;
|
||||
|
||||
if (!maydump(vma, cprm->mm_flags))
|
||||
continue;
|
||||
|
||||
#ifdef CONFIG_MMU
|
||||
for (addr = vma->vm_start; addr < vma->vm_end;
|
||||
addr += PAGE_SIZE) {
|
||||
bool res;
|
||||
struct page *page = get_dump_page(addr);
|
||||
if (page) {
|
||||
void *kaddr = kmap(page);
|
||||
res = dump_emit(cprm, kaddr, PAGE_SIZE);
|
||||
kunmap(page);
|
||||
put_page(page);
|
||||
} else {
|
||||
res = dump_skip(cprm, PAGE_SIZE);
|
||||
}
|
||||
if (!res)
|
||||
return false;
|
||||
}
|
||||
#else
|
||||
if (!dump_emit(cprm, (void *) vma->vm_start,
|
||||
vma->vm_end - vma->vm_start))
|
||||
if (!dump_user_range(cprm, meta->start, meta->dump_size))
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static size_t elf_core_vma_data_size(unsigned long mm_flags)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
size_t size = 0;
|
||||
|
||||
for (vma = current->mm->mmap; vma; vma = vma->vm_next)
|
||||
if (maydump(vma, mm_flags))
|
||||
size += vma->vm_end - vma->vm_start;
|
||||
return size;
|
||||
}
|
||||
|
||||
/*
|
||||
* Actual dumper
|
||||
*
|
||||
@ -1582,9 +1479,8 @@ static size_t elf_core_vma_data_size(unsigned long mm_flags)
|
||||
static int elf_fdpic_core_dump(struct coredump_params *cprm)
|
||||
{
|
||||
int has_dumped = 0;
|
||||
int segs;
|
||||
int vma_count, segs;
|
||||
int i;
|
||||
struct vm_area_struct *vma;
|
||||
struct elfhdr *elf = NULL;
|
||||
loff_t offset = 0, dataoff;
|
||||
struct memelfnote psinfo_note, auxv_note;
|
||||
@ -1598,18 +1494,8 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
|
||||
elf_addr_t e_shoff;
|
||||
struct core_thread *ct;
|
||||
struct elf_thread_status *tmp;
|
||||
|
||||
/*
|
||||
* We no longer stop all VM operations.
|
||||
*
|
||||
* This is because those proceses that could possibly change map_count
|
||||
* or the mmap / vma pages are now blocked in do_exit on current
|
||||
* finishing this core dump.
|
||||
*
|
||||
* Only ptrace can touch these memory addresses, but it doesn't change
|
||||
* the map_count or the pages allocated. So no possibility of crashing
|
||||
* exists while dumping the mm->vm_next areas to the core file.
|
||||
*/
|
||||
struct core_vma_metadata *vma_meta = NULL;
|
||||
size_t vma_data_size;
|
||||
|
||||
/* alloc memory for large data structures: too large to be on stack */
|
||||
elf = kmalloc(sizeof(*elf), GFP_KERNEL);
|
||||
@ -1619,6 +1505,9 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
|
||||
if (!psinfo)
|
||||
goto end_coredump;
|
||||
|
||||
if (dump_vma_snapshot(cprm, &vma_count, &vma_meta, &vma_data_size))
|
||||
goto end_coredump;
|
||||
|
||||
for (ct = current->mm->core_state->dumper.next;
|
||||
ct; ct = ct->next) {
|
||||
tmp = elf_dump_thread_status(cprm->siginfo->si_signo,
|
||||
@ -1638,8 +1527,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
|
||||
tmp->next = thread_list;
|
||||
thread_list = tmp;
|
||||
|
||||
segs = current->mm->map_count;
|
||||
segs += elf_core_extra_phdrs();
|
||||
segs = vma_count + elf_core_extra_phdrs();
|
||||
|
||||
/* for notes section */
|
||||
segs++;
|
||||
@ -1684,7 +1572,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
|
||||
/* Page-align dumped data */
|
||||
dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
|
||||
|
||||
offset += elf_core_vma_data_size(cprm->mm_flags);
|
||||
offset += vma_data_size;
|
||||
offset += elf_core_extra_data_size();
|
||||
e_shoff = offset;
|
||||
|
||||
@ -1704,23 +1592,26 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
|
||||
goto end_coredump;
|
||||
|
||||
/* write program headers for segments dump */
|
||||
for (vma = current->mm->mmap; vma; vma = vma->vm_next) {
|
||||
for (i = 0; i < vma_count; i++) {
|
||||
struct core_vma_metadata *meta = vma_meta + i;
|
||||
struct elf_phdr phdr;
|
||||
size_t sz;
|
||||
|
||||
sz = vma->vm_end - vma->vm_start;
|
||||
sz = meta->end - meta->start;
|
||||
|
||||
phdr.p_type = PT_LOAD;
|
||||
phdr.p_offset = offset;
|
||||
phdr.p_vaddr = vma->vm_start;
|
||||
phdr.p_vaddr = meta->start;
|
||||
phdr.p_paddr = 0;
|
||||
phdr.p_filesz = maydump(vma, cprm->mm_flags) ? sz : 0;
|
||||
phdr.p_filesz = meta->dump_size;
|
||||
phdr.p_memsz = sz;
|
||||
offset += phdr.p_filesz;
|
||||
phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
|
||||
if (vma->vm_flags & VM_WRITE)
|
||||
phdr.p_flags = 0;
|
||||
if (meta->flags & VM_READ)
|
||||
phdr.p_flags |= PF_R;
|
||||
if (meta->flags & VM_WRITE)
|
||||
phdr.p_flags |= PF_W;
|
||||
if (vma->vm_flags & VM_EXEC)
|
||||
if (meta->flags & VM_EXEC)
|
||||
phdr.p_flags |= PF_X;
|
||||
phdr.p_align = ELF_EXEC_PAGESIZE;
|
||||
|
||||
@ -1752,7 +1643,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
|
||||
if (!dump_skip(cprm, dataoff - cprm->pos))
|
||||
goto end_coredump;
|
||||
|
||||
if (!elf_fdpic_dump_segments(cprm))
|
||||
if (!elf_fdpic_dump_segments(cprm, vma_meta, vma_count))
|
||||
goto end_coredump;
|
||||
|
||||
if (!elf_core_write_extra_data(cprm))
|
||||
@ -1776,6 +1667,7 @@ end_coredump:
|
||||
thread_list = thread_list->next;
|
||||
kfree(tmp);
|
||||
}
|
||||
kvfree(vma_meta);
|
||||
kfree(phdr4note);
|
||||
kfree(elf);
|
||||
kfree(psinfo);
|
||||
|
@@ -1168,7 +1168,7 @@ EXPORT_SYMBOL(configfs_depend_item);

 /*
  * Release the dependent linkage. This is much simpler than
- * configfs_depend_item() because we know that that the client driver is
+ * configfs_depend_item() because we know that the client driver is
  * pinned, thus the subsystem is pinned, and therefore configfs is pinned.
  */
 void configfs_undepend_item(struct config_item *target)
@ -267,7 +267,7 @@ flush_write_buffer(struct file *file, struct configfs_buffer *buffer, size_t cou
|
||||
* There is no easy way for us to know if userspace is only doing a partial
|
||||
* write, so we don't support them. We expect the entire buffer to come
|
||||
* on the first write.
|
||||
* Hint: if you're writing a value, first read the file, modify only the
|
||||
* Hint: if you're writing a value, first read the file, modify only
|
||||
* the value you're changing, then write entire buffer back.
|
||||
*/
|
||||
|
||||
|
fs/coredump.c
@ -840,17 +840,17 @@ int dump_emit(struct coredump_params *cprm, const void *addr, int nr)
|
||||
ssize_t n;
|
||||
if (cprm->written + nr > cprm->limit)
|
||||
return 0;
|
||||
while (nr) {
|
||||
if (dump_interrupted())
|
||||
return 0;
|
||||
n = __kernel_write(file, addr, nr, &pos);
|
||||
if (n <= 0)
|
||||
return 0;
|
||||
file->f_pos = pos;
|
||||
cprm->written += n;
|
||||
cprm->pos += n;
|
||||
nr -= n;
|
||||
}
|
||||
|
||||
|
||||
if (dump_interrupted())
|
||||
return 0;
|
||||
n = __kernel_write(file, addr, nr, &pos);
|
||||
if (n != nr)
|
||||
return 0;
|
||||
file->f_pos = pos;
|
||||
cprm->written += n;
|
||||
cprm->pos += n;
|
||||
|
||||
return 1;
|
||||
}
|
||||
EXPORT_SYMBOL(dump_emit);
|
||||
@ -876,6 +876,40 @@ int dump_skip(struct coredump_params *cprm, size_t nr)
|
||||
}
|
||||
EXPORT_SYMBOL(dump_skip);
|
||||
|
||||
#ifdef CONFIG_ELF_CORE
|
||||
int dump_user_range(struct coredump_params *cprm, unsigned long start,
|
||||
unsigned long len)
|
||||
{
|
||||
unsigned long addr;
|
||||
|
||||
for (addr = start; addr < start + len; addr += PAGE_SIZE) {
|
||||
struct page *page;
|
||||
int stop;
|
||||
|
||||
/*
|
||||
* To avoid having to allocate page tables for virtual address
|
||||
* ranges that have never been used yet, and also to make it
|
||||
* easy to generate sparse core files, use a helper that returns
|
||||
* NULL when encountering an empty page table entry that would
|
||||
* otherwise have been filled with the zero page.
|
||||
*/
|
||||
page = get_dump_page(addr);
|
||||
if (page) {
|
||||
void *kaddr = kmap(page);
|
||||
|
||||
stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
|
||||
kunmap(page);
|
||||
put_page(page);
|
||||
} else {
|
||||
stop = !dump_skip(cprm, PAGE_SIZE);
|
||||
}
|
||||
if (stop)
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
int dump_align(struct coredump_params *cprm, int align)
|
||||
{
|
||||
unsigned mod = cprm->pos & (align - 1);
|
||||
@ -902,3 +936,183 @@ void dump_truncate(struct coredump_params *cprm)
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(dump_truncate);
|
||||
|
||||
/*
|
||||
* The purpose of always_dump_vma() is to make sure that special kernel mappings
|
||||
* that are useful for post-mortem analysis are included in every core dump.
|
||||
* In that way we ensure that the core dump is fully interpretable later
|
||||
* without matching up the same kernel and hardware config to see what PC values
|
||||
* meant. These special mappings include - vDSO, vsyscall, and other
|
||||
* architecture specific mappings
|
||||
*/
|
||||
static bool always_dump_vma(struct vm_area_struct *vma)
|
||||
{
|
||||
/* Any vsyscall mappings? */
|
||||
if (vma == get_gate_vma(vma->vm_mm))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Assume that all vmas with a .name op should always be dumped.
|
||||
* If this changes, a new vm_ops field can easily be added.
|
||||
*/
|
||||
if (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma))
|
||||
return true;
|
||||
|
||||
/*
|
||||
* arch_vma_name() returns non-NULL for special architecture mappings,
|
||||
* such as vDSO sections.
|
||||
*/
|
||||
if (arch_vma_name(vma))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decide how much of @vma's contents should be included in a core dump.
|
||||
*/
|
||||
static unsigned long vma_dump_size(struct vm_area_struct *vma,
|
||||
unsigned long mm_flags)
|
||||
{
|
||||
#define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type))
|
||||
|
||||
/* always dump the vdso and vsyscall sections */
|
||||
if (always_dump_vma(vma))
|
||||
goto whole;
|
||||
|
||||
if (vma->vm_flags & VM_DONTDUMP)
|
||||
return 0;
|
||||
|
||||
/* support for DAX */
|
||||
if (vma_is_dax(vma)) {
|
||||
if ((vma->vm_flags & VM_SHARED) && FILTER(DAX_SHARED))
|
||||
goto whole;
|
||||
if (!(vma->vm_flags & VM_SHARED) && FILTER(DAX_PRIVATE))
|
||||
goto whole;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Hugetlb memory check */
|
||||
if (is_vm_hugetlb_page(vma)) {
|
||||
if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
|
||||
goto whole;
|
||||
if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
|
||||
goto whole;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Do not dump I/O mapped devices or special mappings */
|
||||
if (vma->vm_flags & VM_IO)
|
||||
return 0;
|
||||
|
||||
/* By default, dump shared memory if mapped from an anonymous file. */
|
||||
if (vma->vm_flags & VM_SHARED) {
|
||||
if (file_inode(vma->vm_file)->i_nlink == 0 ?
|
||||
FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
|
||||
goto whole;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Dump segments that have been written to. */
|
||||
if ((!IS_ENABLED(CONFIG_MMU) || vma->anon_vma) && FILTER(ANON_PRIVATE))
|
||||
goto whole;
|
||||
if (vma->vm_file == NULL)
|
||||
return 0;
|
||||
|
||||
if (FILTER(MAPPED_PRIVATE))
|
||||
goto whole;
|
||||
|
||||
/*
|
||||
* If this is the beginning of an executable file mapping,
|
||||
* dump the first page to aid in determining what was mapped here.
|
||||
*/
|
||||
if (FILTER(ELF_HEADERS) &&
|
||||
vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ) &&
|
||||
(READ_ONCE(file_inode(vma->vm_file)->i_mode) & 0111) != 0)
|
||||
return PAGE_SIZE;
|
||||
|
||||
#undef FILTER
|
||||
|
||||
return 0;
|
||||
|
||||
whole:
|
||||
return vma->vm_end - vma->vm_start;
|
||||
}
|
||||
|
||||
static struct vm_area_struct *first_vma(struct task_struct *tsk,
|
||||
struct vm_area_struct *gate_vma)
|
||||
{
|
||||
struct vm_area_struct *ret = tsk->mm->mmap;
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
return gate_vma;
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper function for iterating across a vma list. It ensures that the caller
|
||||
* will visit `gate_vma' prior to terminating the search.
|
||||
*/
|
||||
static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
|
||||
struct vm_area_struct *gate_vma)
|
||||
{
|
||||
struct vm_area_struct *ret;
|
||||
|
||||
ret = this_vma->vm_next;
|
||||
if (ret)
|
||||
return ret;
|
||||
if (this_vma == gate_vma)
|
||||
return NULL;
|
||||
return gate_vma;
|
||||
}
|
||||
|
||||
/*
|
||||
* Under the mmap_lock, take a snapshot of relevant information about the task's
|
||||
* VMAs.
|
||||
*/
|
||||
int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
|
||||
struct core_vma_metadata **vma_meta,
|
||||
size_t *vma_data_size_ptr)
|
||||
{
|
||||
struct vm_area_struct *vma, *gate_vma;
|
||||
struct mm_struct *mm = current->mm;
|
||||
int i;
|
||||
size_t vma_data_size = 0;
|
||||
|
||||
/*
|
||||
* Once the stack expansion code is fixed to not change VMA bounds
|
||||
* under mmap_lock in read mode, this can be changed to take the
|
||||
* mmap_lock in read mode.
|
||||
*/
|
||||
if (mmap_write_lock_killable(mm))
|
||||
return -EINTR;
|
||||
|
||||
gate_vma = get_gate_vma(mm);
|
||||
*vma_count = mm->map_count + (gate_vma ? 1 : 0);
|
||||
|
||||
*vma_meta = kvmalloc_array(*vma_count, sizeof(**vma_meta), GFP_KERNEL);
|
||||
if (!*vma_meta) {
|
||||
mmap_write_unlock(mm);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
for (i = 0, vma = first_vma(current, gate_vma); vma != NULL;
|
||||
vma = next_vma(vma, gate_vma), i++) {
|
||||
struct core_vma_metadata *m = (*vma_meta) + i;
|
||||
|
||||
m->start = vma->vm_start;
|
||||
m->end = vma->vm_end;
|
||||
m->flags = vma->vm_flags;
|
||||
m->dump_size = vma_dump_size(vma, cprm->mm_flags);
|
||||
|
||||
vma_data_size += m->dump_size;
|
||||
}
|
||||
|
||||
mmap_write_unlock(mm);
|
||||
|
||||
if (WARN_ON(i != *vma_count))
|
||||
return -EFAULT;
|
||||
|
||||
*vma_data_size_ptr = vma_data_size;
|
||||
return 0;
|
||||
}
|
||||
|
@ -349,6 +349,7 @@ static struct page *ext4_read_merkle_tree_page(struct inode *inode,
|
||||
pgoff_t index,
|
||||
unsigned long num_ra_pages)
|
||||
{
|
||||
DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, index);
|
||||
struct page *page;
|
||||
|
||||
index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;
|
||||
@ -358,8 +359,7 @@ static struct page *ext4_read_merkle_tree_page(struct inode *inode,
|
||||
if (page)
|
||||
put_page(page);
|
||||
else if (num_ra_pages > 1)
|
||||
page_cache_readahead_unbounded(inode->i_mapping, NULL,
|
||||
index, num_ra_pages, 0);
|
||||
page_cache_ra_unbounded(&ractl, num_ra_pages, 0);
|
||||
page = read_mapping_page(inode->i_mapping, index, NULL);
|
||||
}
|
||||
return page;
|
||||
|
@ -228,6 +228,7 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
|
||||
pgoff_t index,
|
||||
unsigned long num_ra_pages)
|
||||
{
|
||||
DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, index);
|
||||
struct page *page;
|
||||
|
||||
index += f2fs_verity_metadata_pos(inode) >> PAGE_SHIFT;
|
||||
@ -237,8 +238,7 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode,
|
||||
if (page)
|
||||
put_page(page);
|
||||
else if (num_ra_pages > 1)
|
||||
page_cache_readahead_unbounded(inode->i_mapping, NULL,
|
||||
index, num_ra_pages, 0);
|
||||
page_cache_ra_unbounded(&ractl, num_ra_pages, 0);
|
||||
page = read_mapping_page(inode->i_mapping, index, NULL);
|
||||
}
|
||||
return page;
|
||||
|
@ -181,6 +181,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
|
||||
mapping->a_ops = &empty_aops;
|
||||
mapping->host = inode;
|
||||
mapping->flags = 0;
|
||||
if (sb->s_type->fs_flags & FS_THP_SUPPORT)
|
||||
__set_bit(AS_THP_SUPPORT, &mapping->flags);
|
||||
mapping->wb_err = 0;
|
||||
atomic_set(&mapping->i_mmap_writable, 0);
|
||||
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
|
||||
|
@ -355,7 +355,7 @@ void nilfs_bmap_lookup_dirty_buffers(struct nilfs_bmap *bmap,
|
||||
/**
|
||||
* nilfs_bmap_assign - assign a new block number to a block
|
||||
* @bmap: bmap
|
||||
* @bhp: pointer to buffer head
|
||||
* @bh: pointer to buffer head
|
||||
* @blocknr: block number
|
||||
* @binfo: block information
|
||||
*
|
||||
|
@ -889,7 +889,7 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno)
|
||||
* nilfs_cpfile_change_cpmode - change checkpoint mode
|
||||
* @cpfile: inode of checkpoint file
|
||||
* @cno: checkpoint number
|
||||
* @status: mode of checkpoint
|
||||
* @mode: mode of checkpoint
|
||||
*
|
||||
* Description: nilfs_change_cpmode() changes the mode of the checkpoint
|
||||
* specified by @cno. The mode @mode is NILFS_CHECKPOINT or NILFS_SNAPSHOT.
|
||||
@ -930,12 +930,12 @@ int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode)
|
||||
/**
|
||||
* nilfs_cpfile_get_stat - get checkpoint statistics
|
||||
* @cpfile: inode of checkpoint file
|
||||
* @stat: pointer to a structure of checkpoint statistics
|
||||
* @cpstat: pointer to a structure of checkpoint statistics
|
||||
*
|
||||
* Description: nilfs_cpfile_get_stat() returns information about checkpoints.
|
||||
*
|
||||
* Return Value: On success, 0 is returned, and checkpoints information is
|
||||
* stored in the place pointed by @stat. On error, one of the following
|
||||
* stored in the place pointed by @cpstat. On error, one of the following
|
||||
* negative error codes is returned.
|
||||
*
|
||||
* %-EIO - I/O error.
|
||||
|

@ -69,7 +69,6 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode,

/**
* nilfs_forget_buffer - discard dirty state
* @inode: owner inode of the buffer
* @bh: buffer head of the buffer to be discarded
*/
void nilfs_forget_buffer(struct buffer_head *bh)

@ -546,13 +546,13 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum,
/**
* nilfs_sufile_get_stat - get segment usage statistics
* @sufile: inode of segment usage file
* @stat: pointer to a structure of segment usage statistics
* @sustat: pointer to a structure of segment usage statistics
*
* Description: nilfs_sufile_get_stat() returns information about segment
* usage.
*
* Return Value: On success, 0 is returned, and segment usage information is
* stored in the place pointed by @stat. On error, one of the following
* stored in the place pointed by @sustat. On error, one of the following
* negative error codes is returned.
*
* %-EIO - I/O error.
@ -1244,24 +1244,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
|
||||
count = -EINTR;
|
||||
goto out_mm;
|
||||
}
|
||||
/*
|
||||
* Avoid to modify vma->vm_flags
|
||||
* without locked ops while the
|
||||
* coredump reads the vm_flags.
|
||||
*/
|
||||
if (!mmget_still_valid(mm)) {
|
||||
/*
|
||||
* Silently return "count"
|
||||
* like if get_task_mm()
|
||||
* failed. FIXME: should this
|
||||
* function have returned
|
||||
* -ESRCH if get_task_mm()
|
||||
* failed like if
|
||||
* get_proc_task() fails?
|
||||
*/
|
||||
mmap_write_unlock(mm);
|
||||
goto out_mm;
|
||||
}
|
||||
for (vma = mm->mmap; vma; vma = vma->vm_next) {
|
||||
vma->vm_flags &= ~VM_SOFTDIRTY;
|
||||
vma_set_page_prot(vma);
|
||||
|
@ -224,7 +224,7 @@ static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
|
||||
if (!pages)
|
||||
goto out_free;
|
||||
|
||||
nr = find_get_pages(inode->i_mapping, &pgoff, lpages, pages);
|
||||
nr = find_get_pages_contig(inode->i_mapping, pgoff, lpages, pages);
|
||||
if (nr != lpages)
|
||||
goto out_free_pages; /* leave if some pages were missing */
|
||||
|
||||
|
@ -356,6 +356,7 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
|
||||
}
|
||||
|
||||
i->i_mode = mode;
|
||||
i->i_blocks = (i->i_size + 511) >> 9;
|
||||
|
||||
unlock_new_inode(i);
|
||||
return i;
|
||||
|
@ -601,8 +601,6 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
|
||||
|
||||
/* the various vma->vm_userfaultfd_ctx still points to it */
|
||||
mmap_write_lock(mm);
|
||||
/* no task can run (and in turn coredump) yet */
|
||||
VM_WARN_ON(!mmget_still_valid(mm));
|
||||
for (vma = mm->mmap; vma; vma = vma->vm_next)
|
||||
if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
|
||||
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
|
||||
@ -842,7 +840,6 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
|
||||
/* len == 0 means wake all */
|
||||
struct userfaultfd_wake_range range = { .len = 0, };
|
||||
unsigned long new_flags;
|
||||
bool still_valid;
|
||||
|
||||
WRITE_ONCE(ctx->released, true);
|
||||
|
||||
@ -858,7 +855,6 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
|
||||
* taking the mmap_lock for writing.
|
||||
*/
|
||||
mmap_write_lock(mm);
|
||||
still_valid = mmget_still_valid(mm);
|
||||
prev = NULL;
|
||||
for (vma = mm->mmap; vma; vma = vma->vm_next) {
|
||||
cond_resched();
|
||||
@ -869,17 +865,15 @@ static int userfaultfd_release(struct inode *inode, struct file *file)
|
||||
continue;
|
||||
}
|
||||
new_flags = vma->vm_flags & ~(VM_UFFD_MISSING | VM_UFFD_WP);
|
||||
if (still_valid) {
|
||||
prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
|
||||
new_flags, vma->anon_vma,
|
||||
vma->vm_file, vma->vm_pgoff,
|
||||
vma_policy(vma),
|
||||
NULL_VM_UFFD_CTX);
|
||||
if (prev)
|
||||
vma = prev;
|
||||
else
|
||||
prev = vma;
|
||||
}
|
||||
prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
|
||||
new_flags, vma->anon_vma,
|
||||
vma->vm_file, vma->vm_pgoff,
|
||||
vma_policy(vma),
|
||||
NULL_VM_UFFD_CTX);
|
||||
if (prev)
|
||||
vma = prev;
|
||||
else
|
||||
prev = vma;
|
||||
vma->vm_flags = new_flags;
|
||||
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
|
||||
}
|
||||
@ -1309,8 +1303,6 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx,
|
||||
goto out;
|
||||
|
||||
mmap_write_lock(mm);
|
||||
if (!mmget_still_valid(mm))
|
||||
goto out_unlock;
|
||||
vma = find_vma_prev(mm, start, &prev);
|
||||
if (!vma)
|
||||
goto out_unlock;
|
||||
@ -1511,8 +1503,6 @@ static int userfaultfd_unregister(struct userfaultfd_ctx *ctx,
|
||||
goto out;
|
||||
|
||||
mmap_write_lock(mm);
|
||||
if (!mmget_still_valid(mm))
|
||||
goto out_unlock;
|
||||
vma = find_vma_prev(mm, start, &prev);
|
||||
if (!vma)
|
||||
goto out_unlock;
|
||||
|
@ -188,12 +188,10 @@ static inline unsigned fls_long(unsigned long l)

static inline int get_count_order(unsigned int count)
{
int order;
if (count == 0)
return -1;

order = fls(count) - 1;
if (count & (count - 1))
order++;
return order;
return fls(--count);
}

/**
@ -206,10 +204,7 @@ static inline int get_count_order_long(unsigned long l)
{
if (l == 0UL)
return -1;
else if (l & (l - 1UL))
return (int)fls_long(l);
else
return (int)fls_long(l) - 1;
return (int)fls_long(--l);
}

/**
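
The rewrite above relies on fls(count - 1) yielding the rounded-up log2
directly. A quick user-space sketch (using __builtin_clz() as a stand-in for
the kernel's fls(); not part of this series) that checks the old and new forms
agree:

#include <assert.h>

static int fls_emul(unsigned int x)     /* 1-based index of highest set bit */
{
        return x ? 32 - __builtin_clz(x) : 0;
}

static int order_old(unsigned int count)        /* removed version */
{
        int order;

        if (count == 0)
                return -1;
        order = fls_emul(count) - 1;
        if (count & (count - 1))
                order++;
        return order;
}

static int order_new(unsigned int count)        /* fls(--count) version */
{
        if (count == 0)
                return -1;
        return fls_emul(--count);
}

int main(void)
{
        for (unsigned int v = 1; v < 1U << 20; v++)
                assert(order_old(v) == order_new(v));
        return 0;
}
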
@ -8,6 +8,7 @@
|
||||
#include <linux/genhd.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/llist.h>
|
||||
#include <linux/minmax.h>
|
||||
#include <linux/timer.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/pagemap.h>
|
||||
|
@ -7,10 +7,14 @@
|
||||
#ifndef __LINUX_BVEC_ITER_H
|
||||
#define __LINUX_BVEC_ITER_H
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/bug.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/limits.h>
|
||||
#include <linux/minmax.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
struct page;
|
||||
|
||||
/**
|
||||
* struct bio_vec - a contiguous range of physical memory addresses
|
||||
|
@ -7,6 +7,12 @@
|
||||
#include <linux/fs.h>
|
||||
#include <asm/siginfo.h>
|
||||
|
||||
struct core_vma_metadata {
|
||||
unsigned long start, end;
|
||||
unsigned long flags;
|
||||
unsigned long dump_size;
|
||||
};
|
||||
|
||||
/*
|
||||
* These are the only things you should do on a core-file: use only these
|
||||
* functions to write out all the necessary info.
|
||||
@ -16,6 +22,11 @@ extern int dump_skip(struct coredump_params *cprm, size_t nr);
|
||||
extern int dump_emit(struct coredump_params *cprm, const void *addr, int nr);
|
||||
extern int dump_align(struct coredump_params *cprm, int align);
|
||||
extern void dump_truncate(struct coredump_params *cprm);
|
||||
int dump_user_range(struct coredump_params *cprm, unsigned long start,
|
||||
unsigned long len);
|
||||
int dump_vma_snapshot(struct coredump_params *cprm, int *vma_count,
|
||||
struct core_vma_metadata **vma_meta,
|
||||
size_t *vma_data_size_ptr);
|
||||
#ifdef CONFIG_COREDUMP
|
||||
extern void do_coredump(const kernel_siginfo_t *siginfo);
|
||||
#else
|
||||
|

22
include/linux/fault-inject-usercopy.h
Normal file
22
include/linux/fault-inject-usercopy.h
Normal file
@ -0,0 +1,22 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_FAULT_INJECT_USERCOPY_H__
#define __LINUX_FAULT_INJECT_USERCOPY_H__

/*
* This header provides a wrapper for injecting failures to user space memory
* access functions.
*/

#include <linux/types.h>

#ifdef CONFIG_FAULT_INJECTION_USERCOPY

bool should_fail_usercopy(void);

#else

static inline bool should_fail_usercopy(void) { return false; }

#endif /* CONFIG_FAULT_INJECTION_USERCOPY */

#endif /* __LINUX_FAULT_INJECT_USERCOPY_H__ */
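
This new header only declares should_fail_usercopy(); the hook is wired into
the generic copy helpers later in this series. A hedged sketch of where a raw
uaccess wrapper would place the check (my_copy_to_user() is hypothetical):

#include <linux/fault-inject-usercopy.h>
#include <linux/uaccess.h>

static unsigned long my_copy_to_user(void __user *to, const void *from,
                                     unsigned long n)
{
        /* Pretend the whole copy faulted: report n bytes not copied. */
        if (should_fail_usercopy())
                return n;
        return copy_to_user(to, from, n);
}
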
@ -2209,6 +2209,7 @@ struct file_system_type {
|
||||
#define FS_HAS_SUBTYPE 4
|
||||
#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
|
||||
#define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */
|
||||
#define FS_THP_SUPPORT 8192 /* Remove once all fs converted */
|
||||
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
|
||||
int (*init_fs_context)(struct fs_context *);
|
||||
const struct fs_parameter_spec *parameters;
|
||||
@ -2696,33 +2697,6 @@ static inline errseq_t file_sample_sb_err(struct file *file)
|
||||
return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err);
|
||||
}
|
||||
|
||||
static inline int filemap_nr_thps(struct address_space *mapping)
|
||||
{
|
||||
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
|
||||
return atomic_read(&mapping->nr_thps);
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void filemap_nr_thps_inc(struct address_space *mapping)
|
||||
{
|
||||
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
|
||||
atomic_inc(&mapping->nr_thps);
|
||||
#else
|
||||
WARN_ON_ONCE(1);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void filemap_nr_thps_dec(struct address_space *mapping)
|
||||
{
|
||||
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
|
||||
atomic_dec(&mapping->nr_thps);
|
||||
#else
|
||||
WARN_ON_ONCE(1);
|
||||
#endif
|
||||
}
|
||||
|
||||
extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end,
|
||||
int datasync);
|
||||
extern int vfs_fsync(struct file *file, int datasync);
|
||||
|
@ -263,7 +263,8 @@ void ida_destroy(struct ida *ida);
|
||||
*
|
||||
* Allocate an ID between 0 and %INT_MAX, inclusive.
|
||||
*
|
||||
* Context: Any context.
|
||||
* Context: Any context. It is safe to call this function without
|
||||
* locking in your code.
|
||||
* Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
|
||||
* or %-ENOSPC if there are no free IDs.
|
||||
*/
|
||||
@ -280,7 +281,8 @@ static inline int ida_alloc(struct ida *ida, gfp_t gfp)
|
||||
*
|
||||
* Allocate an ID between @min and %INT_MAX, inclusive.
|
||||
*
|
||||
* Context: Any context.
|
||||
* Context: Any context. It is safe to call this function without
|
||||
* locking in your code.
|
||||
* Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
|
||||
* or %-ENOSPC if there are no free IDs.
|
||||
*/
|
||||
@ -297,7 +299,8 @@ static inline int ida_alloc_min(struct ida *ida, unsigned int min, gfp_t gfp)
|
||||
*
|
||||
* Allocate an ID between 0 and @max, inclusive.
|
||||
*
|
||||
* Context: Any context.
|
||||
* Context: Any context. It is safe to call this function without
|
||||
* locking in your code.
|
||||
* Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
|
||||
* or %-ENOSPC if there are no free IDs.
|
||||
*/
|
||||
@ -311,6 +314,10 @@ static inline void ida_init(struct ida *ida)
|
||||
xa_init_flags(&ida->xa, IDA_INIT_FLAGS);
|
||||
}
|
||||
|
||||
/*
|
||||
* ida_simple_get() and ida_simple_remove() are deprecated. Use
|
||||
* ida_alloc() and ida_free() instead respectively.
|
||||
*/
|
||||
#define ida_simple_get(ida, start, end, gfp) \
|
||||
ida_alloc_range(ida, start, (end) - 1, gfp)
|
||||
#define ida_simple_remove(ida, id) ida_free(ida, id)
|
|
||||
#define IORESOURCE_EXT_TYPE_BITS 0x01000000 /* Resource extended types */
|
||||
#define IORESOURCE_SYSRAM 0x01000000 /* System RAM (modifier) */
|
||||
|
||||
/* IORESOURCE_SYSRAM specific bits. */
|
||||
#define IORESOURCE_SYSRAM_DRIVER_MANAGED 0x02000000 /* Always detected via a driver. */
|
||||
#define IORESOURCE_SYSRAM_MERGEABLE 0x04000000 /* Resource can be merged. */
|
||||
|
||||
#define IORESOURCE_EXCLUSIVE 0x08000000 /* Userland may not map this resource */
|
||||
|
||||
#define IORESOURCE_DISABLED 0x10000000
|
||||
@ -103,7 +107,6 @@ struct resource {
|
||||
#define IORESOURCE_MEM_32BIT (3<<3)
|
||||
#define IORESOURCE_MEM_SHADOWABLE (1<<5) /* dup: IORESOURCE_SHADOWABLE */
|
||||
#define IORESOURCE_MEM_EXPANSIONROM (1<<6)
|
||||
#define IORESOURCE_MEM_DRIVER_MANAGED (1<<7)
|
||||
|
||||
/* PnP I/O specific bits (IORESOURCE_BITS) */
|
||||
#define IORESOURCE_IO_16BIT_ADDR (1<<0)
|
||||
@ -248,8 +251,10 @@ extern struct resource * __request_region(struct resource *,
|
||||
extern void __release_region(struct resource *, resource_size_t,
|
||||
resource_size_t);
|
||||
#ifdef CONFIG_MEMORY_HOTREMOVE
|
||||
extern int release_mem_region_adjustable(struct resource *, resource_size_t,
|
||||
resource_size_t);
|
||||
extern void release_mem_region_adjustable(resource_size_t, resource_size_t);
|
||||
#endif
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
extern void merge_system_ram_resource(struct resource *res);
|
||||
#endif
|
||||
|
||||
/* Wrappers for managed devices */
|
||||
|
@ -3,8 +3,9 @@
|
||||
#define _LINUX_JIFFIES_H
|
||||
|
||||
#include <linux/cache.h>
|
||||
#include <linux/limits.h>
|
||||
#include <linux/math64.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/minmax.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/timex.h>
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/log2.h>
|
||||
#include <linux/minmax.h>
|
||||
#include <linux/typecheck.h>
|
||||
#include <linux/printk.h>
|
||||
#include <linux/build_bug.h>
|
||||
@ -833,155 +834,6 @@ ftrace_vprintk(const char *fmt, va_list ap)
|
||||
static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
|
||||
#endif /* CONFIG_TRACING */
|
||||
|
||||
/*
|
||||
* min()/max()/clamp() macros must accomplish three things:
|
||||
*
|
||||
* - avoid multiple evaluations of the arguments (so side-effects like
|
||||
* "x++" happen only once) when non-constant.
|
||||
* - perform strict type-checking (to generate warnings instead of
|
||||
* nasty runtime surprises). See the "unnecessary" pointer comparison
|
||||
* in __typecheck().
|
||||
* - retain result as a constant expressions when called with only
|
||||
* constant expressions (to avoid tripping VLA warnings in stack
|
||||
* allocation usage).
|
||||
*/
|
||||
#define __typecheck(x, y) \
|
||||
(!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
|
||||
|
||||
/*
|
||||
* This returns a constant expression while determining if an argument is
|
||||
* a constant expression, most importantly without evaluating the argument.
|
||||
* Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
|
||||
*/
|
||||
#define __is_constexpr(x) \
|
||||
(sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
|
||||
|
||||
#define __no_side_effects(x, y) \
|
||||
(__is_constexpr(x) && __is_constexpr(y))
|
||||
|
||||
#define __safe_cmp(x, y) \
|
||||
(__typecheck(x, y) && __no_side_effects(x, y))
|
||||
|
||||
#define __cmp(x, y, op) ((x) op (y) ? (x) : (y))
|
||||
|
||||
#define __cmp_once(x, y, unique_x, unique_y, op) ({ \
|
||||
typeof(x) unique_x = (x); \
|
||||
typeof(y) unique_y = (y); \
|
||||
__cmp(unique_x, unique_y, op); })
|
||||
|
||||
#define __careful_cmp(x, y, op) \
|
||||
__builtin_choose_expr(__safe_cmp(x, y), \
|
||||
__cmp(x, y, op), \
|
||||
__cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op))
|
||||
|
||||
/**
|
||||
* min - return minimum of two values of the same or compatible types
|
||||
* @x: first value
|
||||
* @y: second value
|
||||
*/
|
||||
#define min(x, y) __careful_cmp(x, y, <)
|
||||
|
||||
/**
|
||||
* max - return maximum of two values of the same or compatible types
|
||||
* @x: first value
|
||||
* @y: second value
|
||||
*/
|
||||
#define max(x, y) __careful_cmp(x, y, >)
|
||||
|
||||
/**
|
||||
* min3 - return minimum of three values
|
||||
* @x: first value
|
||||
* @y: second value
|
||||
* @z: third value
|
||||
*/
|
||||
#define min3(x, y, z) min((typeof(x))min(x, y), z)
|
||||
|
||||
/**
|
||||
* max3 - return maximum of three values
|
||||
* @x: first value
|
||||
* @y: second value
|
||||
* @z: third value
|
||||
*/
|
||||
#define max3(x, y, z) max((typeof(x))max(x, y), z)
|
||||
|
||||
/**
|
||||
* min_not_zero - return the minimum that is _not_ zero, unless both are zero
|
||||
* @x: value1
|
||||
* @y: value2
|
||||
*/
|
||||
#define min_not_zero(x, y) ({ \
|
||||
typeof(x) __x = (x); \
|
||||
typeof(y) __y = (y); \
|
||||
__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
|
||||
|
||||
/**
|
||||
* clamp - return a value clamped to a given range with strict typechecking
|
||||
* @val: current value
|
||||
* @lo: lowest allowable value
|
||||
* @hi: highest allowable value
|
||||
*
|
||||
* This macro does strict typechecking of @lo/@hi to make sure they are of the
|
||||
* same type as @val. See the unnecessary pointer comparisons.
|
||||
*/
|
||||
#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
|
||||
|
||||
/*
|
||||
* ..and if you can't take the strict
|
||||
* types, you can specify one yourself.
|
||||
*
|
||||
* Or not use min/max/clamp at all, of course.
|
||||
*/
|
||||
|
||||
/**
|
||||
* min_t - return minimum of two values, using the specified type
|
||||
* @type: data type to use
|
||||
* @x: first value
|
||||
* @y: second value
|
||||
*/
|
||||
#define min_t(type, x, y) __careful_cmp((type)(x), (type)(y), <)
|
||||
|
||||
/**
|
||||
* max_t - return maximum of two values, using the specified type
|
||||
* @type: data type to use
|
||||
* @x: first value
|
||||
* @y: second value
|
||||
*/
|
||||
#define max_t(type, x, y) __careful_cmp((type)(x), (type)(y), >)
|
||||
|
||||
/**
|
||||
* clamp_t - return a value clamped to a given range using a given type
|
||||
* @type: the type of variable to use
|
||||
* @val: current value
|
||||
* @lo: minimum allowable value
|
||||
* @hi: maximum allowable value
|
||||
*
|
||||
* This macro does no typechecking and uses temporary variables of type
|
||||
* @type to make all the comparisons.
|
||||
*/
|
||||
#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
|
||||
|
||||
/**
|
||||
* clamp_val - return a value clamped to a given range using val's type
|
||||
* @val: current value
|
||||
* @lo: minimum allowable value
|
||||
* @hi: maximum allowable value
|
||||
*
|
||||
* This macro does no typechecking and uses temporary variables of whatever
|
||||
* type the input argument @val is. This is useful when @val is an unsigned
|
||||
* type and @lo and @hi are literals that will otherwise be assigned a signed
|
||||
* integer type.
|
||||
*/
|
||||
#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi)
|
||||
|
||||
|
||||
/**
|
||||
* swap - swap values of @a and @b
|
||||
* @a: first value
|
||||
* @b: second value
|
||||
*/
|
||||
#define swap(a, b) \
|
||||
do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
|
||||
|
||||
/* This counts to 12. Any more, it will return 13th argument. */
|
||||
#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
|
||||
#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
|
||||
|
@ -609,6 +609,15 @@ static inline void list_splice_tail_init(struct list_head *list,
|
||||
pos != (head); \
|
||||
pos = n, n = pos->prev)
|
||||
|
||||
/**
|
||||
* list_entry_is_head - test if the entry points to the head of the list
|
||||
* @pos: the type * to cursor
|
||||
* @head: the head for your list.
|
||||
* @member: the name of the list_head within the struct.
|
||||
*/
|
||||
#define list_entry_is_head(pos, head, member) \
|
||||
(&pos->member == (head))
|
||||
|
||||
/**
|
||||
* list_for_each_entry - iterate over list of given type
|
||||
* @pos: the type * to use as a loop cursor.
|
||||
@ -617,7 +626,7 @@ static inline void list_splice_tail_init(struct list_head *list,
|
||||
*/
|
||||
#define list_for_each_entry(pos, head, member) \
|
||||
for (pos = list_first_entry(head, typeof(*pos), member); \
|
||||
&pos->member != (head); \
|
||||
!list_entry_is_head(pos, head, member); \
|
||||
pos = list_next_entry(pos, member))
|
||||
|
||||
/**
|
||||
@ -628,7 +637,7 @@ static inline void list_splice_tail_init(struct list_head *list,
|
||||
*/
|
||||
#define list_for_each_entry_reverse(pos, head, member) \
|
||||
for (pos = list_last_entry(head, typeof(*pos), member); \
|
||||
&pos->member != (head); \
|
||||
!list_entry_is_head(pos, head, member); \
|
||||
pos = list_prev_entry(pos, member))
|
||||
|
||||
/**
|
||||
@ -653,7 +662,7 @@ static inline void list_splice_tail_init(struct list_head *list,
|
||||
*/
|
||||
#define list_for_each_entry_continue(pos, head, member) \
|
||||
for (pos = list_next_entry(pos, member); \
|
||||
&pos->member != (head); \
|
||||
!list_entry_is_head(pos, head, member); \
|
||||
pos = list_next_entry(pos, member))
|
||||
|
||||
/**
|
||||
@ -667,7 +676,7 @@ static inline void list_splice_tail_init(struct list_head *list,
|
||||
*/
|
||||
#define list_for_each_entry_continue_reverse(pos, head, member) \
|
||||
for (pos = list_prev_entry(pos, member); \
|
||||
&pos->member != (head); \
|
||||
!list_entry_is_head(pos, head, member); \
|
||||
pos = list_prev_entry(pos, member))
|
||||
|
||||
/**
|
||||
@ -679,7 +688,7 @@ static inline void list_splice_tail_init(struct list_head *list,
|
||||
* Iterate over list of given type, continuing from current position.
|
||||
*/
|
||||
#define list_for_each_entry_from(pos, head, member) \
|
||||
for (; &pos->member != (head); \
|
||||
for (; !list_entry_is_head(pos, head, member); \
|
||||
pos = list_next_entry(pos, member))
|
||||
|
||||
/**
|
||||
@ -692,7 +701,7 @@ static inline void list_splice_tail_init(struct list_head *list,
|
||||
* Iterate backwards over list of given type, continuing from current position.
|
||||
*/
|
||||
#define list_for_each_entry_from_reverse(pos, head, member) \
|
||||
for (; &pos->member != (head); \
|
||||
for (; !list_entry_is_head(pos, head, member); \
|
||||
pos = list_prev_entry(pos, member))
|
||||
|
||||
/**
|
||||
@ -705,7 +714,7 @@ static inline void list_splice_tail_init(struct list_head *list,
|
||||
#define list_for_each_entry_safe(pos, n, head, member) \
|
||||
for (pos = list_first_entry(head, typeof(*pos), member), \
|
||||
n = list_next_entry(pos, member); \
|
||||
&pos->member != (head); \
|
||||
!list_entry_is_head(pos, head, member); \
|
||||
pos = n, n = list_next_entry(n, member))
|
||||
|
||||
/**
|
||||
@ -721,7 +730,7 @@ static inline void list_splice_tail_init(struct list_head *list,
|
||||
#define list_for_each_entry_safe_continue(pos, n, head, member) \
|
||||
for (pos = list_next_entry(pos, member), \
|
||||
n = list_next_entry(pos, member); \
|
||||
&pos->member != (head); \
|
||||
!list_entry_is_head(pos, head, member); \
|
||||
pos = n, n = list_next_entry(n, member))
|
||||
|
||||
/**
|
||||
@ -736,7 +745,7 @@ static inline void list_splice_tail_init(struct list_head *list,
|
||||
*/
|
||||
#define list_for_each_entry_safe_from(pos, n, head, member) \
|
||||
for (n = list_next_entry(pos, member); \
|
||||
&pos->member != (head); \
|
||||
!list_entry_is_head(pos, head, member); \
|
||||
pos = n, n = list_next_entry(n, member))
|
||||
|
||||
/**
|
||||
@ -752,7 +761,7 @@ static inline void list_splice_tail_init(struct list_head *list,
|
||||
#define list_for_each_entry_safe_reverse(pos, n, head, member) \
|
||||
for (pos = list_last_entry(head, typeof(*pos), member), \
|
||||
n = list_prev_entry(pos, member); \
|
||||
&pos->member != (head); \
|
||||
!list_entry_is_head(pos, head, member); \
|
||||
pos = n, n = list_prev_entry(n, member))
|
||||
|
||||
/**
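
list_entry_is_head() gives open-coded walkers a name for the
&pos->member == (head) test now used by the iterators above. A small
hypothetical lookup using it (struct item and find_item() are illustrative):

struct item {
        struct list_head node;
        int val;
};

static struct item *find_item(struct list_head *items, int val)
{
        struct item *it;

        list_for_each_entry(it, items, node)
                if (it->val == val)
                        break;

        /* The cursor points at the head when nothing matched. */
        if (list_entry_is_head(it, items, node))
                return NULL;
        return it;
}
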
@ -57,6 +57,19 @@ enum {
|
||||
MMOP_ONLINE_MOVABLE,
|
||||
};
|
||||
|
||||
/* Flags for add_memory() and friends to specify memory hotplug details. */
|
||||
typedef int __bitwise mhp_t;
|
||||
|
||||
/* No special request */
|
||||
#define MHP_NONE ((__force mhp_t)0)
|
||||
/*
|
||||
* Allow merging of the added System RAM resource with adjacent,
|
||||
* mergeable resources. After a successful call to add_memory_resource()
|
||||
* with this flag set, the resource pointer must no longer be used as it
|
||||
* might be stale, or the resource might have changed.
|
||||
*/
|
||||
#define MEMHP_MERGE_RESOURCE ((__force mhp_t)BIT(0))
|
||||
|
||||
/*
|
||||
* Extended parameters for memory hotplug:
|
||||
* altmap: alternative allocator for memmap array (optional)
|
||||
@ -103,8 +116,8 @@ extern int online_pages(unsigned long pfn, unsigned long nr_pages,
|
||||
int online_type, int nid);
|
||||
extern struct zone *test_pages_in_a_zone(unsigned long start_pfn,
|
||||
unsigned long end_pfn);
|
||||
extern unsigned long __offline_isolated_pages(unsigned long start_pfn,
|
||||
unsigned long end_pfn);
|
||||
extern void __offline_isolated_pages(unsigned long start_pfn,
|
||||
unsigned long end_pfn);
|
||||
|
||||
typedef void (*online_page_callback_t)(struct page *page, unsigned int order);
|
||||
|
||||
@ -247,13 +260,6 @@ static inline void zone_span_writelock(struct zone *zone) {}
|
||||
static inline void zone_span_writeunlock(struct zone *zone) {}
|
||||
static inline void zone_seqlock_init(struct zone *zone) {}
|
||||
|
||||
static inline int mhp_notimplemented(const char *func)
|
||||
{
|
||||
printk(KERN_WARNING "%s() called, with CONFIG_MEMORY_HOTPLUG disabled\n", func);
|
||||
dump_stack();
|
||||
return -ENOSYS;
|
||||
}
|
||||
|
||||
static inline void register_page_bootmem_info_node(struct pglist_data *pgdat)
|
||||
{
|
||||
}
|
||||
@ -344,14 +350,18 @@ static inline void __remove_memory(int nid, u64 start, u64 size) {}
|
||||
extern void set_zone_contiguous(struct zone *zone);
|
||||
extern void clear_zone_contiguous(struct zone *zone);
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
extern void __ref free_area_init_core_hotplug(int nid);
|
||||
extern int __add_memory(int nid, u64 start, u64 size);
|
||||
extern int add_memory(int nid, u64 start, u64 size);
|
||||
extern int add_memory_resource(int nid, struct resource *resource);
|
||||
extern int __add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags);
|
||||
extern int add_memory(int nid, u64 start, u64 size, mhp_t mhp_flags);
|
||||
extern int add_memory_resource(int nid, struct resource *resource,
|
||||
mhp_t mhp_flags);
|
||||
extern int add_memory_driver_managed(int nid, u64 start, u64 size,
|
||||
const char *resource_name);
|
||||
const char *resource_name,
|
||||
mhp_t mhp_flags);
|
||||
extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
|
||||
unsigned long nr_pages, struct vmem_altmap *altmap);
|
||||
unsigned long nr_pages,
|
||||
struct vmem_altmap *altmap, int migratetype);
|
||||
extern void remove_pfn_range_from_zone(struct zone *zone,
|
||||
unsigned long start_pfn,
|
||||
unsigned long nr_pages);
|
||||
@ -363,8 +373,8 @@ extern void sparse_remove_section(struct mem_section *ms,
|
||||
unsigned long map_offset, struct vmem_altmap *altmap);
|
||||
extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
|
||||
unsigned long pnum);
|
||||
extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages,
|
||||
int online_type);
|
||||
extern struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
|
||||
unsigned long nr_pages);
|
||||
#endif /* CONFIG_MEMORY_HOTPLUG */
|
||||
|
||||
#endif /* __LINUX_MEMORY_HOTPLUG_H */
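
With the new mhp_t argument, callers of add_memory() and friends state hotplug
details explicitly: MHP_NONE keeps the old behaviour, while
MEMHP_MERGE_RESOURCE asks for the added System RAM resource to be merged with
adjacent mergeable resources. A hedged sketch of a hotplug driver path
(mydrv_plug_range() is hypothetical):

#include <linux/memory_hotplug.h>

static int mydrv_plug_range(int nid, u64 start, u64 size, bool merge)
{
        /* After a merge the original resource pointer may be stale. */
        return add_memory(nid, start, size,
                          merge ? MEMHP_MERGE_RESOURCE : MHP_NONE);
}
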
153
include/linux/minmax.h
Normal file
153
include/linux/minmax.h
Normal file
@ -0,0 +1,153 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_MINMAX_H
|
||||
#define _LINUX_MINMAX_H
|
||||
|
||||
/*
|
||||
* min()/max()/clamp() macros must accomplish three things:
|
||||
*
|
||||
* - avoid multiple evaluations of the arguments (so side-effects like
|
||||
* "x++" happen only once) when non-constant.
|
||||
* - perform strict type-checking (to generate warnings instead of
|
||||
* nasty runtime surprises). See the "unnecessary" pointer comparison
|
||||
* in __typecheck().
|
||||
* - retain result as a constant expressions when called with only
|
||||
* constant expressions (to avoid tripping VLA warnings in stack
|
||||
* allocation usage).
|
||||
*/
|
||||
#define __typecheck(x, y) \
|
||||
(!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
|
||||
|
||||
/*
|
||||
* This returns a constant expression while determining if an argument is
|
||||
* a constant expression, most importantly without evaluating the argument.
|
||||
* Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
|
||||
*/
|
||||
#define __is_constexpr(x) \
|
||||
(sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
|
||||
|
||||
#define __no_side_effects(x, y) \
|
||||
(__is_constexpr(x) && __is_constexpr(y))
|
||||
|
||||
#define __safe_cmp(x, y) \
|
||||
(__typecheck(x, y) && __no_side_effects(x, y))
|
||||
|
||||
#define __cmp(x, y, op) ((x) op (y) ? (x) : (y))
|
||||
|
||||
#define __cmp_once(x, y, unique_x, unique_y, op) ({ \
|
||||
typeof(x) unique_x = (x); \
|
||||
typeof(y) unique_y = (y); \
|
||||
__cmp(unique_x, unique_y, op); })
|
||||
|
||||
#define __careful_cmp(x, y, op) \
|
||||
__builtin_choose_expr(__safe_cmp(x, y), \
|
||||
__cmp(x, y, op), \
|
||||
__cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op))
|
||||
|
||||
/**
|
||||
* min - return minimum of two values of the same or compatible types
|
||||
* @x: first value
|
||||
* @y: second value
|
||||
*/
|
||||
#define min(x, y) __careful_cmp(x, y, <)
|
||||
|
||||
/**
|
||||
* max - return maximum of two values of the same or compatible types
|
||||
* @x: first value
|
||||
* @y: second value
|
||||
*/
|
||||
#define max(x, y) __careful_cmp(x, y, >)
|
||||
|
||||
/**
|
||||
* min3 - return minimum of three values
|
||||
* @x: first value
|
||||
* @y: second value
|
||||
* @z: third value
|
||||
*/
|
||||
#define min3(x, y, z) min((typeof(x))min(x, y), z)
|
||||
|
||||
/**
|
||||
* max3 - return maximum of three values
|
||||
* @x: first value
|
||||
* @y: second value
|
||||
* @z: third value
|
||||
*/
|
||||
#define max3(x, y, z) max((typeof(x))max(x, y), z)
|
||||
|
||||
/**
|
||||
* min_not_zero - return the minimum that is _not_ zero, unless both are zero
|
||||
* @x: value1
|
||||
* @y: value2
|
||||
*/
|
||||
#define min_not_zero(x, y) ({ \
|
||||
typeof(x) __x = (x); \
|
||||
typeof(y) __y = (y); \
|
||||
__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
|
||||
|
||||
/**
|
||||
* clamp - return a value clamped to a given range with strict typechecking
|
||||
* @val: current value
|
||||
* @lo: lowest allowable value
|
||||
* @hi: highest allowable value
|
||||
*
|
||||
* This macro does strict typechecking of @lo/@hi to make sure they are of the
|
||||
* same type as @val. See the unnecessary pointer comparisons.
|
||||
*/
|
||||
#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
|
||||
|
||||
/*
|
||||
* ..and if you can't take the strict
|
||||
* types, you can specify one yourself.
|
||||
*
|
||||
* Or not use min/max/clamp at all, of course.
|
||||
*/
|
||||
|
||||
/**
|
||||
* min_t - return minimum of two values, using the specified type
|
||||
* @type: data type to use
|
||||
* @x: first value
|
||||
* @y: second value
|
||||
*/
|
||||
#define min_t(type, x, y) __careful_cmp((type)(x), (type)(y), <)
|
||||
|
||||
/**
|
||||
* max_t - return maximum of two values, using the specified type
|
||||
* @type: data type to use
|
||||
* @x: first value
|
||||
* @y: second value
|
||||
*/
|
||||
#define max_t(type, x, y) __careful_cmp((type)(x), (type)(y), >)
|
||||
|
||||
/**
|
||||
* clamp_t - return a value clamped to a given range using a given type
|
||||
* @type: the type of variable to use
|
||||
* @val: current value
|
||||
* @lo: minimum allowable value
|
||||
* @hi: maximum allowable value
|
||||
*
|
||||
* This macro does no typechecking and uses temporary variables of type
|
||||
* @type to make all the comparisons.
|
||||
*/
|
||||
#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
|
||||
|
||||
/**
|
||||
* clamp_val - return a value clamped to a given range using val's type
|
||||
* @val: current value
|
||||
* @lo: minimum allowable value
|
||||
* @hi: maximum allowable value
|
||||
*
|
||||
* This macro does no typechecking and uses temporary variables of whatever
|
||||
* type the input argument @val is. This is useful when @val is an unsigned
|
||||
* type and @lo and @hi are literals that will otherwise be assigned a signed
|
||||
* integer type.
|
||||
*/
|
||||
#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi)
|
||||
|
||||
/**
|
||||
* swap - swap values of @a and @b
|
||||
* @a: first value
|
||||
* @b: second value
|
||||
*/
|
||||
#define swap(a, b) \
|
||||
do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
|
||||
|
||||
#endif /* _LINUX_MINMAX_H */
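
minmax.h carries the min()/max()/clamp() family verbatim out of kernel.h, so
light-weight headers can include just this file. A hedged usage sketch
(mydrv_chunk() is hypothetical):

#include <linux/minmax.h>
#include <linux/types.h>

static size_t mydrv_chunk(size_t requested, size_t hw_limit)
{
        /* min() type-checks: both operands are size_t. */
        size_t n = min(requested, hw_limit);

        /* clamp_val() compares using the type of @n. */
        return clamp_val(n, 16, 4096);
}
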
@ -2440,7 +2440,7 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn,
|
||||
|
||||
extern void set_dma_reserve(unsigned long new_dma_reserve);
|
||||
extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long,
|
||||
enum meminit_context, struct vmem_altmap *);
|
||||
enum meminit_context, struct vmem_altmap *, int migratetype);
|
||||
extern void setup_per_zone_wmarks(void);
|
||||
extern int __meminit init_per_zone_wmark_min(void);
|
||||
extern void mem_init(void);
|
||||
@ -3025,8 +3025,6 @@ extern int memory_failure(unsigned long pfn, int flags);
|
||||
extern void memory_failure_queue(unsigned long pfn, int flags);
|
||||
extern void memory_failure_queue_kick(int cpu);
|
||||
extern int unpoison_memory(unsigned long pfn);
|
||||
extern int get_hwpoison_page(struct page *page);
|
||||
#define put_hwpoison_page(page) put_page(page)
|
||||
extern int sysctl_memory_failure_early_kill;
|
||||
extern int sysctl_memory_failure_recovery;
|
||||
extern void shake_page(struct page *p, int access);
|
||||
@ -3066,6 +3064,7 @@ enum mf_action_page_type {
|
||||
MF_MSG_BUDDY,
|
||||
MF_MSG_BUDDY_2ND,
|
||||
MF_MSG_DAX,
|
||||
MF_MSG_UNSPLIT_THP,
|
||||
MF_MSG_UNKNOWN,
|
||||
};
|
||||
|
||||
|
@ -266,6 +266,8 @@ static inline bool is_active_lru(enum lru_list lru)
|
||||
return (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE);
|
||||
}
|
||||
|
||||
#define ANON_AND_FILE 2
|
||||
|
||||
enum lruvec_flags {
|
||||
LRUVEC_CONGESTED, /* lruvec has many dirty pages
|
||||
* backed by a congested BDI
|
||||
@ -283,8 +285,8 @@ struct lruvec {
|
||||
unsigned long file_cost;
|
||||
/* Non-resident age, driven by LRU movement */
|
||||
atomic_long_t nonresident_age;
|
||||
/* Refaults at the time of last reclaim cycle, anon=0, file=1 */
|
||||
unsigned long refaults[2];
|
||||
/* Refaults at the time of last reclaim cycle */
|
||||
unsigned long refaults[ANON_AND_FILE];
|
||||
/* Various lruvec state flags (enum lruvec_flags) */
|
||||
unsigned long flags;
|
||||
#ifdef CONFIG_MEMCG
|
||||
@ -441,6 +443,8 @@ enum zone_type {
|
||||
|
||||
#ifndef __GENERATING_BOUNDS_H
|
||||
|
||||
#define ASYNC_AND_SYNC 2
|
||||
|
||||
struct zone {
|
||||
/* Read-mostly fields */
|
||||
|
||||
@ -560,8 +564,8 @@ struct zone {
|
||||
#if defined CONFIG_COMPACTION || defined CONFIG_CMA
|
||||
/* pfn where compaction free scanner should start */
|
||||
unsigned long compact_cached_free_pfn;
|
||||
/* pfn where async and sync compaction migration scanner should start */
|
||||
unsigned long compact_cached_migrate_pfn[2];
|
||||
/* pfn where compaction migration scanner should start */
|
||||
unsigned long compact_cached_migrate_pfn[ASYNC_AND_SYNC];
|
||||
unsigned long compact_init_migrate_pfn;
|
||||
unsigned long compact_init_free_pfn;
|
||||
#endif
|
||||
@ -1416,7 +1420,6 @@ static inline unsigned long next_present_section_nr(unsigned long section_nr)
|
||||
#define pfn_to_nid(pfn) (0)
|
||||
#endif
|
||||
|
||||
#define early_pfn_valid(pfn) pfn_valid(pfn)
|
||||
void sparse_init(void);
|
||||
#else
|
||||
#define sparse_init() do {} while (0)
|
||||
@ -1436,10 +1439,6 @@ struct mminit_pfnnid_cache {
|
||||
int last_nid;
|
||||
};
|
||||
|
||||
#ifndef early_pfn_valid
|
||||
#define early_pfn_valid(pfn) (1)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we
|
||||
* need to check pfn validity within that MAX_ORDER_NR_PAGES block.
|
||||
|
@ -99,15 +99,14 @@ extern struct node *node_devices[];
|
||||
typedef void (*node_registration_func_t)(struct node *);
|
||||
|
||||
#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA)
|
||||
int link_mem_sections(int nid, unsigned long start_pfn,
|
||||
unsigned long end_pfn,
|
||||
enum meminit_context context);
|
||||
void link_mem_sections(int nid, unsigned long start_pfn,
|
||||
unsigned long end_pfn,
|
||||
enum meminit_context context);
|
||||
#else
|
||||
static inline int link_mem_sections(int nid, unsigned long start_pfn,
|
||||
unsigned long end_pfn,
|
||||
enum meminit_context context)
|
||||
static inline void link_mem_sections(int nid, unsigned long start_pfn,
|
||||
unsigned long end_pfn,
|
||||
enum meminit_context context)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -130,8 +129,7 @@ static inline int register_one_node(int nid)
|
||||
if (error)
|
||||
return error;
|
||||
/* link memory sections under this node */
|
||||
error = link_mem_sections(nid, start_pfn, end_pfn,
|
||||
MEMINIT_EARLY);
|
||||
link_mem_sections(nid, start_pfn, end_pfn, MEMINIT_EARLY);
|
||||
}
|
||||
|
||||
return error;
|
||||
|
@ -90,9 +90,9 @@
|
||||
* for such situations. See below and CPUMASK_ALLOC also.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/threads.h>
|
||||
#include <linux/bitmap.h>
|
||||
#include <linux/minmax.h>
|
||||
#include <linux/numa.h>
|
||||
|
||||
typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t;
|
||||
|
@ -431,13 +431,9 @@ PAGEFLAG_FALSE(Uncached)
|
||||
PAGEFLAG(HWPoison, hwpoison, PF_ANY)
|
||||
TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
|
||||
#define __PG_HWPOISON (1UL << PG_hwpoison)
|
||||
extern bool set_hwpoison_free_buddy_page(struct page *page);
|
||||
extern bool take_page_off_buddy(struct page *page);
|
||||
#else
|
||||
PAGEFLAG_FALSE(HWPoison)
|
||||
static inline bool set_hwpoison_free_buddy_page(struct page *page)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#define __PG_HWPOISON 0
|
||||
#endif
|
||||
|
||||
|
@ -11,7 +11,7 @@ extern struct page_ext_operations page_owner_ops;
|
||||
extern void __reset_page_owner(struct page *page, unsigned int order);
|
||||
extern void __set_page_owner(struct page *page,
|
||||
unsigned int order, gfp_t gfp_mask);
|
||||
extern void __split_page_owner(struct page *page, unsigned int order);
|
||||
extern void __split_page_owner(struct page *page, unsigned int nr);
|
||||
extern void __copy_page_owner(struct page *oldpage, struct page *newpage);
|
||||
extern void __set_page_owner_migrate_reason(struct page *page, int reason);
|
||||
extern void __dump_page_owner(struct page *page);
|
||||
@ -31,10 +31,10 @@ static inline void set_page_owner(struct page *page,
|
||||
__set_page_owner(page, order, gfp_mask);
|
||||
}
|
||||
|
||||
static inline void split_page_owner(struct page *page, unsigned int order)
|
||||
static inline void split_page_owner(struct page *page, unsigned int nr)
|
||||
{
|
||||
if (static_branch_unlikely(&page_owner_inited))
|
||||
__split_page_owner(page, order);
|
||||
__split_page_owner(page, nr);
|
||||
}
|
||||
static inline void copy_page_owner(struct page *oldpage, struct page *newpage)
|
||||
{
|
||||
|
@ -29,6 +29,7 @@ enum mapping_flags {
|
||||
AS_EXITING = 4, /* final truncate in progress */
|
||||
/* writeback related tags are not used */
|
||||
AS_NO_WRITEBACK_TAGS = 5,
|
||||
AS_THP_SUPPORT = 6, /* THPs supported */
|
||||
};
|
||||
|
||||
/**
|
||||
@ -120,6 +121,40 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask)
|
||||
m->gfp_mask = mask;
|
||||
}
|
||||
|
||||
static inline bool mapping_thp_support(struct address_space *mapping)
|
||||
{
|
||||
return test_bit(AS_THP_SUPPORT, &mapping->flags);
|
||||
}
|
||||
|
||||
static inline int filemap_nr_thps(struct address_space *mapping)
|
||||
{
|
||||
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
|
||||
return atomic_read(&mapping->nr_thps);
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void filemap_nr_thps_inc(struct address_space *mapping)
|
||||
{
|
||||
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
|
||||
if (!mapping_thp_support(mapping))
|
||||
atomic_inc(&mapping->nr_thps);
|
||||
#else
|
||||
WARN_ON_ONCE(1);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void filemap_nr_thps_dec(struct address_space *mapping)
|
||||
{
|
||||
#ifdef CONFIG_READ_ONLY_THP_FOR_FS
|
||||
if (!mapping_thp_support(mapping))
|
||||
atomic_dec(&mapping->nr_thps);
|
||||
#else
|
||||
WARN_ON_ONCE(1);
|
||||
#endif
|
||||
}
|
||||
|
||||
void release_pages(struct page **pages, int nr);
|
||||
|
||||
/*
|
||||
@ -726,17 +761,6 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);
|
||||
void delete_from_page_cache_batch(struct address_space *mapping,
|
||||
struct pagevec *pvec);
|
||||
|
||||
#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE)
|
||||
|
||||
void page_cache_sync_readahead(struct address_space *, struct file_ra_state *,
|
||||
struct file *, pgoff_t index, unsigned long req_count);
|
||||
void page_cache_async_readahead(struct address_space *, struct file_ra_state *,
|
||||
struct file *, struct page *, pgoff_t index,
|
||||
unsigned long req_count);
|
||||
void page_cache_readahead_unbounded(struct address_space *, struct file *,
|
||||
pgoff_t index, unsigned long nr_to_read,
|
||||
unsigned long lookahead_count);
|
||||
|
||||
/*
|
||||
* Like add_to_page_cache_locked, but used to add newly allocated pages:
|
||||
* the page is new, so we can just run __SetPageLocked() against it.
|
||||
@ -777,6 +801,67 @@ struct readahead_control {
|
||||
unsigned int _batch_count;
|
||||
};
|
||||
|
||||
#define DEFINE_READAHEAD(rac, f, m, i) \
|
||||
struct readahead_control rac = { \
|
||||
.file = f, \
|
||||
.mapping = m, \
|
||||
._index = i, \
|
||||
}
|
||||
|
||||
#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE)
|
||||
|
||||
void page_cache_ra_unbounded(struct readahead_control *,
|
||||
unsigned long nr_to_read, unsigned long lookahead_count);
|
||||
void page_cache_sync_ra(struct readahead_control *, struct file_ra_state *,
|
||||
unsigned long req_count);
|
||||
void page_cache_async_ra(struct readahead_control *, struct file_ra_state *,
|
||||
struct page *, unsigned long req_count);
|
||||
|
||||
/**
|
||||
* page_cache_sync_readahead - generic file readahead
|
||||
* @mapping: address_space which holds the pagecache and I/O vectors
|
||||
* @ra: file_ra_state which holds the readahead state
|
||||
* @file: Used by the filesystem for authentication.
|
||||
* @index: Index of first page to be read.
|
||||
* @req_count: Total number of pages being read by the caller.
|
||||
*
|
||||
* page_cache_sync_readahead() should be called when a cache miss happened:
|
||||
* it will submit the read. The readahead logic may decide to piggyback more
|
||||
* pages onto the read request if access patterns suggest it will improve
|
||||
* performance.
|
||||
*/
|
||||
static inline
|
||||
void page_cache_sync_readahead(struct address_space *mapping,
|
||||
struct file_ra_state *ra, struct file *file, pgoff_t index,
|
||||
unsigned long req_count)
|
||||
{
|
||||
DEFINE_READAHEAD(ractl, file, mapping, index);
|
||||
page_cache_sync_ra(&ractl, ra, req_count);
|
||||
}
|
||||
|
||||
/**
|
||||
* page_cache_async_readahead - file readahead for marked pages
|
||||
* @mapping: address_space which holds the pagecache and I/O vectors
|
||||
* @ra: file_ra_state which holds the readahead state
|
||||
* @file: Used by the filesystem for authentication.
|
||||
* @page: The page at @index which triggered the readahead call.
|
||||
* @index: Index of first page to be read.
|
||||
* @req_count: Total number of pages being read by the caller.
|
||||
*
|
||||
* page_cache_async_readahead() should be called when a page is used which
|
||||
* is marked as PageReadahead; this is a marker to suggest that the application
|
||||
* has used up enough of the readahead window that we should start pulling in
|
||||
* more pages.
|
||||
*/
|
||||
static inline
|
||||
void page_cache_async_readahead(struct address_space *mapping,
|
||||
struct file_ra_state *ra, struct file *file,
|
||||
struct page *page, pgoff_t index, unsigned long req_count)
|
||||
{
|
||||
DEFINE_READAHEAD(ractl, file, mapping, index);
|
||||
page_cache_async_ra(&ractl, ra, page, req_count);
|
||||
}
|
||||
|
||||
/**
|
||||
* readahead_page - Get the next page to read.
|
||||
* @rac: The current readahead request.
|
||||
|
@ -1013,7 +1013,7 @@ struct task_struct {
|
||||
struct held_lock held_locks[MAX_LOCK_DEPTH];
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_UBSAN
|
||||
#if defined(CONFIG_UBSAN) && !defined(CONFIG_UBSAN_TRAP)
|
||||
unsigned int in_ubsan;
|
||||
#endif
|
||||
|
||||
|
@ -49,31 +49,6 @@ static inline void mmdrop(struct mm_struct *mm)
|
||||
__mmdrop(mm);
|
||||
}
|
||||
|
||||
/*
|
||||
* This has to be called after a get_task_mm()/mmget_not_zero()
|
||||
* followed by taking the mmap_lock for writing before modifying the
|
||||
* vmas or anything the coredump pretends not to change from under it.
|
||||
*
|
||||
* It also has to be called when mmgrab() is used in the context of
|
||||
* the process, but then the mm_count refcount is transferred outside
|
||||
* the context of the process to run down_write() on that pinned mm.
|
||||
*
|
||||
* NOTE: find_extend_vma() called from GUP context is the only place
|
||||
* that can modify the "mm" (notably the vm_start/end) under mmap_lock
|
||||
* for reading and outside the context of the process, so it is also
|
||||
* the only case that holds the mmap_lock for reading that must call
|
||||
* this function. Generally if the mmap_lock is hold for reading
|
||||
* there's no need of this check after get_task_mm()/mmget_not_zero().
|
||||
*
|
||||
* This function can be obsoleted and the check can be removed, after
|
||||
* the coredump code will hold the mmap_lock for writing before
|
||||
* invoking the ->core_dump methods.
|
||||
*/
|
||||
static inline bool mmget_still_valid(struct mm_struct *mm)
|
||||
{
|
||||
return likely(!mm->core_state);
|
||||
}
|
||||
|
||||
/**
|
||||
* mmget() - Pin the address space associated with a &struct mm_struct.
|
||||
* @mm: The address space to pin.
|
||||
|
@ -2,7 +2,9 @@
|
||||
#ifndef __LINUX_UACCESS_H__
|
||||
#define __LINUX_UACCESS_H__
|
||||
|
||||
#include <linux/fault-inject-usercopy.h>
|
||||
#include <linux/instrumented.h>
|
||||
#include <linux/minmax.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/thread_info.h>
|
||||
|
||||
@ -83,6 +85,8 @@ static __always_inline __must_check unsigned long
|
||||
__copy_from_user(void *to, const void __user *from, unsigned long n)
|
||||
{
|
||||
might_fault();
|
||||
if (should_fail_usercopy())
|
||||
return n;
|
||||
instrument_copy_from_user(to, from, n);
|
||||
check_object_size(to, n, false);
|
||||
return raw_copy_from_user(to, from, n);
|
||||
@ -104,6 +108,8 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
|
||||
static __always_inline __must_check unsigned long
|
||||
__copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
|
||||
{
|
||||
if (should_fail_usercopy())
|
||||
return n;
|
||||
instrument_copy_to_user(to, from, n);
|
||||
check_object_size(from, n, true);
|
||||
return raw_copy_to_user(to, from, n);
|
||||
@ -113,6 +119,8 @@ static __always_inline __must_check unsigned long
|
||||
__copy_to_user(void __user *to, const void *from, unsigned long n)
|
||||
{
|
||||
might_fault();
|
||||
if (should_fail_usercopy())
|
||||
return n;
|
||||
instrument_copy_to_user(to, from, n);
|
||||
check_object_size(from, n, true);
|
||||
return raw_copy_to_user(to, from, n);
|
||||
@ -124,7 +132,7 @@ _copy_from_user(void *to, const void __user *from, unsigned long n)
|
||||
{
|
||||
unsigned long res = n;
|
||||
might_fault();
|
||||
if (likely(access_ok(from, n))) {
|
||||
if (!should_fail_usercopy() && likely(access_ok(from, n))) {
|
||||
instrument_copy_from_user(to, from, n);
|
||||
res = raw_copy_from_user(to, from, n);
|
||||
}
|
||||
@ -142,6 +150,8 @@ static inline __must_check unsigned long
|
||||
_copy_to_user(void __user *to, const void *from, unsigned long n)
|
||||
{
|
||||
might_fault();
|
||||
if (should_fail_usercopy())
|
||||
return n;
|
||||
if (access_ok(to, n)) {
|
||||
instrument_copy_to_user(to, from, n);
|
||||
n = raw_copy_to_user(to, from, n);
|
||||
|
@ -28,7 +28,7 @@ struct reclaim_stat {
|
||||
unsigned nr_writeback;
|
||||
unsigned nr_immediate;
|
||||
unsigned nr_pageout;
|
||||
unsigned nr_activate[2];
|
||||
unsigned nr_activate[ANON_AND_FILE];
|
||||
unsigned nr_ref_keep;
|
||||
unsigned nr_unmap_fail;
|
||||
unsigned nr_lazyfree_fail;
|
||||
|
@ -1505,6 +1505,28 @@ void xas_pause(struct xa_state *);
|
||||
|
||||
void xas_create_range(struct xa_state *);
|
||||
|
||||
#ifdef CONFIG_XARRAY_MULTI
|
||||
int xa_get_order(struct xarray *, unsigned long index);
|
||||
void xas_split(struct xa_state *, void *entry, unsigned int order);
|
||||
void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t);
|
||||
#else
|
||||
static inline int xa_get_order(struct xarray *xa, unsigned long index)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void xas_split(struct xa_state *xas, void *entry,
|
||||
unsigned int order)
|
||||
{
|
||||
xas_store(xas, entry);
|
||||
}
|
||||
|
||||
static inline void xas_split_alloc(struct xa_state *xas, void *entry,
|
||||
unsigned int order, gfp_t gfp)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* xas_reload() - Refetch an entry from the xarray.
|
||||
* @xas: XArray operation state.
|
||||
|
@ -361,6 +361,7 @@ TRACE_EVENT(aer_event,
|
||||
EM ( MF_MSG_POISONED_HUGE, "huge page already hardware poisoned" ) \
|
||||
EM ( MF_MSG_HUGE, "huge page" ) \
|
||||
EM ( MF_MSG_FREE_HUGE, "free huge page" ) \
|
||||
EM ( MF_MSG_NON_PMD_HUGE, "non-pmd-sized huge page" ) \
|
||||
EM ( MF_MSG_UNMAP_FAILED, "unmapping failed page" ) \
|
||||
EM ( MF_MSG_DIRTY_SWAPCACHE, "dirty swapcache page" ) \
|
||||
EM ( MF_MSG_CLEAN_SWAPCACHE, "clean swapcache page" ) \
|
||||
@ -373,6 +374,8 @@ TRACE_EVENT(aer_event,
|
||||
EM ( MF_MSG_TRUNCATED_LRU, "already truncated LRU page" ) \
|
||||
EM ( MF_MSG_BUDDY, "free buddy page" ) \
|
||||
EM ( MF_MSG_BUDDY_2ND, "free buddy page (2nd try)" ) \
|
||||
EM ( MF_MSG_DAX, "dax page" ) \
|
||||
EM ( MF_MSG_UNSPLIT_THP, "unsplit thp" ) \
|
||||
EMe ( MF_MSG_UNKNOWN, "unknown page" )
|
||||
|
||||
/*
|
||||
|
@ -25,7 +25,7 @@
|
||||
* Now we silently close acct_file on attempt to reopen. Cleaned sys_acct().
|
||||
* XTerms and EMACS are manifestations of pure evil. 21/10/98, AV.
|
||||
*
|
||||
* Fixed a nasty interaction with with sys_umount(). If the accointing
|
||||
* Fixed a nasty interaction with sys_umount(). If the accounting
|
||||
* was suspeneded we failed to stop it on umount(). Messy.
|
||||
* Another one: remount to readonly didn't stop accounting.
|
||||
* Question: what should we do if we have CAP_SYS_ADMIN but not
|
||||
@ -263,12 +263,12 @@ static DEFINE_MUTEX(acct_on_mutex);
|
||||
* sys_acct - enable/disable process accounting
|
||||
* @name: file name for accounting records or NULL to shutdown accounting
|
||||
*
|
||||
* Returns 0 for success or negative errno values for failure.
|
||||
*
|
||||
* sys_acct() is the only system call needed to implement process
|
||||
* accounting. It takes the name of the file where accounting records
|
||||
* should be written. If the filename is NULL, accounting will be
|
||||
* shutdown.
|
||||
*
|
||||
* Returns: 0 for success or negative errno values for failure.
|
||||
*/
|
||||
SYSCALL_DEFINE1(acct, const char __user *, name)
|
||||
{
|
||||
@ -586,9 +586,7 @@ static void slow_acct_process(struct pid_namespace *ns)
|
||||
}
|
||||
|
||||
/**
|
||||
* acct_process
|
||||
*
|
||||
* handles process accounting for an exiting task
|
||||
* acct_process - handles process accounting for an exiting task
|
||||
*/
|
||||
void acct_process(void)
|
||||
{
|
||||
|
@ -390,7 +390,7 @@ static void guarantee_online_cpus(struct cpuset *cs, struct cpumask *pmask)
|
||||
* The top cpuset doesn't have any online cpu as a
|
||||
* consequence of a race between cpuset_hotplug_work
|
||||
* and cpu hotplug notifier. But we know the top
|
||||
* cpuset's effective_cpus is on its way to to be
|
||||
* cpuset's effective_cpus is on its way to be
|
||||
* identical to cpu_online_mask.
|
||||
*/
|
||||
cpumask_copy(pmask, cpu_online_mask);
|
||||
|
@ -16,7 +16,7 @@
|
||||
#include "direct.h"
|
||||
|
||||
/*
|
||||
* Most architectures use ZONE_DMA for the first 16 Megabytes, but some use it
|
||||
* Most architectures use ZONE_DMA for the first 16 Megabytes, but some use
|
||||
* it for entirely different regions. In that case the arch code needs to
|
||||
* override the variable below for dma-direct to work properly.
|
||||
*/
|
||||
|
@ -556,7 +556,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
|
||||
|
||||
get_file(file);
|
||||
if (tmp->vm_flags & VM_DENYWRITE)
|
||||
atomic_dec(&inode->i_writecount);
|
||||
put_write_access(inode);
|
||||
i_mmap_lock_write(mapping);
|
||||
if (tmp->vm_flags & VM_SHARED)
|
||||
mapping_allow_writable(mapping);
|
||||
@ -2189,7 +2189,7 @@ static __latent_entropy struct task_struct *copy_process(
|
||||
|
||||
/*
|
||||
* Ensure that the cgroup subsystem policies allow the new process to be
|
||||
* forked. It should be noted the the new process's css_set can be changed
|
||||
* forked. It should be noted that the new process's css_set can be changed
|
||||
* between here and cgroup_post_fork() if an organisation operation is in
|
||||
* progress.
|
||||
*/
|
||||
|
@ -916,7 +916,7 @@ static inline void exit_pi_state_list(struct task_struct *curr) { }
|
||||
* [10] Found | Found | task | !=taskTID | 0/1 | Invalid
|
||||
*
|
||||
* [1] Indicates that the kernel can acquire the futex atomically. We
|
||||
* came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
|
||||
* came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit.
|
||||
*
|
||||
* [2] Valid, if TID does not belong to a kernel thread. If no matching
|
||||
* thread is found then it indicates that the owner TID has died.
|
||||
|
@ -604,7 +604,7 @@ int irq_timings_alloc(int irq)
|
||||
|
||||
/*
|
||||
* Some platforms can have the same private interrupt per cpu,
|
||||
* so this function may be be called several times with the
|
||||
* so this function may be called several times with the
|
||||
* same interrupt number. Just bail out in case the per cpu
|
||||
* stat structure is already allocated.
|
||||
*/
|
||||
|
@ -19,7 +19,7 @@
|
||||
#include <linux/cpu.h>
|
||||
#include <asm/sections.h>
|
||||
|
||||
/* mutex to protect coming/going of the the jump_label table */
|
||||
/* mutex to protect coming/going of the jump_label table */
|
||||
static DEFINE_MUTEX(jump_label_mutex);
|
||||
|
||||
void jump_label_lock(void)
|
||||
|
@ -32,7 +32,7 @@
|
||||
* 1. different addresses but with the same encoded address race;
|
||||
* 2. and both map onto the same watchpoint slots;
|
||||
*
|
||||
* Both these are assumed to be very unlikely. However, in case it still happens
|
||||
* Both these are assumed to be very unlikely. However, in case it still
|
||||
* happens, the report logic will filter out the false positive (see report.c).
|
||||
*/
|
||||
#define WATCHPOINT_ADDR_BITS (BITS_PER_LONG-1 - WATCHPOINT_SIZE_BITS)
|
||||
|
@ -109,7 +109,7 @@ EXPORT_SYMBOL_GPL(kexec_crash_loaded);
|
||||
* defined more restrictively in <asm/kexec.h>.
|
||||
*
|
||||
* The code for the transition from the current kernel to the
|
||||
* the new kernel is placed in the control_code_buffer, whose size
|
||||
* new kernel is placed in the control_code_buffer, whose size
|
||||
* is given by KEXEC_CONTROL_PAGE_SIZE. In the best case only a single
|
||||
* page of memory is necessary, but some architectures require more.
|
||||
* Because this memory must be identity mapped in the transition from
|
||||
|
@ -521,7 +521,7 @@ static int locate_mem_hole_callback(struct resource *res, void *arg)
|
||||
/* Returning 0 will take to next memory range */
|
||||
|
||||
/* Don't use memory that will be detected and handled by a driver. */
|
||||
if (res->flags & IORESOURCE_MEM_DRIVER_MANAGED)
|
||||
if (res->flags & IORESOURCE_SYSRAM_DRIVER_MANAGED)
|
||||
return 0;
|
||||
|
||||
if (sz < kbuf->memsz)
|
||||
|
@@ -775,7 +775,7 @@ EXPORT_SYMBOL(kthread_create_worker);
 
 /**
  * kthread_create_worker_on_cpu - create a kthread worker and bind it
- *	it to a given CPU and the associated NUMA node.
+ *	to a given CPU and the associated NUMA node.
  * @cpu: CPU number
  * @flags: flags modifying the default behavior of the worker
  * @namefmt: printf-style name for the kthread worker (task).

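Editorial note: the kernel-doc touched above describes the kthread-worker API. Below is a minimal, hedged usage sketch that is not part of this diff; the example_* names are hypothetical, while kthread_create_worker_on_cpu(), kthread_init_work(), kthread_queue_work(), kthread_flush_work() and kthread_destroy_worker() are the existing kernel helpers.

#include <linux/kthread.h>

/* Illustrative only: bind a worker to one CPU, run one work item, tear down. */
static void example_work_fn(struct kthread_work *work)
{
	pr_info("example work running\n");
}

static int example_run_on_cpu(unsigned int cpu)
{
	struct kthread_worker *worker;
	struct kthread_work work;

	worker = kthread_create_worker_on_cpu(cpu, 0, "example/%u", cpu);
	if (IS_ERR(worker))
		return PTR_ERR(worker);

	kthread_init_work(&work, example_work_fn);
	kthread_queue_work(worker, &work);
	kthread_flush_work(&work);
	kthread_destroy_worker(worker);
	return 0;
}
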
@@ -55,7 +55,7 @@ EXPORT_SYMBOL_GPL(klp_get_state);
  *
  * The function can be called only during transition when a new
  * livepatch is being enabled or when such a transition is reverted.
- * It is typically called only from from pre/post (un)patch
+ * It is typically called only from pre/post (un)patch
  * callbacks.
  *
  * Return: pointer to the latest struct klp_state from already

@@ -589,6 +589,11 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
	if (args)
		vprintk(args->fmt, args->args);
 
+	print_modules();
+
+	if (regs)
+		show_regs(regs);
+
	if (panic_on_warn) {
		/*
		 * This thread may hit another WARN() in the panic path.
@@ -600,12 +605,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
		panic("panic_on_warn set ...\n");
	}
 
-	print_modules();
-
-	if (regs)
-		show_regs(regs);
-	else
-		dump_stack();
+	dump_stack();
 
	print_irqtrace_events(current);
 

@@ -233,7 +233,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
	 * to pid_ns->child_reaper. Thus pidns->child_reaper needs to
	 * stay valid until they all go away.
	 *
-	 * The code relies on the the pid_ns->child_reaper ignoring
+	 * The code relies on the pid_ns->child_reaper ignoring
	 * SIGCHILD to cause those EXIT_ZOMBIE processes to be
	 * autoreaped if reparented.
	 *

@@ -735,7 +735,7 @@ zone_found:
	 */
 
	/*
-	 * If the zone we wish to scan is the the current zone and the
+	 * If the zone we wish to scan is the current zone and the
	 * pfn falls into the current node then we do not need to walk
	 * the tree.
	 */

@@ -2,8 +2,9 @@
 /*
  * Range add and subtract
  */
-#include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/minmax.h>
+#include <linux/printk.h>
 #include <linux/sort.h>
 #include <linux/string.h>
 #include <linux/range.h>

@@ -1002,7 +1002,7 @@ static int relay_file_read_avail(struct rchan_buf *buf)
	size_t subbuf_size = buf->chan->subbuf_size;
	size_t n_subbufs = buf->chan->n_subbufs;
	size_t produced = buf->subbufs_produced;
-	size_t consumed = buf->subbufs_consumed;
+	size_t consumed;
 
	relay_file_read_consume(buf, 0, 0);
 

@@ -1240,7 +1240,6 @@ EXPORT_SYMBOL(__release_region);
 #ifdef CONFIG_MEMORY_HOTREMOVE
 /**
  * release_mem_region_adjustable - release a previously reserved memory region
- * @parent: parent resource descriptor
  * @start: resource start address
  * @size: resource region size
  *

@@ -1258,21 +1257,28 @@ EXPORT_SYMBOL(__release_region);
  * assumes that all children remain in the lower address entry for
  * simplicity. Enhance this logic when necessary.
  */
-int release_mem_region_adjustable(struct resource *parent,
-				  resource_size_t start, resource_size_t size)
+void release_mem_region_adjustable(resource_size_t start, resource_size_t size)
 {
+	struct resource *parent = &iomem_resource;
+	struct resource *new_res = NULL;
+	bool alloc_nofail = false;
	struct resource **p;
	struct resource *res;
-	struct resource *new_res;
	resource_size_t end;
-	int ret = -EINVAL;
 
	end = start + size - 1;
-	if ((start < parent->start) || (end > parent->end))
-		return ret;
+	if (WARN_ON_ONCE((start < parent->start) || (end > parent->end)))
+		return;
 
-	/* The alloc_resource() result gets checked later */
-	new_res = alloc_resource(GFP_KERNEL);
+	/*
+	 * We free up quite a lot of memory on memory hotunplug (esp., memap),
+	 * just before releasing the region. This is highly unlikely to
+	 * fail - let's play save and make it never fail as the caller cannot
+	 * perform any error handling (e.g., trying to re-add memory will fail
+	 * similarly).
+	 */
+retry:
+	new_res = alloc_resource(GFP_KERNEL | (alloc_nofail ? __GFP_NOFAIL : 0));
 
	p = &parent->child;
	write_lock(&resource_lock);

@@ -1298,7 +1304,6 @@ int release_mem_region_adjustable(struct resource *parent,
	 * so if we are dealing with them, let us just back off here.
	 */
	if (!(res->flags & IORESOURCE_SYSRAM)) {
-		ret = 0;
		break;
	}
 

@@ -1315,20 +1320,23 @@ int release_mem_region_adjustable(struct resource *parent,
		/* free the whole entry */
		*p = res->sibling;
		free_resource(res);
-		ret = 0;
	} else if (res->start == start && res->end != end) {
		/* adjust the start */
-		ret = __adjust_resource(res, end + 1,
-					res->end - end);
+		WARN_ON_ONCE(__adjust_resource(res, end + 1,
+					       res->end - end));
	} else if (res->start != start && res->end == end) {
		/* adjust the end */
-		ret = __adjust_resource(res, res->start,
-					start - res->start);
+		WARN_ON_ONCE(__adjust_resource(res, res->start,
+					       start - res->start));
	} else {
-		/* split into two entries */
+		/* split into two entries - we need a new resource */
		if (!new_res) {
-			ret = -ENOMEM;
-			break;
+			new_res = alloc_resource(GFP_ATOMIC);
+			if (!new_res) {
+				alloc_nofail = true;
+				write_unlock(&resource_lock);
+				goto retry;
+			}
		}
		new_res->name = res->name;
		new_res->start = end + 1;

@@ -1339,9 +1347,8 @@ int release_mem_region_adjustable(struct resource *parent,
		new_res->sibling = res->sibling;
		new_res->child = NULL;
 
-		ret = __adjust_resource(res, res->start,
-					start - res->start);
-		if (ret)
+		if (WARN_ON_ONCE(__adjust_resource(res, res->start,
+						   start - res->start)))
			break;
		res->sibling = new_res;
		new_res = NULL;

@@ -1352,10 +1359,69 @@ int release_mem_region_adjustable(struct resource *parent,
 
	write_unlock(&resource_lock);
	free_resource(new_res);
-	return ret;
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+static bool system_ram_resources_mergeable(struct resource *r1,
+					   struct resource *r2)
+{
+	/* We assume either r1 or r2 is IORESOURCE_SYSRAM_MERGEABLE. */
+	return r1->flags == r2->flags && r1->end + 1 == r2->start &&
+	       r1->name == r2->name && r1->desc == r2->desc &&
+	       !r1->child && !r2->child;
+}
+
+/*
+ * merge_system_ram_resource - mark the System RAM resource mergeable and try to
+ * merge it with adjacent, mergeable resources
+ * @res: resource descriptor
+ *
+ * This interface is intended for memory hotplug, whereby lots of contiguous
+ * system ram resources are added (e.g., via add_memory*()) by a driver, and
+ * the actual resource boundaries are not of interest (e.g., it might be
+ * relevant for DIMMs). Only resources that are marked mergeable, that have the
+ * same parent, and that don't have any children are considered. All mergeable
+ * resources must be immutable during the request.
+ *
+ * Note:
+ * - The caller has to make sure that no pointers to resources that are
+ *   marked mergeable are used anymore after this call - the resource might
+ *   be freed and the pointer might be stale!
+ * - release_mem_region_adjustable() will split on demand on memory hotunplug
+ */
+void merge_system_ram_resource(struct resource *res)
+{
+	const unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+	struct resource *cur;
+
+	if (WARN_ON_ONCE((res->flags & flags) != flags))
+		return;
+
+	write_lock(&resource_lock);
+	res->flags |= IORESOURCE_SYSRAM_MERGEABLE;
+
+	/* Try to merge with next item in the list. */
+	cur = res->sibling;
+	if (cur && system_ram_resources_mergeable(res, cur)) {
+		res->end = cur->end;
+		res->sibling = cur->sibling;
+		free_resource(cur);
+	}
+
+	/* Try to merge with previous item in the list. */
+	cur = res->parent->child;
+	while (cur && cur->sibling != res)
+		cur = cur->sibling;
+	if (cur && system_ram_resources_mergeable(cur, res)) {
+		cur->end = res->end;
+		cur->sibling = res->sibling;
+		free_resource(res);
+	}
+	write_unlock(&resource_lock);
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
 /*
  * Managed region resource
  */

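Editorial note: the kernel-doc added above spells out the contract of merge_system_ram_resource(). A hedged sketch of how a memory-hotplug path might use it follows; it is not part of this diff, and example_online_system_ram() is a hypothetical caller.

/*
 * Illustrative only: res must already be a registered System RAM resource
 * (IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY), e.g. one a hotplug driver just
 * added for a contiguous block of memory.
 */
static void example_online_system_ram(struct resource *res)
{
	merge_system_ram_resource(res);

	/*
	 * Per the note above, res may have been freed if it was merged into a
	 * preceding mergeable resource, so the pointer must not be used again.
	 */
}
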
@@ -741,7 +741,7 @@ EXPORT_SYMBOL(on_each_cpu_mask);
  * for all the required CPUs to finish. This may include the local
  * processor.
  * @cond_func:	A callback function that is passed a cpu id and
- *		the the info parameter. The function is called
+ *		the info parameter. The function is called
  *		with preemption disabled. The function should
  *		return a blooean value indicating whether to IPI
  *		the specified CPU.

@@ -2034,7 +2034,7 @@ static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data
	 * VMAs already unmapped and kernel uses these members for statistics
	 * output in procfs mostly, except
	 *
-	 *  - @start_brk/@brk which are used in do_brk but kernel lookups
+	 *  - @start_brk/@brk which are used in do_brk_flags but kernel lookups
	 *    for VMAs when updating these memvers so anything wrong written
	 *    here cause kernel to swear at userspace program but won't lead
	 *    to any problem in kernel itself

@@ -515,7 +515,7 @@ EXPORT_SYMBOL(from_kgid_munged);
  *
  *	When there is no mapping defined for the user-namespace projid
  *	pair INVALID_PROJID is returned. Callers are expected to test
- *	for and handle handle INVALID_PROJID being returned. INVALID_PROJID
+ *	for and handle INVALID_PROJID being returned. INVALID_PROJID
  *	may be tested for using projid_valid().
  */
 kprojid_t make_kprojid(struct user_namespace *ns, projid_t projid)

@@ -1768,6 +1768,13 @@ config FAIL_PAGE_ALLOC
	help
	  Provide fault-injection capability for alloc_pages().
 
+config FAULT_INJECTION_USERCOPY
+	bool "Fault injection capability for usercopy functions"
+	depends on FAULT_INJECTION
+	help
+	  Provides fault-injection capability to inject failures
+	  in usercopy functions (copy_from_user(), get_user(), ...).
+
 config FAIL_MAKE_REQUEST
	bool "Fault-injection capability for disk IO"
	depends on FAULT_INJECTION && BLOCK

@@ -47,6 +47,20 @@ config UBSAN_BOUNDS
	  to the {str,mem}*cpy() family of functions (that is addressed
	  by CONFIG_FORTIFY_SOURCE).
 
+config UBSAN_LOCAL_BOUNDS
+	bool "Perform array local bounds checking"
+	depends on UBSAN_TRAP
+	depends on CC_IS_CLANG
+	depends on !UBSAN_KCOV_BROKEN
+	help
+	  This option enables -fsanitize=local-bounds which traps when an
+	  exception/error is detected. Therefore, it should be enabled only
+	  if trapping is expected.
+	  Enabling this option detects errors due to accesses through a
+	  pointer that is derived from an object of a statically-known size,
+	  where an added offset (which may not be known statically) is
+	  out-of-bounds.
+
 config UBSAN_MISC
	bool "Enable all other Undefined Behavior sanity checks"
	default UBSAN

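Editorial note: as a reading aid for the new UBSAN_LOCAL_BOUNDS option, here is a hedged example of the class of bug -fsanitize=local-bounds instruments; it is not part of this diff and example_read() is hypothetical.

/*
 * buf has a statically-known size of 8 ints. With local bounds checking in
 * trap mode, an access with idx >= 8 traps at runtime instead of silently
 * reading or corrupting adjacent stack memory.
 */
static int example_read(unsigned int idx)
{
	int buf[8] = { 0 };

	return buf[idx];	/* trapped if idx is out of bounds */
}
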
@@ -210,6 +210,7 @@ obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o
 
 obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
+obj-$(CONFIG_FAULT_INJECTION_USERCOPY) += fault-inject-usercopy.o
 obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
 obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o
 obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o

@@ -23,7 +23,7 @@
 /**
  * DOC: bitmap introduction
  *
- * bitmaps provide an array of bits, implemented using an an
+ * bitmaps provide an array of bits, implemented using an
  * array of unsigned longs. The number of valid bits in a
  * given bitmap does _not_ need to be an exact multiple of
  * BITS_PER_LONG.

@@ -331,7 +331,7 @@ static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p,
	return crc;
 }
 
-#if CRC_LE_BITS == 1
+#if CRC_BE_BITS == 1
 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
 {
	return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE);

@@ -390,7 +390,7 @@ static int INIT get_next_block(struct bunzip_data *bd)
		j = (bd->inbufBits >> bd->inbufBitCount)&
			((1 << hufGroup->maxLen)-1);
 got_huff_bits:
-		/* Figure how how many bits are in next symbol and
+		/* Figure how many bits are in next symbol and
		 * unget extras */
		i = hufGroup->minLen;
		while (j > limit[i])

@@ -60,8 +60,8 @@ void dql_completed(struct dql *dql, unsigned int count)
	 * A decrease is only considered if the queue has been busy in
	 * the whole interval (the check above).
	 *
-	 * If there is slack, the amount of execess data queued above
-	 * the the amount needed to prevent starvation, the queue limit
+	 * If there is slack, the amount of excess data queued above
+	 * the amount needed to prevent starvation, the queue limit
	 * can be decreased. To avoid hysteresis we consider the
	 * minimum amount of slack found over several iterations of the
	 * completion routine.

@@ -42,7 +42,7 @@ enum cpio_fields {
 /**
  * cpio_data find_cpio_data - Search for files in an uncompressed cpio
  * @path: The directory to search for, including a slash at the end
- * @data: Pointer to the the cpio archive or a header inside
+ * @data: Pointer to the cpio archive or a header inside
  * @len: Remaining length of the cpio based on data pointer
  * @nextoff: When a matching file is found, this is the offset from the
  *           beginning of the cpio to the beginning of the next file, not the

lib/fault-inject-usercopy.c (new file, 39 lines)
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/fault-inject.h>
+#include <linux/fault-inject-usercopy.h>
+
+static struct {
+	struct fault_attr attr;
+} fail_usercopy = {
+	.attr = FAULT_ATTR_INITIALIZER,
+};
+
+static int __init setup_fail_usercopy(char *str)
+{
+	return setup_fault_attr(&fail_usercopy.attr, str);
+}
+__setup("fail_usercopy=", setup_fail_usercopy);
+
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+
+static int __init fail_usercopy_debugfs(void)
+{
+	struct dentry *dir;
+
+	dir = fault_create_debugfs_attr("fail_usercopy", NULL,
+					&fail_usercopy.attr);
+	if (IS_ERR(dir))
+		return PTR_ERR(dir);
+
+	return 0;
+}
+
+late_initcall(fail_usercopy_debugfs);
+
+#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
+
+bool should_fail_usercopy(void)
+{
+	return should_fail(&fail_usercopy.attr, 1);
+}
+EXPORT_SYMBOL_GPL(should_fail_usercopy);

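Editorial note: the new file above only provides the should_fail_usercopy() hook; the copyout()/copyin() hunks further down show where this series wires it in. As a hedged illustration (not part of this diff, example_copy_to_user() is hypothetical), any other copy wrapper could honour the hook the same way:

#include <linux/fault-inject-usercopy.h>
#include <linux/uaccess.h>

/*
 * Illustrative only: returning the full length mimics a copy that faulted
 * without transferring anything, matching how copyout()/copyin() bail out.
 */
static unsigned long example_copy_to_user(void __user *to,
					  const void *from, size_t n)
{
	if (should_fail_usercopy())
		return n;
	return copy_to_user(to, from, n);
}
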
@@ -16,6 +16,7 @@
 #include <linux/bitmap.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
+#include <linux/minmax.h>
 
 #if !defined(find_next_bit) || !defined(find_next_zero_bit) || \
	!defined(find_next_bit_le) || !defined(find_next_zero_bit_le) || \

@@ -7,6 +7,7 @@
 #include <linux/ctype.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
+#include <linux/minmax.h>
 #include <linux/export.h>
 #include <asm/unaligned.h>
 

@@ -372,7 +372,8 @@ EXPORT_SYMBOL(idr_replace);
  * Allocate an ID between @min and @max, inclusive. The allocated ID will
  * not exceed %INT_MAX, even if @max is larger.
  *
- * Context: Any context.
+ * Context: Any context. It is safe to call this function without
+ * locking in your code.
  * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
  * or %-ENOSPC if there are no free IDs.
  */

@@ -479,7 +480,8 @@ EXPORT_SYMBOL(ida_alloc_range);
  * @ida: IDA handle.
  * @id: Previously allocated ID.
  *
- * Context: Any context.
+ * Context: Any context. It is safe to call this function without
+ * locking in your code.
  */
 void ida_free(struct ida *ida, unsigned int id)
 {

@@ -531,7 +533,8 @@ EXPORT_SYMBOL(ida_free);
  * or freed. If the IDA is already empty, there is no need to call this
  * function.
  *
- * Context: Any context.
+ * Context: Any context. It is safe to call this function without
+ * locking in your code.
  */
 void ida_destroy(struct ida *ida)
 {

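Editorial note: the kernel-doc updates above stress that the IDA calls need no external locking. A short hedged usage sketch (not part of this diff; the example_* names are hypothetical) of the documented API:

#include <linux/idr.h>

static DEFINE_IDA(example_ida);

/* Returns a free ID in [0, 63] or a negative errno; no caller locking needed. */
static int example_get_slot(void)
{
	return ida_alloc_range(&example_ida, 0, 63, GFP_KERNEL);
}

static void example_put_slot(int id)
{
	ida_free(&example_ida, id);
}
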
@@ -2,6 +2,7 @@
 #include <crypto/hash.h>
 #include <linux/export.h>
 #include <linux/bvec.h>
+#include <linux/fault-inject-usercopy.h>
 #include <linux/uio.h>
 #include <linux/pagemap.h>
 #include <linux/slab.h>

@@ -140,6 +141,8 @@
 
 static int copyout(void __user *to, const void *from, size_t n)
 {
+	if (should_fail_usercopy())
+		return n;
	if (access_ok(to, n)) {
		instrument_copy_to_user(to, from, n);
		n = raw_copy_to_user(to, from, n);

@@ -149,6 +152,8 @@ static int copyout(void __user *to, const void *from, size_t n)
 
 static int copyin(void *to, const void __user *from, size_t n)
 {
+	if (should_fail_usercopy())
+		return n;
	if (access_ok(from, n)) {
		instrument_copy_from_user(to, from, n);
		n = raw_copy_from_user(to, from, n);

Some files were not shown because too many files have changed in this diff.