Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:

 - most of the rest of MM (memcg, hugetlb, vmscan, proc, compaction,
   mempolicy, oom-kill, hugetlbfs, migration, thp, cma, util,
   memory-hotplug, cleanups, uaccess, migration, gup, pagemap),

 - various other subsystems (alpha, misc, sparse, bitmap, lib, bitops,
   checkpatch, autofs, minix, nilfs, ufs, fat, signals, kmod, coredump,
   exec, kdump, rapidio, panic, kcov, kgdb, ipc).

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (164 commits)
  mm/gup: remove task_struct pointer for all gup code
  mm: clean up the last pieces of page fault accountings
  mm/xtensa: use general page fault accounting
  mm/x86: use general page fault accounting
  mm/sparc64: use general page fault accounting
  mm/sparc32: use general page fault accounting
  mm/sh: use general page fault accounting
  mm/s390: use general page fault accounting
  mm/riscv: use general page fault accounting
  mm/powerpc: use general page fault accounting
  mm/parisc: use general page fault accounting
  mm/openrisc: use general page fault accounting
  mm/nios2: use general page fault accounting
  mm/nds32: use general page fault accounting
  mm/mips: use general page fault accounting
  mm/microblaze: use general page fault accounting
  mm/m68k: use general page fault accounting
  mm/ia64: use general page fault accounting
  mm/hexagon: use general page fault accounting
  mm/csky: use general page fault accounting
  ...
commit 9ad57f6dfc
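The recurring change in the per-architecture hunks below is that handle_mm_fault() now takes the faulting pt_regs and the core mm code performs the major/minor fault accounting, so each architecture's hand-rolled maj_flt/min_flt and perf event block is deleted. A hedged before/after sketch of that calling convention (illustrative only, not a literal excerpt from any one architecture):

	/*
	 * Before: every architecture accounted major/minor faults itself
	 * after calling handle_mm_fault().
	 */
	fault = handle_mm_fault(vma, address, flags);
	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR) {
			current->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
		} else {
			current->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
		}
	}

	/*
	 * After: the arch handler emits only the generic PAGE_FAULTS event and
	 * passes regs down (or NULL for kernel-internal faults, as the powerpc
	 * copro_fault hunk does); the accounting happens inside handle_mm_fault().
	 */
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
	fault = handle_mm_fault(vma, address, flags, regs);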
@@ -1274,6 +1274,10 @@ PAGE_SIZE multiple when read back.
 		Amount of memory used for storing in-kernel data
 		structures.
 
+	  percpu
+		Amount of memory used for storing per-cpu kernel
+		data structures.
+
 	  sock
 		Amount of memory used in network transmission buffers
 
@@ -164,7 +164,8 @@ core_pattern
 	%s	signal number
 	%t	UNIX time of dump
 	%h	hostname
-	%e	executable filename (may be shortened)
+	%e	executable filename (may be shortened, could be changed by prctl etc)
+	%f	executable filename
 	%E	executable path
 	%c	maximum size of core file by resource limit RLIMIT_CORE
 	%<OTHER> both are dropped
@@ -119,6 +119,21 @@ all zones are compacted such that free memory is available in contiguous
 blocks where possible. This can be important for example in the allocation of
 huge pages although processes will also directly compact memory as required.
 
+
+compaction_proactiveness
+========================
+
+This tunable takes a value in the range [0, 100] with a default value of
+20. This tunable determines how aggressively compaction is done in the
+background. Setting it to 0 disables proactive compaction.
+
+Note that compaction has a non-trivial system-wide impact as pages
+belonging to different processes are moved around, which could also lead
+to latency spikes in unsuspecting applications. The kernel employs
+various heuristics to avoid wasting CPU cycles if it detects that
+proactive compaction is not being effective.
+
+Be careful when setting it to extreme values like 100, as that may
+cause excessive background compaction activity.
 
 compact_unevictable_allowed
 ===========================
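As a usage note, the new tunable can be adjusted at run time like any other vm sysctl; a minimal sketch, assuming it is exposed as /proc/sys/vm/compaction_proactiveness (matching the section name documented above):

	#include <stdio.h>

	int main(void)
	{
		/* Assumed path; derived from the documented sysctl name above. */
		FILE *f = fopen("/proc/sys/vm/compaction_proactiveness", "w");

		if (!f) {
			perror("compaction_proactiveness");
			return 1;
		}
		fprintf(f, "%d\n", 30);	/* 0 disables proactive compaction */
		return fclose(f) ? 1 : 0;
	}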
@@ -1633,9 +1633,6 @@ may allocate from based on an estimation of its current memory and swap use.
 For example, if a task is using all allowed memory, its badness score will be
 1000. If it is using half of its allowed memory, its score will be 500.
 
-There is an additional factor included in the badness score: the current memory
-and swap usage is discounted by 3% for root processes.
-
 The amount of "allowed" memory depends on the context in which the oom killer
 was called. If it is due to the memory assigned to the allocating task's cpuset
 being exhausted, the allowed memory represents the set of mems assigned to that

@@ -1671,11 +1668,6 @@ The value of /proc/<pid>/oom_score_adj may be reduced no lower than the last
 value set by a CAP_SYS_RESOURCE process. To reduce the value any lower
 requires CAP_SYS_RESOURCE.
 
-Caveat: when a parent task is selected, the oom killer will sacrifice any first
-generation children with separate address spaces instead, if possible. This
-avoids servers and important system daemons from being killed and loses the
-minimal amount of work.
-
 
 3.2 /proc/<pid>/oom_score - Display current oom-killer score
 -------------------------------------------------------------

@@ -1684,6 +1676,9 @@ This file can be used to check the current score used by the oom-killer for
 any given <pid>. Use it together with /proc/<pid>/oom_score_adj to tune which
 process should be killed in an out-of-memory situation.
 
+Please note that the exported value includes oom_score_adj so it is
+effectively in range [0,2000].
+
 
 3.3 /proc/<pid>/io - Display the IO accounting fields
 -------------------------------------------------------
@@ -253,5 +253,32 @@ which are function pointers of struct address_space_operations.
 PG_isolated is alias with PG_reclaim flag so driver shouldn't use the flag
 for own purpose.
 
+Monitoring Migration
+=====================
+
+The following events (counters) can be used to monitor page migration.
+
+1. PGMIGRATE_SUCCESS: Normal page migration success. Each count means that a
+   page was migrated. If the page was a non-THP page, then this counter is
+   increased by one. If the page was a THP, then this counter is increased by
+   the number of THP subpages. For example, migration of a single 2MB THP that
+   has 4KB-size base pages (subpages) will cause this counter to increase by
+   512.
+
+2. PGMIGRATE_FAIL: Normal page migration failure. Same counting rules as for
+   _SUCCESS, above: this will be increased by the number of subpages, if it was
+   a THP.
+
+3. THP_MIGRATION_SUCCESS: A THP was migrated without being split.
+
+4. THP_MIGRATION_FAIL: A THP could not be migrated nor it could be split.
+
+5. THP_MIGRATION_SPLIT: A THP was migrated, but not as such: first, the THP had
+   to be split. After splitting, a migration retry was used for it's sub-pages.
+
+THP_MIGRATION_* events also update the appropriate PGMIGRATE_SUCCESS or
+PGMIGRATE_FAIL events. For example, a THP migration failure will cause both
+THP_MIGRATION_FAIL and PGMIGRATE_FAIL to increase.
+
 Christoph Lameter, May 8, 2006.
 Minchan Kim, Mar 28, 2016.
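These counters surface through the usual vm event interface; a small sketch of reading them, assuming they appear under their lower-case names in /proc/vmstat (the conventional export for vm event counters):

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char line[256];
		FILE *f = fopen("/proc/vmstat", "r");

		if (!f) {
			perror("/proc/vmstat");
			return 1;
		}
		/* Print only the migration counters described above. */
		while (fgets(line, sizeof(line), f)) {
			if (!strncmp(line, "pgmigrate_", 10) ||
			    !strncmp(line, "thp_migration_", 14))
				fputs(line, stdout);
		}
		fclose(f);
		return 0;
	}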
Makefile | 4 ++++

@@ -893,6 +893,10 @@ KBUILD_CFLAGS	+= $(CC_FLAGS_SCS)
 export CC_FLAGS_SCS
 endif
 
+ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_32B
+KBUILD_CFLAGS += -falign-functions=32
+endif
+
 # arch Makefile may override CC so keep this after arch Makefile is included
 NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
@@ -489,10 +489,10 @@ extern inline void writeq(u64 b, volatile void __iomem *addr)
 }
 #endif
 
-#define ioread16be(p) be16_to_cpu(ioread16(p))
-#define ioread32be(p) be32_to_cpu(ioread32(p))
-#define iowrite16be(v,p) iowrite16(cpu_to_be16(v), (p))
-#define iowrite32be(v,p) iowrite32(cpu_to_be32(v), (p))
+#define ioread16be(p) swab16(ioread16(p))
+#define ioread32be(p) swab32(ioread32(p))
+#define iowrite16be(v,p) iowrite16(swab16(v), (p))
+#define iowrite32be(v,p) iowrite32(swab32(v), (p))
 
 #define inb_p		inb
 #define inw_p		inw
@@ -20,7 +20,7 @@
 #define get_fs()  (current_thread_info()->addr_limit)
 #define set_fs(x) (current_thread_info()->addr_limit = (x))
 
-#define segment_eq(a, b)	((a).seg == (b).seg)
+#define uaccess_kernel()	(get_fs().seg == KERNEL_DS.seg)
 
 /*
  * Is a address valid? This does a straightforward calculation rather
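The same substitution repeats in the uaccess headers of most architectures below: the open-coded segment_eq(get_fs(), KERNEL_DS) comparison becomes the uaccess_kernel() helper. A sketch of how a caller reads after the change (it mirrors the nds32 show_regs() hunk further down; illustrative only, not a literal excerpt):

	static void report_addr_limit(void)
	{
		/* Before: segment_eq(get_fs(), KERNEL_DS) ? "kernel" : "user" */
		pr_info("Segment %s\n", uaccess_kernel() ? "kernel" : "user");
	}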
@@ -25,6 +25,7 @@
 #include <linux/interrupt.h>
 #include <linux/extable.h>
 #include <linux/uaccess.h>
+#include <linux/perf_event.h>
 
 extern void die_if_kernel(char *,struct pt_regs *,long, unsigned long *);
 

@@ -116,6 +117,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
 #endif
 	if (user_mode(regs))
 		flags |= FAULT_FLAG_USER;
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 retry:
 	mmap_read_lock(mm);
 	vma = find_vma(mm, address);

@@ -148,7 +150,7 @@ retry:
 	/* If for any reason at all we couldn't handle the fault,
 	   make sure we exit gracefully rather than endlessly redo
 	   the fault.  */
-	fault = handle_mm_fault(vma, address, flags);
+	fault = handle_mm_fault(vma, address, flags, regs);
 
 	if (fault_signal_pending(fault, regs))
 		return;

@@ -164,10 +166,6 @@ retry:
 	}
 
 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
-		if (fault & VM_FAULT_MAJOR)
-			current->maj_flt++;
-		else
-			current->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags |= FAULT_FLAG_TRIED;
 
@@ -14,8 +14,7 @@ typedef unsigned long mm_segment_t;
 
 #define KERNEL_DS		MAKE_MM_SEG(0)
 #define USER_DS			MAKE_MM_SEG(TASK_SIZE)
+#define uaccess_kernel()	(get_fs() == KERNEL_DS)
 
-#define segment_eq(a, b)	((a) == (b))
-
 #endif /* __ASSEMBLY__ */
 #endif /* __ASMARC_SEGMENT_H */
@@ -91,7 +91,7 @@ fault:
 		goto fail;
 
 	mmap_read_lock(current->mm);
-	ret = fixup_user_fault(current, current->mm, (unsigned long) uaddr,
+	ret = fixup_user_fault(current->mm, (unsigned long) uaddr,
 			       FAULT_FLAG_WRITE, NULL);
 	mmap_read_unlock(current->mm);
 
@@ -105,6 +105,7 @@ void do_page_fault(unsigned long address, struct pt_regs *regs)
 	if (write)
 		flags |= FAULT_FLAG_WRITE;
 
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 retry:
 	mmap_read_lock(mm);
 

@@ -130,7 +131,7 @@ retry:
 		goto bad_area;
 	}
 
-	fault = handle_mm_fault(vma, address, flags);
+	fault = handle_mm_fault(vma, address, flags, regs);
 
 	/* Quick path to respond to signals */
 	if (fault_signal_pending(fault, regs)) {

@@ -155,22 +156,9 @@ bad_area:
 	 * Major/minor page fault accounting
 	 * (in case of retry we only land here once)
 	 */
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
-
-	if (likely(!(fault & VM_FAULT_ERROR))) {
-		if (fault & VM_FAULT_MAJOR) {
-			tsk->maj_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
-				      regs, address);
-		} else {
-			tsk->min_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
-				      regs, address);
-		}
-
+	if (likely(!(fault & VM_FAULT_ERROR)))
 		/* Normal return path: fault Handled Gracefully */
 		return;
-	}
 
 	if (!user_mode(regs))
 		goto no_context;
@@ -76,7 +76,7 @@ static inline void set_fs(mm_segment_t fs)
 	modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_CLIENT : DOMAIN_MANAGER);
 }
 
-#define segment_eq(a, b)	((a) == (b))
+#define uaccess_kernel()	(get_fs() == KERNEL_DS)
 
 /*
  * We use 33-bit arithmetic here. Success returns zero, failure returns

@@ -267,7 +267,7 @@ extern int __put_user_8(void *, unsigned long long);
  */
 #define USER_DS			KERNEL_DS
 
-#define segment_eq(a, b)	(1)
+#define uaccess_kernel()	(true)
 #define __addr_ok(addr)		((void)(addr), 1)
 #define __range_ok(addr, size)	((void)(addr), 0)
 #define get_fs()		(KERNEL_DS)
@@ -713,7 +713,9 @@ struct page *get_signal_page(void)
 /* Defer to generic check */
 asmlinkage void addr_limit_check_failed(void)
 {
+#ifdef CONFIG_MMU
 	addr_limit_user_check();
+#endif
 }
 
 #ifdef CONFIG_DEBUG_RSEQ
@@ -202,7 +202,8 @@ static inline bool access_error(unsigned int fsr, struct vm_area_struct *vma)
 
 static vm_fault_t __kprobes
 __do_page_fault(struct mm_struct *mm, unsigned long addr, unsigned int fsr,
-		unsigned int flags, struct task_struct *tsk)
+		unsigned int flags, struct task_struct *tsk,
+		struct pt_regs *regs)
 {
 	struct vm_area_struct *vma;
 	vm_fault_t fault;

@@ -224,7 +225,7 @@ good_area:
 			goto out;
 	}
 
-	return handle_mm_fault(vma, addr & PAGE_MASK, flags);
+	return handle_mm_fault(vma, addr & PAGE_MASK, flags, regs);
 
 check_stack:
 	/* Don't allow expansion below FIRST_USER_ADDRESS */

@@ -266,6 +267,8 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
 	if ((fsr & FSR_WRITE) && !(fsr & FSR_CM))
 		flags |= FAULT_FLAG_WRITE;
 
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+
 	/*
 	 * As per x86, we may deadlock here. However, since the kernel only
 	 * validly references user space from well defined areas of the code,

@@ -290,7 +293,7 @@ retry:
 #endif
 	}
 
-	fault = __do_page_fault(mm, addr, fsr, flags, tsk);
+	fault = __do_page_fault(mm, addr, fsr, flags, tsk, regs);
 
 	/* If we need to retry but a fatal signal is pending, handle the
 	 * signal first. We do not need to release the mmap_lock because

@@ -302,23 +305,7 @@ retry:
 		return 0;
 	}
 
-	/*
-	 * Major/minor page fault accounting is only done on the
-	 * initial attempt. If we go through a retry, it is extremely
-	 * likely that the page will be found in page cache at that point.
-	 */
-
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
 	if (!(fault & VM_FAULT_ERROR) && flags & FAULT_FLAG_ALLOW_RETRY) {
-		if (fault & VM_FAULT_MAJOR) {
-			tsk->maj_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
-					regs, addr);
-		} else {
-			tsk->min_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
-					regs, addr);
-		}
 		if (fault & VM_FAULT_RETRY) {
 			flags |= FAULT_FLAG_TRIED;
 			goto retry;
@@ -50,7 +50,7 @@ static inline void set_fs(mm_segment_t fs)
 			CONFIG_ARM64_UAO));
 }
 
-#define segment_eq(a, b)	((a) == (b))
+#define uaccess_kernel()	(get_fs() == KERNEL_DS)
 
 /*
  * Test whether a block of memory is a valid user space address.
@@ -180,7 +180,7 @@ static __kprobes unsigned long _sdei_handler(struct pt_regs *regs,
 
 	/*
 	 * We didn't take an exception to get here, set PAN. UAO will be cleared
-	 * by sdei_event_handler()s set_fs(USER_DS) call.
+	 * by sdei_event_handler()s force_uaccess_begin() call.
 	 */
 	__uaccess_enable_hw_pan();
 
@@ -404,7 +404,8 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re
 #define VM_FAULT_BADACCESS	0x020000
 
 static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
-				  unsigned int mm_flags, unsigned long vm_flags)
+				  unsigned int mm_flags, unsigned long vm_flags,
+				  struct pt_regs *regs)
 {
 	struct vm_area_struct *vma = find_vma(mm, addr);
 

@@ -428,7 +429,7 @@ static vm_fault_t __do_page_fault(struct mm_struct *mm, unsigned long addr,
 	 */
 	if (!(vma->vm_flags & vm_flags))
 		return VM_FAULT_BADACCESS;
-	return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags);
+	return handle_mm_fault(vma, addr & PAGE_MASK, mm_flags, regs);
 }
 
 static bool is_el0_instruction_abort(unsigned int esr)

@@ -450,7 +451,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
 {
 	const struct fault_info *inf;
 	struct mm_struct *mm = current->mm;
-	vm_fault_t fault, major = 0;
+	vm_fault_t fault;
 	unsigned long vm_flags = VM_ACCESS_FLAGS;
 	unsigned int mm_flags = FAULT_FLAG_DEFAULT;
 

@@ -516,8 +517,7 @@ retry:
 #endif
 	}
 
-	fault = __do_page_fault(mm, addr, mm_flags, vm_flags);
-	major |= fault & VM_FAULT_MAJOR;
+	fault = __do_page_fault(mm, addr, mm_flags, vm_flags, regs);
 
 	/* Quick path to respond to signals */
 	if (fault_signal_pending(fault, regs)) {

@@ -538,25 +538,8 @@ retry:
 	 * Handle the "normal" (no error) case first.
 	 */
 	if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP |
-			      VM_FAULT_BADACCESS)))) {
-		/*
-		 * Major/minor page fault accounting is only done
-		 * once. If we go through a retry, it is extremely
-		 * likely that the page will be found in page cache at
-		 * that point.
-		 */
-		if (major) {
-			current->maj_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
-				      addr);
-		} else {
-			current->min_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
-				      addr);
-		}
-
+			      VM_FAULT_BADACCESS))))
 		return 0;
-	}
 
 	/*
 	 * If we are in kernel mode at this point, we have no context to
@@ -461,13 +461,3 @@ void __init arm64_numa_init(void)
 
 	numa_init(dummy_numa_init);
 }
-
-/*
- * We hope that we will be hotplugging memory on nodes we already know about,
- * such that acpi_get_node() succeeds and we never fall back to this...
- */
-int memory_add_physaddr_to_nid(u64 addr)
-{
-	pr_warn("Unknown node for memory at 0x%llx, assuming node 0\n", addr);
-	return 0;
-}
@@ -13,6 +13,6 @@ typedef struct {
 #define USER_DS			((mm_segment_t) { 0x80000000UL })
 #define get_fs()		(current_thread_info()->addr_limit)
 #define set_fs(x)		(current_thread_info()->addr_limit = (x))
-#define segment_eq(a, b)	((a).seg == (b).seg)
+#define uaccess_kernel()	(get_fs().seg == KERNEL_DS.seg)
 
 #endif /* __ASM_CSKY_SEGMENT_H */
@@ -150,7 +150,8 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	fault = handle_mm_fault(vma, address, write ? FAULT_FLAG_WRITE : 0);
+	fault = handle_mm_fault(vma, address, write ? FAULT_FLAG_WRITE : 0,
+				regs);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;

@@ -160,16 +161,6 @@ good_area:
 			goto bad_area;
 		BUG();
 	}
-	if (fault & VM_FAULT_MAJOR) {
-		tsk->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
-			      address);
-	} else {
-		tsk->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
-			      address);
-	}
-
 	mmap_read_unlock(mm);
 	return;
 
@@ -33,7 +33,7 @@ static inline mm_segment_t get_fs(void)
 	return USER_DS;
 }
 
-#define segment_eq(a, b) ((a).seg == (b).seg)
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
 
 #endif /* __ASSEMBLY__ */
 
@@ -18,6 +18,7 @@
 #include <linux/signal.h>
 #include <linux/extable.h>
 #include <linux/hardirq.h>
+#include <linux/perf_event.h>
 
 /*
  * Decode of hardware exception sends us to one of several

@@ -53,6 +54,8 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
 
 	if (user_mode(regs))
 		flags |= FAULT_FLAG_USER;
+
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 retry:
 	mmap_read_lock(mm);
 	vma = find_vma(mm, address);

@@ -88,7 +91,7 @@ good_area:
 			break;
 	}
 
-	fault = handle_mm_fault(vma, address, flags);
+	fault = handle_mm_fault(vma, address, flags, regs);
 
 	if (fault_signal_pending(fault, regs))
 		return;

@@ -96,10 +99,6 @@ good_area:
 	/* The most common case -- we are done. */
 	if (likely(!(fault & VM_FAULT_ERROR))) {
 		if (flags & FAULT_FLAG_ALLOW_RETRY) {
-			if (fault & VM_FAULT_MAJOR)
-				current->maj_flt++;
-			else
-				current->min_flt++;
 			if (fault & VM_FAULT_RETRY) {
 				flags |= FAULT_FLAG_TRIED;
 				goto retry;
@@ -50,7 +50,7 @@
 #define get_fs()  (current_thread_info()->addr_limit)
 #define set_fs(x) (current_thread_info()->addr_limit = (x))
 
-#define segment_eq(a, b)	((a).seg == (b).seg)
+#define uaccess_kernel()	(get_fs().seg == KERNEL_DS.seg)
 
 /*
  * When accessing user memory, we need to make sure the entire area really is in
@@ -14,6 +14,7 @@
 #include <linux/kdebug.h>
 #include <linux/prefetch.h>
 #include <linux/uaccess.h>
+#include <linux/perf_event.h>
 
 #include <asm/processor.h>
 #include <asm/exception.h>

@@ -105,6 +106,8 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
 		flags |= FAULT_FLAG_USER;
 	if (mask & VM_WRITE)
 		flags |= FAULT_FLAG_WRITE;
 
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
 retry:
 	mmap_read_lock(mm);
 

@@ -143,7 +146,7 @@ retry:
 	 * sure we exit gracefully rather than endlessly redo the
 	 * fault.
	 */
-	fault = handle_mm_fault(vma, address, flags);
+	fault = handle_mm_fault(vma, address, flags, regs);
 
 	if (fault_signal_pending(fault, regs))
 		return;

@@ -166,10 +169,6 @@ retry:
 	}
 
 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
-		if (fault & VM_FAULT_MAJOR)
-			current->maj_flt++;
-		else
-			current->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags |= FAULT_FLAG_TRIED;
 
@@ -106,7 +106,5 @@ int memory_add_physaddr_to_nid(u64 addr)
 		return 0;
 	return nid;
 }
-
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
 #endif
@@ -52,7 +52,7 @@ static inline void set_fs(mm_segment_t val)
 #define set_fs(x)	(current_thread_info()->addr_limit = (x))
 #endif
 
-#define segment_eq(a, b) ((a).seg == (b).seg)
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
 
 #endif /* __ASSEMBLY__ */
 
@@ -85,10 +85,10 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
 static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
 {
 	if (vma->vm_mm == current->active_mm) {
-		mm_segment_t old_fs = get_fs();
+		mm_segment_t old_fs = force_uaccess_begin();
 
-		set_fs(USER_DS);
 		__flush_tlb_one(addr);
-		set_fs(old_fs);
+		force_uaccess_end(old_fs);
 	}
 }
 
@@ -12,6 +12,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 #include <linux/uaccess.h>
+#include <linux/perf_event.h>
 
 #include <asm/setup.h>
 #include <asm/traps.h>

@@ -84,6 +85,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
 
 	if (user_mode(regs))
 		flags |= FAULT_FLAG_USER;
 
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
 retry:
 	mmap_read_lock(mm);
 

@@ -134,7 +137,7 @@ good_area:
 	 * the fault.
 	 */
 
-	fault = handle_mm_fault(vma, address, flags);
+	fault = handle_mm_fault(vma, address, flags, regs);
 	pr_debug("handle_mm_fault returns %x\n", fault);
 
 	if (fault_signal_pending(fault, regs))

@@ -150,16 +153,7 @@ good_area:
 		BUG();
 	}
 
-	/*
-	 * Major/minor page fault accounting is only done on the
-	 * initial attempt. If we go through a retry, it is extremely
-	 * likely that the page will be found in page cache at that point.
-	 */
 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
-		if (fault & VM_FAULT_MAJOR)
-			current->maj_flt++;
-		else
-			current->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags |= FAULT_FLAG_TRIED;
 
@@ -41,7 +41,7 @@
 # define get_fs()	(current_thread_info()->addr_limit)
 # define set_fs(val)	(current_thread_info()->addr_limit = (val))
 
-# define segment_eq(a, b)	((a).seg == (b).seg)
+# define uaccess_kernel()	(get_fs().seg == KERNEL_DS.seg)
 
 #ifndef CONFIG_MMU
 
@@ -28,6 +28,7 @@
 #include <linux/mman.h>
 #include <linux/mm.h>
 #include <linux/interrupt.h>
+#include <linux/perf_event.h>
 
 #include <asm/page.h>
 #include <asm/mmu.h>

@@ -121,6 +122,8 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
 	if (user_mode(regs))
 		flags |= FAULT_FLAG_USER;
 
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
 	/* When running in the kernel we expect faults to occur only to
 	 * addresses in user space.  All other faults represent errors in the
 	 * kernel and should generate an OOPS.  Unfortunately, in the case of an

@@ -214,7 +217,7 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
 
-	fault = handle_mm_fault(vma, address, flags);
+	fault = handle_mm_fault(vma, address, flags, regs);
 
 	if (fault_signal_pending(fault, regs))
 		return;

@@ -230,10 +233,6 @@ good_area:
 	}
 
 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
-		if (unlikely(fault & VM_FAULT_MAJOR))
-			current->maj_flt++;
-		else
-			current->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags |= FAULT_FLAG_TRIED;
 
@@ -72,7 +72,7 @@ extern u64 __ua_limit;
 #define get_fs()	(current_thread_info()->addr_limit)
 #define set_fs(x)	(current_thread_info()->addr_limit = (x))
 
-#define segment_eq(a, b)	((a).seg == (b).seg)
+#define uaccess_kernel()	(get_fs().seg == KERNEL_DS.seg)
 
 /*
  * eva_kernel_access() - determine whether kernel memory access on an EVA system
@@ -191,17 +191,16 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 			 * memory, so we need to "switch" the address limit to
 			 * user space, so that address check can work properly.
 			 */
-			seg = get_fs();
-			set_fs(USER_DS);
+			seg = force_uaccess_begin();
 			switch (insn.spec3_format.func) {
 			case lhe_op:
 				if (!access_ok(addr, 2)) {
-					set_fs(seg);
+					force_uaccess_end(seg);
 					goto sigbus;
 				}
 				LoadHWE(addr, value, res);
 				if (res) {
-					set_fs(seg);
+					force_uaccess_end(seg);
 					goto fault;
 				}
 				compute_return_epc(regs);

@@ -209,12 +208,12 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 				break;
 			case lwe_op:
 				if (!access_ok(addr, 4)) {
-					set_fs(seg);
+					force_uaccess_end(seg);
 					goto sigbus;
 				}
 				LoadWE(addr, value, res);
 				if (res) {
-					set_fs(seg);
+					force_uaccess_end(seg);
 					goto fault;
 				}
 				compute_return_epc(regs);

@@ -222,12 +221,12 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 				break;
 			case lhue_op:
 				if (!access_ok(addr, 2)) {
-					set_fs(seg);
+					force_uaccess_end(seg);
 					goto sigbus;
 				}
 				LoadHWUE(addr, value, res);
 				if (res) {
-					set_fs(seg);
+					force_uaccess_end(seg);
 					goto fault;
 				}
 				compute_return_epc(regs);

@@ -235,35 +234,35 @@ static void emulate_load_store_insn(struct pt_regs *regs,
 				break;
 			case she_op:
 				if (!access_ok(addr, 2)) {
-					set_fs(seg);
+					force_uaccess_end(seg);
 					goto sigbus;
 				}
 				compute_return_epc(regs);
 				value = regs->regs[insn.spec3_format.rt];
 				StoreHWE(addr, value, res);
 				if (res) {
-					set_fs(seg);
+					force_uaccess_end(seg);
 					goto fault;
 				}
 				break;
 			case swe_op:
 				if (!access_ok(addr, 4)) {
-					set_fs(seg);
+					force_uaccess_end(seg);
 					goto sigbus;
 				}
 				compute_return_epc(regs);
 				value = regs->regs[insn.spec3_format.rt];
 				StoreWE(addr, value, res);
 				if (res) {
-					set_fs(seg);
+					force_uaccess_end(seg);
 					goto fault;
 				}
 				break;
 			default:
-				set_fs(seg);
+				force_uaccess_end(seg);
 				goto sigill;
 			}
-			set_fs(seg);
+			force_uaccess_end(seg);
 		}
 #endif
 		break;
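The unaligned-access hunks above (and the nds32 ones below) replace the open-coded get_fs()/set_fs(USER_DS)/set_fs(seg) sequence with the force_uaccess_begin()/force_uaccess_end() pair. A hedged sketch of the pattern; emulate_one_access() is a hypothetical stand-in for the arch-specific work:

	static int emulate_with_user_access(void __user *addr, struct pt_regs *regs)
	{
		mm_segment_t old_fs;
		int ret;

		/* Before: old_fs = get_fs(); set_fs(USER_DS); */
		old_fs = force_uaccess_begin();

		ret = emulate_one_access(addr, regs);	/* hypothetical helper */

		/* Before: set_fs(old_fs); */
		force_uaccess_end(old_fs);
		return ret;
	}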
@@ -96,6 +96,8 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
 
 	if (user_mode(regs))
 		flags |= FAULT_FLAG_USER;
 
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
 retry:
 	mmap_read_lock(mm);
 	vma = find_vma(mm, address);

@@ -152,12 +154,11 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	fault = handle_mm_fault(vma, address, flags);
+	fault = handle_mm_fault(vma, address, flags, regs);
 
 	if (fault_signal_pending(fault, regs))
 		return;
 
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 	if (unlikely(fault & VM_FAULT_ERROR)) {
 		if (fault & VM_FAULT_OOM)
 			goto out_of_memory;

@@ -168,15 +169,6 @@ good_area:
 		BUG();
 	}
 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
-		if (fault & VM_FAULT_MAJOR) {
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
-						  regs, address);
-			tsk->maj_flt++;
-		} else {
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
-						  regs, address);
-			tsk->min_flt++;
-		}
 		if (fault & VM_FAULT_RETRY) {
 			flags |= FAULT_FLAG_TRIED;
 
@@ -44,7 +44,7 @@ static inline void set_fs(mm_segment_t fs)
 	current_thread_info()->addr_limit = fs;
 }
 
-#define segment_eq(a, b)	((a) == (b))
+#define uaccess_kernel()	(get_fs() == KERNEL_DS)
 
 #define __range_ok(addr, size) (size <= get_fs() && addr <= (get_fs() -size))
 
@@ -121,7 +121,7 @@ void show_regs(struct pt_regs *regs)
 		regs->uregs[3], regs->uregs[2], regs->uregs[1], regs->uregs[0]);
 	pr_info("  IRQs o%s  Segment %s\n",
 		interrupts_enabled(regs) ? "n" : "ff",
-		segment_eq(get_fs(), KERNEL_DS)? "kernel" : "user");
+		uaccess_kernel() ? "kernel" : "user");
 }
 
 EXPORT_SYMBOL(show_regs);
@@ -512,7 +512,7 @@ int do_unaligned_access(unsigned long addr, struct pt_regs *regs)
 {
 	unsigned long inst;
 	int ret = -EFAULT;
-	mm_segment_t seg = get_fs();
+	mm_segment_t seg;
 
 	inst = get_inst(regs->ipc);
 

@@ -520,13 +520,12 @@ int do_unaligned_access(unsigned long addr, struct pt_regs *regs)
 		"Faulting addr: 0x%08lx, pc: 0x%08lx [inst: 0x%08lx ]\n", addr,
 		regs->ipc, inst);
 
-	set_fs(USER_DS);
+	seg = force_uaccess_begin();
 
 	if (inst & NDS32_16BIT_INSTRUCTION)
 		ret = do_16((inst >> 16) & 0xffff, regs);
 	else
 		ret = do_32(inst, regs);
-	set_fs(seg);
+	force_uaccess_end(seg);
 
 	return ret;
 }
@@ -121,6 +121,8 @@ void do_page_fault(unsigned long entry, unsigned long addr,
 	if (unlikely(faulthandler_disabled() || !mm))
 		goto no_context;
 
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+
 	/*
 	 * As per x86, we may deadlock here. However, since the kernel only
 	 * validly references user space from well defined areas of the code,

@@ -206,7 +208,7 @@ good_area:
 	 * the fault.
 	 */
 
-	fault = handle_mm_fault(vma, addr, flags);
+	fault = handle_mm_fault(vma, addr, flags, regs);
 
 	/*
 	 * If we need to retry but a fatal signal is pending, handle the

@@ -228,22 +230,7 @@ good_area:
 			goto bad_area;
 		}
 
-		/*
-		 * Major/minor page fault accounting is only done on the initial
-		 * attempt. If we go through a retry, it is extremely likely that the
-		 * page will be found in page cache at that point.
-		 */
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
 		if (flags & FAULT_FLAG_ALLOW_RETRY) {
-			if (fault & VM_FAULT_MAJOR) {
-				tsk->maj_flt++;
-				perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
-					      1, regs, addr);
-			} else {
-				tsk->min_flt++;
-				perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
-					      1, regs, addr);
-			}
 			if (fault & VM_FAULT_RETRY) {
 				flags |= FAULT_FLAG_TRIED;
 
@@ -30,7 +30,7 @@
 #define get_fs()		(current_thread_info()->addr_limit)
 #define set_fs(seg)		(current_thread_info()->addr_limit = (seg))
 
-#define segment_eq(a, b)	((a).seg == (b).seg)
+#define uaccess_kernel()	(get_fs().seg == KERNEL_DS.seg)
 
 #define __access_ok(addr, len)			\
 	(((signed long)(((long)get_fs().seg) &	\

@@ -24,6 +24,7 @@
 #include <linux/mm.h>
 #include <linux/extable.h>
 #include <linux/uaccess.h>
+#include <linux/perf_event.h>
 
 #include <asm/mmu_context.h>
 #include <asm/traps.h>

@@ -83,6 +84,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long cause,
 	if (user_mode(regs))
 		flags |= FAULT_FLAG_USER;
 
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
 	if (!mmap_read_trylock(mm)) {
 		if (!user_mode(regs) && !search_exception_tables(regs->ea))
 			goto bad_area_nosemaphore;

@@ -131,7 +134,7 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	fault = handle_mm_fault(vma, address, flags);
+	fault = handle_mm_fault(vma, address, flags, regs);
 
 	if (fault_signal_pending(fault, regs))
 		return;

@@ -146,16 +149,7 @@ good_area:
 		BUG();
 	}
 
-	/*
-	 * Major/minor page fault accounting is only done on the
-	 * initial attempt. If we go through a retry, it is extremely
-	 * likely that the page will be found in page cache at that point.
-	 */
 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
-		if (fault & VM_FAULT_MAJOR)
-			current->maj_flt++;
-		else
-			current->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags |= FAULT_FLAG_TRIED;
 
@@ -43,7 +43,7 @@
 #define get_fs()	(current_thread_info()->addr_limit)
 #define set_fs(x)	(current_thread_info()->addr_limit = (x))
 
-#define segment_eq(a, b)	((a) == (b))
+#define uaccess_kernel()	(get_fs() == KERNEL_DS)
 
 /* Ensure that the range from addr to addr+size is all within the process'
  * address space

@@ -15,6 +15,7 @@
 #include <linux/interrupt.h>
 #include <linux/extable.h>
 #include <linux/sched/signal.h>
+#include <linux/perf_event.h>
 
 #include <linux/uaccess.h>
 #include <asm/siginfo.h>

@@ -103,6 +104,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
 	if (in_interrupt() || !mm)
 		goto no_context;
 
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
 retry:
 	mmap_read_lock(mm);
 	vma = find_vma(mm, address);

@@ -159,7 +162,7 @@ good_area:
 	 * the fault.
 	 */
 
-	fault = handle_mm_fault(vma, address, flags);
+	fault = handle_mm_fault(vma, address, flags, regs);
 
 	if (fault_signal_pending(fault, regs))
 		return;

@@ -176,10 +179,6 @@ good_area:
 
 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
 		/*RGD modeled on Cris */
-		if (fault & VM_FAULT_MAJOR)
-			tsk->maj_flt++;
-		else
-			tsk->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags |= FAULT_FLAG_TRIED;
 
@@ -14,7 +14,7 @@
 #define KERNEL_DS	((mm_segment_t){0})
 #define USER_DS 	((mm_segment_t){1})
 
-#define segment_eq(a, b) ((a).seg == (b).seg)
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
 
 #define get_fs()	(current_thread_info()->addr_limit)
 #define set_fs(x)	(current_thread_info()->addr_limit = (x))
@@ -18,6 +18,7 @@
 #include <linux/extable.h>
 #include <linux/uaccess.h>
 #include <linux/hugetlb.h>
+#include <linux/perf_event.h>
 
 #include <asm/traps.h>
 

@@ -281,6 +282,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
 	acc_type = parisc_acctyp(code, regs->iir);
 	if (acc_type & VM_WRITE)
 		flags |= FAULT_FLAG_WRITE;
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 retry:
 	mmap_read_lock(mm);
 	vma = find_vma_prev(mm, address, &prev_vma);

@@ -302,7 +304,7 @@ good_area:
 	 * fault.
 	 */
 
-	fault = handle_mm_fault(vma, address, flags);
+	fault = handle_mm_fault(vma, address, flags, regs);
 
 	if (fault_signal_pending(fault, regs))
 		return;

@@ -323,10 +325,6 @@ good_area:
 		BUG();
 	}
 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
-		if (fault & VM_FAULT_MAJOR)
-			current->maj_flt++;
-		else
-			current->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			/*
 			 * No need to mmap_read_unlock(mm) as we would
@@ -38,8 +38,7 @@ static inline void set_fs(mm_segment_t fs)
 	set_thread_flag(TIF_FSCHECK);
 }
 
-#define segment_eq(a, b)	((a).seg == (b).seg)
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
 
 #define user_addr_max()	(get_fs().seg)
 
 #ifdef __powerpc64__
@@ -64,7 +64,7 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
 	}
 
 	ret = 0;
-	*flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0);
+	*flt = handle_mm_fault(vma, ea, is_write ? FAULT_FLAG_WRITE : 0, NULL);
 	if (unlikely(*flt & VM_FAULT_ERROR)) {
 		if (*flt & VM_FAULT_OOM) {
 			ret = -ENOMEM;

@@ -76,11 +76,6 @@ int copro_handle_mm_fault(struct mm_struct *mm, unsigned long ea,
 		BUG();
 	}
 
-	if (*flt & VM_FAULT_MAJOR)
-		current->maj_flt++;
-	else
-		current->min_flt++;
-
 out_unlock:
 	mmap_read_unlock(mm);
 	return ret;
@@ -511,7 +511,7 @@ retry:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	fault = handle_mm_fault(vma, address, flags);
+	fault = handle_mm_fault(vma, address, flags, regs);
 
 	major |= fault & VM_FAULT_MAJOR;
 

@@ -537,14 +537,9 @@ retry:
 	/*
 	 * Major/minor page fault accounting.
 	 */
-	if (major) {
-		current->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
+	if (major)
 		cmo_account_page_fault();
-	} else {
-		current->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
-	}
+
 	return 0;
 }
 NOKPROBE_SYMBOL(__do_page_fault);
@@ -8,6 +8,8 @@
 #ifndef _ASM_RISCV_UACCESS_H
 #define _ASM_RISCV_UACCESS_H
 
+#include <asm/pgtable.h>		/* for TASK_SIZE */
+
 /*
  * User space memory access functions
 */

@@ -62,11 +64,9 @@ static inline void set_fs(mm_segment_t fs)
 	current_thread_info()->addr_limit = fs;
 }
 
-#define segment_eq(a, b) ((a).seg == (b).seg)
-
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
 #define user_addr_max()	(get_fs().seg)
 
-
 /**
  * access_ok: - Checks if a user space pointer is valid
  * @addr: User space pointer to start of block to check
@ -109,7 +109,7 @@ good_area:
|
|||||||
* make sure we exit gracefully rather than endlessly redo
|
* make sure we exit gracefully rather than endlessly redo
|
||||||
* the fault.
|
* the fault.
|
||||||
*/
|
*/
|
||||||
fault = handle_mm_fault(vma, addr, flags);
|
fault = handle_mm_fault(vma, addr, flags, regs);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we need to retry but a fatal signal is pending, handle the
|
* If we need to retry but a fatal signal is pending, handle the
|
||||||
@ -127,21 +127,7 @@ good_area:
|
|||||||
BUG();
|
BUG();
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Major/minor page fault accounting is only done on the
|
|
||||||
* initial attempt. If we go through a retry, it is extremely
|
|
||||||
* likely that the page will be found in page cache at that point.
|
|
||||||
*/
|
|
||||||
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
if (flags & FAULT_FLAG_ALLOW_RETRY) {
|
||||||
if (fault & VM_FAULT_MAJOR) {
|
|
||||||
tsk->maj_flt++;
|
|
||||||
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
|
|
||||||
1, regs, addr);
|
|
||||||
} else {
|
|
||||||
tsk->min_flt++;
|
|
||||||
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
|
|
||||||
1, regs, addr);
|
|
||||||
}
|
|
||||||
if (fault & VM_FAULT_RETRY) {
|
if (fault & VM_FAULT_RETRY) {
|
||||||
flags |= FAULT_FLAG_TRIED;
|
flags |= FAULT_FLAG_TRIED;
|
||||||
|
|
||||||
|
@@ -32,7 +32,7 @@
 #define USER_DS_SACF	(3)

 #define get_fs()	(current->thread.mm_segment)
-#define segment_eq(a,b)	(((a) & 2) == ((b) & 2))
+#define uaccess_kernel()	((get_fs() & 2) == KERNEL_DS)

 void set_fs(mm_segment_t fs);

@@ -2768,7 +2768,7 @@ static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
 	struct page *page = NULL;

 	mmap_read_lock(kvm->mm);
-	get_user_pages_remote(NULL, kvm->mm, uaddr, 1, FOLL_WRITE,
+	get_user_pages_remote(kvm->mm, uaddr, 1, FOLL_WRITE,
 			      &page, NULL, NULL);
 	mmap_read_unlock(kvm->mm);
 	return page;

@@ -1892,7 +1892,7 @@ static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)

 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
 		if (r) {
-			r = fixup_user_fault(current, current->mm, hva,
+			r = fixup_user_fault(current->mm, hva,
 					     FAULT_FLAG_WRITE, &unlocked);
 			if (r)
 				break;

@@ -273,7 +273,7 @@ retry:
 	rc = get_guest_storage_key(current->mm, vmaddr, &key);

 	if (rc) {
-		rc = fixup_user_fault(current, current->mm, vmaddr,
+		rc = fixup_user_fault(current->mm, vmaddr,
 				      FAULT_FLAG_WRITE, &unlocked);
 		if (!rc) {
 			mmap_read_unlock(current->mm);

@@ -319,7 +319,7 @@ retry:
 	mmap_read_lock(current->mm);
 	rc = reset_guest_reference_bit(current->mm, vmaddr);
 	if (rc < 0) {
-		rc = fixup_user_fault(current, current->mm, vmaddr,
+		rc = fixup_user_fault(current->mm, vmaddr,
 				      FAULT_FLAG_WRITE, &unlocked);
 		if (!rc) {
 			mmap_read_unlock(current->mm);

@@ -390,7 +390,7 @@ static int handle_sske(struct kvm_vcpu *vcpu)
 					      m3 & SSKE_MC);

 		if (rc < 0) {
-			rc = fixup_user_fault(current, current->mm, vmaddr,
+			rc = fixup_user_fault(current->mm, vmaddr,
 					      FAULT_FLAG_WRITE, &unlocked);
 			rc = !rc ? -EAGAIN : rc;
 		}

@@ -1094,7 +1094,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 			rc = cond_set_guest_storage_key(current->mm, vmaddr,
 							key, NULL, nq, mr, mc);
 			if (rc < 0) {
-				rc = fixup_user_fault(current, current->mm, vmaddr,
+				rc = fixup_user_fault(current->mm, vmaddr,
 						      FAULT_FLAG_WRITE, &unlocked);
 				rc = !rc ? -EAGAIN : rc;
 			}
@@ -476,7 +476,7 @@ retry:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	fault = handle_mm_fault(vma, address, flags);
+	fault = handle_mm_fault(vma, address, flags, regs);
 	if (fault_signal_pending(fault, regs)) {
 		fault = VM_FAULT_SIGNAL;
 		if (flags & FAULT_FLAG_RETRY_NOWAIT)

@@ -486,21 +486,7 @@ retry:
 	if (unlikely(fault & VM_FAULT_ERROR))
 		goto out_up;

-	/*
-	 * Major/minor page fault accounting is only done on the
-	 * initial attempt. If we go through a retry, it is extremely
-	 * likely that the page will be found in page cache at that point.
-	 */
 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
-		if (fault & VM_FAULT_MAJOR) {
-			tsk->maj_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
-				      regs, address);
-		} else {
-			tsk->min_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
-				      regs, address);
-		}
 		if (fault & VM_FAULT_RETRY) {
 			if (IS_ENABLED(CONFIG_PGSTE) && gmap &&
 			    (flags & FAULT_FLAG_RETRY_NOWAIT)) {
@@ -649,7 +649,7 @@ retry:
 		rc = vmaddr;
 		goto out_up;
 	}
-	if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags,
+	if (fixup_user_fault(gmap->mm, vmaddr, fault_flags,
 			     &unlocked)) {
 		rc = -EFAULT;
 		goto out_up;

@@ -879,7 +879,7 @@ static int gmap_pte_op_fixup(struct gmap *gmap, unsigned long gaddr,

 	BUG_ON(gmap_is_shadow(gmap));
 	fault_flags = (prot == PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
-	if (fixup_user_fault(current, mm, vmaddr, fault_flags, &unlocked))
+	if (fixup_user_fault(mm, vmaddr, fault_flags, &unlocked))
 		return -EFAULT;
 	if (unlocked)
 		/* lost mmap_lock, caller has to retry __gmap_translate */
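The fixup_user_fault() conversion is purely mechanical: the struct task_struct argument is dropped and the mm comes first; everything else, including the unlocked out-parameter, is unchanged. A sketch of a converted call site (the mm/vmaddr names are placeholders):

	bool unlocked = false;
	int rc;

	/* was: fixup_user_fault(current, mm, vmaddr, FAULT_FLAG_WRITE, &unlocked); */
	rc = fixup_user_fault(mm, vmaddr, FAULT_FLAG_WRITE, &unlocked);
	if (rc)
		return rc;
	if (unlocked) {
		/* mmap_lock was dropped and retaken; the caller must revalidate and retry */
	}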
@@ -24,8 +24,7 @@ typedef struct {
 #define USER_DS		KERNEL_DS
 #endif

-#define segment_eq(a, b)	((a).seg == (b).seg)
-
+#define uaccess_kernel()	(get_fs().seg == KERNEL_DS.seg)

 #define get_fs()	(current_thread_info()->addr_limit)
 #define set_fs(x)	(current_thread_info()->addr_limit = (x))

@@ -5,11 +5,9 @@
 #ifdef __KERNEL__
 /*
  * SECTION_SIZE_BITS	2^N: how big each section will be
- * MAX_PHYSADDR_BITS	2^N: how much physical address space we have
- * MAX_PHYSMEM_BITS	2^N: how much memory we can have in that space
+ * MAX_PHYSMEM_BITS	2^N: how much physical address space we have
 */
 #define SECTION_SIZE_BITS	26
-#define MAX_PHYSADDR_BITS	32
 #define MAX_PHYSMEM_BITS	32

 #endif
@@ -482,8 +482,6 @@ asmlinkage void do_address_error(struct pt_regs *regs,
 	error_code = lookup_exception_vector();
 #endif

-	oldfs = get_fs();
-
 	if (user_mode(regs)) {
 		int si_code = BUS_ADRERR;
 		unsigned int user_action;

@@ -491,13 +489,13 @@ asmlinkage void do_address_error(struct pt_regs *regs,
 		local_irq_enable();
 		inc_unaligned_user_access();

-		set_fs(USER_DS);
+		oldfs = force_uaccess_begin();
 		if (copy_from_user(&instruction, (insn_size_t *)(regs->pc & ~1),
 				   sizeof(instruction))) {
-			set_fs(oldfs);
+			force_uaccess_end(oldfs);
 			goto uspace_segv;
 		}
-		set_fs(oldfs);
+		force_uaccess_end(oldfs);

 		/* shout about userspace fixups */
 		unaligned_fixups_notify(current, instruction, regs);

@@ -520,11 +518,11 @@ fixup:
 			goto uspace_segv;
 		}

-		set_fs(USER_DS);
+		oldfs = force_uaccess_begin();
 		tmp = handle_unaligned_access(instruction, regs,
 					      &user_mem_access, 0,
 					      address);
-		set_fs(oldfs);
+		force_uaccess_end(oldfs);

 		if (tmp == 0)
 			return; /* sorted */
@@ -482,22 +482,13 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	fault = handle_mm_fault(vma, address, flags);
+	fault = handle_mm_fault(vma, address, flags, regs);

 	if (unlikely(fault & (VM_FAULT_RETRY | VM_FAULT_ERROR)))
 		if (mm_fault_error(regs, error_code, address, fault))
 			return;

 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
-		if (fault & VM_FAULT_MAJOR) {
-			tsk->maj_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
-				      regs, address);
-		} else {
-			tsk->min_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
-				      regs, address);
-		}
 		if (fault & VM_FAULT_RETRY) {
 			flags |= FAULT_FLAG_TRIED;

@@ -425,15 +425,6 @@ int arch_add_memory(int nid, u64 start, u64 size,
 	return ret;
 }

-#ifdef CONFIG_NUMA
-int memory_add_physaddr_to_nid(u64 addr)
-{
-	/* Node 0 for now.. */
-	return 0;
-}
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
-#endif
-
 void arch_remove_memory(int nid, u64 start, u64 size,
 			struct vmem_altmap *altmap)
 {
@@ -7,7 +7,6 @@
 #include <asm/page.h>

 #define SECTION_SIZE_BITS       30
-#define MAX_PHYSADDR_BITS       MAX_PHYS_ADDRESS_BITS
 #define MAX_PHYSMEM_BITS        MAX_PHYS_ADDRESS_BITS

 #endif /* !(__KERNEL__) */

@@ -28,7 +28,7 @@
 #define get_fs()	(current->thread.current_ds)
 #define set_fs(val)	((current->thread.current_ds) = (val))

-#define segment_eq(a, b) ((a).seg == (b).seg)
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)

 /* We have there a nice not-mapped page at PAGE_OFFSET - PAGE_SIZE, so that this test
  * can be fairly lightweight.

@@ -32,7 +32,7 @@

 #define get_fs() ((mm_segment_t){(current_thread_info()->current_ds)})

-#define segment_eq(a, b) ((a).seg == (b).seg)
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)

 #define set_fs(val)								\
 do {										\
@@ -234,7 +234,7 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	fault = handle_mm_fault(vma, address, flags);
+	fault = handle_mm_fault(vma, address, flags, regs);

 	if (fault_signal_pending(fault, regs))
 		return;

@@ -250,15 +250,6 @@ good_area:
 	}

 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
-		if (fault & VM_FAULT_MAJOR) {
-			current->maj_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
-				      1, regs, address);
-		} else {
-			current->min_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
-				      1, regs, address);
-		}
 		if (fault & VM_FAULT_RETRY) {
 			flags |= FAULT_FLAG_TRIED;

@@ -410,7 +401,7 @@ good_area:
 		if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
 			goto bad_area;
 	}
-	switch (handle_mm_fault(vma, address, flags)) {
+	switch (handle_mm_fault(vma, address, flags, NULL)) {
 	case VM_FAULT_SIGBUS:
 	case VM_FAULT_OOM:
 		goto do_sigbus;
@@ -422,7 +422,7 @@ good_area:
 		goto bad_area;
 	}

-	fault = handle_mm_fault(vma, address, flags);
+	fault = handle_mm_fault(vma, address, flags, regs);

 	if (fault_signal_pending(fault, regs))
 		goto exit_exception;

@@ -438,15 +438,6 @@ good_area:
 	}

 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
-		if (fault & VM_FAULT_MAJOR) {
-			current->maj_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
-				      1, regs, address);
-		} else {
-			current->min_flt++;
-			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
-				      1, regs, address);
-		}
 		if (fault & VM_FAULT_RETRY) {
 			flags |= FAULT_FLAG_TRIED;

@@ -71,7 +71,7 @@ good_area:
 	do {
 		vm_fault_t fault;

-		fault = handle_mm_fault(vma, address, flags);
+		fault = handle_mm_fault(vma, address, flags, NULL);

 		if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
 			goto out_nosemaphore;

@@ -88,10 +88,6 @@ good_area:
 			BUG();
 		}
 		if (flags & FAULT_FLAG_ALLOW_RETRY) {
-			if (fault & VM_FAULT_MAJOR)
-				current->maj_flt++;
-			else
-				current->min_flt++;
 			if (fault & VM_FAULT_RETRY) {
 				flags |= FAULT_FLAG_TRIED;

@@ -33,7 +33,7 @@ static inline void set_fs(mm_segment_t fs)
 	set_thread_flag(TIF_FSCHECK);
 }

-#define segment_eq(a, b)	((a).seg == (b).seg)
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
 #define user_addr_max() (current->thread.addr_limit.seg)

 /*

@@ -1139,7 +1139,7 @@ void do_user_addr_fault(struct pt_regs *regs,
 	struct vm_area_struct *vma;
 	struct task_struct *tsk;
 	struct mm_struct *mm;
-	vm_fault_t fault, major = 0;
+	vm_fault_t fault;
 	unsigned int flags = FAULT_FLAG_DEFAULT;

 	tsk = current;

@@ -1291,8 +1291,7 @@ good_area:
 	 * userland). The return to userland is identified whenever
 	 * FAULT_FLAG_USER|FAULT_FLAG_KILLABLE are both set in flags.
 	 */
-	fault = handle_mm_fault(vma, address, flags);
-	major |= fault & VM_FAULT_MAJOR;
+	fault = handle_mm_fault(vma, address, flags, regs);

 	/* Quick path to respond to signals */
 	if (fault_signal_pending(fault, regs)) {

@@ -1319,18 +1318,6 @@ good_area:
 		return;
 	}

-	/*
-	 * Major/minor page fault accounting. If any of the events
-	 * returned VM_FAULT_MAJOR, we account it as a major fault.
-	 */
-	if (major) {
-		tsk->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
-	} else {
-		tsk->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
-	}
-
 	check_v8086_mode(regs, address, tsk);
 }
 NOKPROBE_SYMBOL(do_user_addr_fault);
@@ -1452,6 +1452,15 @@ static unsigned long probe_memory_block_size(void)
 		goto done;
 	}

+	/*
+	 * Use max block size to minimize overhead on bare metal, where
+	 * alignment for memory hotplug isn't a concern.
+	 */
+	if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+		bz = MAX_BLOCK_SIZE;
+		goto done;
+	}
+
 	/* Find the largest allowed block size that aligns to memory end */
 	for (bz = MAX_BLOCK_SIZE; bz > MIN_MEMORY_BLOCK_SIZE; bz >>= 1) {
 		if (IS_ALIGNED(boot_mem_end, bz))

@@ -929,5 +929,4 @@ int memory_add_physaddr_to_nid(u64 start)
 		nid = numa_meminfo.blk[0].nid;
 	return nid;
 }
-EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
@@ -35,7 +35,7 @@
 #define get_fs()	(current->thread.current_ds)
 #define set_fs(val)	(current->thread.current_ds = (val))

-#define segment_eq(a, b)	((a).seg == (b).seg)
+#define uaccess_kernel()	(get_fs().seg == KERNEL_DS.seg)

 #define __kernel_ok (uaccess_kernel())
 #define __user_ok(addr, size) \

@@ -72,6 +72,9 @@ void do_page_fault(struct pt_regs *regs)

 	if (user_mode(regs))
 		flags |= FAULT_FLAG_USER;
+
+	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
 retry:
 	mmap_read_lock(mm);
 	vma = find_vma(mm, address);

@@ -107,7 +110,7 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	fault = handle_mm_fault(vma, address, flags);
+	fault = handle_mm_fault(vma, address, flags, regs);

 	if (fault_signal_pending(fault, regs))
 		return;

@@ -122,10 +125,6 @@ good_area:
 		BUG();
 	}
 	if (flags & FAULT_FLAG_ALLOW_RETRY) {
-		if (fault & VM_FAULT_MAJOR)
-			current->maj_flt++;
-		else
-			current->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags |= FAULT_FLAG_TRIED;

@@ -139,12 +138,6 @@ good_area:
 	}

 	mmap_read_unlock(mm);
-	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
-	if (flags & VM_FAULT_MAJOR)
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs, address);
-	else
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs, address);
-
 	return;

 	/* Something tried to access memory that isn't in our memory map..
@@ -1136,15 +1136,14 @@ int sdei_event_handler(struct pt_regs *regs,
 	 * access kernel memory.
 	 * Do the same here because this doesn't come via the same entry code.
 	 */
-	orig_addr_limit = get_fs();
-	set_fs(USER_DS);
+	orig_addr_limit = force_uaccess_begin();

 	err = arg->callback(event_num, regs, arg->callback_arg);
 	if (err)
 		pr_err_ratelimited("event %u on CPU %u failed with error: %d\n",
 				   event_num, smp_processor_id(), err);

-	set_fs(orig_addr_limit);
+	force_uaccess_end(orig_addr_limit);

 	return err;
 }
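The open-coded get_fs()/set_fs(USER_DS)/set_fs(oldfs) sequences in this series all collapse into the new paired helpers. A sketch of the pattern, not tied to any one call site:

	mm_segment_t oldfs;

	oldfs = force_uaccess_begin();	/* was: oldfs = get_fs(); set_fs(USER_DS); */
	/* ... uaccess on user pointers ... */
	force_uaccess_end(oldfs);	/* was: set_fs(oldfs); */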
@@ -469,7 +469,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 					locked = 1;
 				}
 				ret = pin_user_pages_remote
-					(work->task, mm,
+					(mm,
 					 obj->userptr.ptr + pinned * PAGE_SIZE,
 					 npages - pinned,
 					 flags,

@@ -439,7 +439,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem_odp *umem_odp, u64 user_virt,
 		 * complex (and doesn't gain us much performance in most use
 		 * cases).
 		 */
-		npages = get_user_pages_remote(owning_process, owning_mm,
+		npages = get_user_pages_remote(owning_mm,
 				user_virt, gup_num_pages,
 				flags, local_page_list, NULL, NULL);
 		mmap_read_unlock(owning_mm);
@@ -495,7 +495,7 @@ static void do_fault(struct work_struct *work)
 	if (access_error(vma, fault))
 		goto out;

-	ret = handle_mm_fault(vma, address, flags);
+	ret = handle_mm_fault(vma, address, flags, NULL);
 out:
 	mmap_read_unlock(mm);

@@ -962,7 +962,8 @@ static irqreturn_t prq_event_thread(int irq, void *d)
 			goto invalid;

 		ret = handle_mm_fault(vma, address,
-				      req->wr_req ? FAULT_FLAG_WRITE : 0);
+				      req->wr_req ? FAULT_FLAG_WRITE : 0,
+				      NULL);
 		if (ret & VM_FAULT_ERROR)
 			goto invalid;

@@ -981,7 +981,7 @@ static int rio_mport_transfer_ioctl(struct file *filp, void __user *arg)

 	if (unlikely(copy_from_user(transfer,
 				    (void __user *)(uintptr_t)transaction.block,
-				    transaction.count * sizeof(*transfer)))) {
+				    array_size(sizeof(*transfer), transaction.count)))) {
 		ret = -EFAULT;
 		goto out_free;
 	}

@@ -994,7 +994,7 @@ static int rio_mport_transfer_ioctl(struct file *filp, void __user *arg)

 	if (unlikely(copy_to_user((void __user *)(uintptr_t)transaction.block,
 				  transfer,
-				  transaction.count * sizeof(*transfer))))
+				  array_size(sizeof(*transfer), transaction.count))))
 		ret = -EFAULT;

 out_free:

@@ -1710,8 +1710,7 @@ static int rio_mport_add_riodev(struct mport_cdev_priv *priv,
 	if (rval & RIO_PEF_SWITCH) {
 		rio_mport_read_config_32(mport, destid, hopcount,
 					 RIO_SWP_INFO_CAR, &swpinfo);
-		size += (RIO_GET_TOTAL_PORTS(swpinfo) *
-			 sizeof(rswitch->nextdev[0])) + sizeof(*rswitch);
+		size += struct_size(rswitch, nextdev, RIO_GET_TOTAL_PORTS(swpinfo));
 	}

 	rdev = kzalloc(size, GFP_KERNEL);
@@ -330,7 +330,7 @@ static struct rio_dev *rio_setup_device(struct rio_net *net,
 	size_t size;
 	u32 swpinfo = 0;

-	size = sizeof(struct rio_dev);
+	size = sizeof(*rdev);
 	if (rio_mport_read_config_32(port, destid, hopcount,
 				     RIO_PEF_CAR, &result))
 		return NULL;

@@ -338,10 +338,8 @@ static struct rio_dev *rio_setup_device(struct rio_net *net,
 	if (result & (RIO_PEF_SWITCH | RIO_PEF_MULTIPORT)) {
 		rio_mport_read_config_32(port, destid, hopcount,
 					 RIO_SWP_INFO_CAR, &swpinfo);
-		if (result & RIO_PEF_SWITCH) {
-			size += (RIO_GET_TOTAL_PORTS(swpinfo) *
-				 sizeof(rswitch->nextdev[0])) + sizeof(*rswitch);
-		}
+		if (result & RIO_PEF_SWITCH)
+			size += struct_size(rswitch, nextdev, RIO_GET_TOTAL_PORTS(swpinfo));
 	}

 	rdev = kzalloc(size, GFP_KERNEL);
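The rapidio size calculations switch to the overflow-checking helpers from <linux/overflow.h>: array_size() for a count times an element size, struct_size() for a header plus trailing flexible array. A brief illustration of the two forms as used above (everything other than the helpers is taken from the surrounding hunks, trimmed down to a sketch):

	/* count * sizeof(*transfer), saturating instead of wrapping on overflow */
	if (unlikely(copy_from_user(transfer,
				    (void __user *)(uintptr_t)transaction.block,
				    array_size(sizeof(*transfer), transaction.count))))
		return -EFAULT;

	/* sizeof(*rswitch) + n * sizeof(rswitch->nextdev[0]), again overflow-checked */
	size += struct_size(rswitch, nextdev, RIO_GET_TOTAL_PORTS(swpinfo));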
@@ -425,7 +425,7 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
 	if (ret) {
 		bool unlocked = false;

-		ret = fixup_user_fault(NULL, mm, vaddr,
+		ret = fixup_user_fault(mm, vaddr,
 				       FAULT_FLAG_REMOTE |
 				       (write_fault ?  FAULT_FLAG_WRITE : 0),
 				       &unlocked);

@@ -453,7 +453,7 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
 		flags |= FOLL_WRITE;

 	mmap_read_lock(mm);
-	ret = pin_user_pages_remote(NULL, mm, vaddr, 1, flags | FOLL_LONGTERM,
+	ret = pin_user_pages_remote(mm, vaddr, 1, flags | FOLL_LONGTERM,
 				    page, NULL, NULL);
 	if (ret == 1) {
 		*pfn = page_to_pfn(page[0]);
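get_user_pages_remote() and pin_user_pages_remote() lose their leading task_struct argument in the same way; the remaining parameters keep their order. A sketch of a converted remote pin (the mm/vaddr/flags names are placeholders):

	struct page *page;
	long ret;

	mmap_read_lock(mm);
	/* was: pin_user_pages_remote(NULL, mm, vaddr, 1, flags | FOLL_LONGTERM, &page, NULL, NULL); */
	ret = pin_user_pages_remote(mm, vaddr, 1, flags | FOLL_LONGTERM,
				    &page, NULL, NULL);
	mmap_read_unlock(mm);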
@@ -153,10 +153,10 @@ int cn_esc_printf(struct core_name *cn, const char *fmt, ...)
 	return ret;
 }

-static int cn_print_exe_file(struct core_name *cn)
+static int cn_print_exe_file(struct core_name *cn, bool name_only)
 {
 	struct file *exe_file;
-	char *pathbuf, *path;
+	char *pathbuf, *path, *ptr;
 	int ret;

 	exe_file = get_mm_exe_file(current->mm);

@@ -175,6 +175,11 @@ static int cn_print_exe_file(struct core_name *cn)
 		goto free_buf;
 	}

+	if (name_only) {
+		ptr = strrchr(path, '/');
+		if (ptr)
+			path = ptr + 1;
+	}
 	ret = cn_esc_printf(cn, "%s", path);

 free_buf:

@@ -301,12 +306,16 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm,
 					      utsname()->nodename);
 				up_read(&uts_sem);
 				break;
-			/* executable */
+			/* executable, could be changed by prctl PR_SET_NAME etc */
 			case 'e':
 				err = cn_esc_printf(cn, "%s", current->comm);
 				break;
+			/* file name of executable */
+			case 'f':
+				err = cn_print_exe_file(cn, true);
+				break;
 			case 'E':
-				err = cn_print_exe_file(cn);
+				err = cn_print_exe_file(cn, false);
 				break;
 			/* core limit size */
 			case 'c':
fs/exec.c
@@ -141,12 +141,14 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
 	if (IS_ERR(file))
 		goto out;

-	error = -EINVAL;
-	if (!S_ISREG(file_inode(file)->i_mode))
-		goto exit;
-
+	/*
+	 * may_open() has already checked for this, so it should be
+	 * impossible to trip now. But we need to be extra cautious
+	 * and check again at the very end too.
+	 */
 	error = -EACCES;
-	if (path_noexec(&file->f_path))
+	if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
+			 path_noexec(&file->f_path)))
 		goto exit;

 	fsnotify_open(file);

@@ -215,7 +217,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 	 * We are doing an exec().  'current' is the process
 	 * doing the exec and bprm->mm is the new process's mm.
 	 */
-	ret = get_user_pages_remote(current, bprm->mm, pos, 1, gup_flags,
+	ret = get_user_pages_remote(bprm->mm, pos, 1, gup_flags,
 			&page, NULL, NULL);
 	if (ret <= 0)
 		return NULL;

@@ -909,11 +911,14 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
 	if (IS_ERR(file))
 		goto out;

+	/*
+	 * may_open() has already checked for this, so it should be
+	 * impossible to trip now. But we need to be extra cautious
+	 * and check again at the very end too.
+	 */
 	err = -EACCES;
-	if (!S_ISREG(file_inode(file)->i_mode))
-		goto exit;
-
-	if (path_noexec(&file->f_path))
+	if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
+			 path_noexec(&file->f_path)))
 		goto exit;

 	err = deny_write_access(file);

@@ -1402,7 +1407,12 @@ int begin_new_exec(struct linux_binprm * bprm)
 	if (retval)
 		goto out_unlock;

-	set_fs(USER_DS);
+	/*
+	 * Ensure that the uaccess routines can actually operate on userspace
+	 * pointers:
+	 */
+	force_uaccess_begin();
+
 	me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC | PF_KTHREAD |
 					PF_NOFREEZE | PF_NO_SETAFFINITY);
 	flush_thread();
@@ -41,7 +41,7 @@ config MSDOS_FS
 	  they are compressed; to access compressed MSDOS partitions under
 	  Linux, you can either use the DOS emulator DOSEMU, described in the
 	  DOSEMU-HOWTO, available from
-	  <http://www.tldp.org/docs.html#howto>, or try dmsdosfs in
+	  <https://www.tldp.org/docs.html#howto>, or try dmsdosfs in
 	  <ftp://ibiblio.org/pub/Linux/system/filesystems/dosfs/>.  If you
 	  intend to use dosemu with a non-compressed MSDOS partition, say Y
 	  here) and MSDOS floppies. This means that file access becomes

@@ -657,6 +657,9 @@ static void fat_ra_init(struct super_block *sb, struct fatent_ra *ra,
 	unsigned long ra_pages = sb->s_bdi->ra_pages;
 	unsigned int reada_blocks;

+	if (fatent->entry >= ent_limit)
+		return;
+
 	if (ra_pages > sb->s_bdi->io_pages)
 		ra_pages = rounddown(ra_pages, sb->s_bdi->io_pages);
 	reada_blocks = ra_pages << (PAGE_SHIFT - sb->s_blocksize_bits + 1);

@@ -25,9 +25,9 @@ static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr)
 {
 	u32 attr;

-	inode_lock(inode);
+	inode_lock_shared(inode);
 	attr = fat_make_attrs(inode);
-	inode_unlock(inode);
+	inode_unlock_shared(inode);

 	return put_user(attr, user_attr);
 }
@@ -1364,6 +1364,12 @@ hugetlbfs_fill_super(struct super_block *sb, struct fs_context *fc)
 	sb->s_magic = HUGETLBFS_MAGIC;
 	sb->s_op = &hugetlbfs_ops;
 	sb->s_time_gran = 1;
+
+	/*
+	 * Due to the special and limited functionality of hugetlbfs, it does
+	 * not work well as a stacking filesystem.
+	 */
+	sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
 	sb->s_root = d_make_root(hugetlbfs_get_root(sb, ctx));
 	if (!sb->s_root)
 		goto out_free;
@@ -150,6 +150,25 @@ static int minix_remount (struct super_block * sb, int * flags, char * data)
 	return 0;
 }

+static bool minix_check_superblock(struct super_block *sb)
+{
+	struct minix_sb_info *sbi = minix_sb(sb);
+
+	if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0)
+		return false;
+
+	/*
+	 * s_max_size must not exceed the block mapping limitation. This check
+	 * is only needed for V1 filesystems, since V2/V3 support an extra level
+	 * of indirect blocks which places the limit well above U32_MAX.
+	 */
+	if (sbi->s_version == MINIX_V1 &&
+	    sb->s_maxbytes > (7 + 512 + 512*512) * BLOCK_SIZE)
+		return false;
+
+	return true;
+}
+
 static int minix_fill_super(struct super_block *s, void *data, int silent)
 {
 	struct buffer_head *bh;
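For reference, the V1 limit checked in minix_check_superblock() is just the number of blocks reachable through the V1 mapping (7 direct + 512 single-indirect + 512*512 double-indirect entries) times the 1 KiB block size:

	(7 + 512 + 512*512) * BLOCK_SIZE = 262663 * 1024 = 268966912 bytes (roughly 256.5 MiB)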
@@ -185,7 +204,7 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
 	sbi->s_zmap_blocks = ms->s_zmap_blocks;
 	sbi->s_firstdatazone = ms->s_firstdatazone;
 	sbi->s_log_zone_size = ms->s_log_zone_size;
-	sbi->s_max_size = ms->s_max_size;
+	s->s_maxbytes = ms->s_max_size;
 	s->s_magic = ms->s_magic;
 	if (s->s_magic == MINIX_SUPER_MAGIC) {
 		sbi->s_version = MINIX_V1;

@@ -216,7 +235,7 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
 		sbi->s_zmap_blocks = m3s->s_zmap_blocks;
 		sbi->s_firstdatazone = m3s->s_firstdatazone;
 		sbi->s_log_zone_size = m3s->s_log_zone_size;
-		sbi->s_max_size = m3s->s_max_size;
+		s->s_maxbytes = m3s->s_max_size;
 		sbi->s_ninodes = m3s->s_ninodes;
 		sbi->s_nzones = m3s->s_zones;
 		sbi->s_dirsize = 64;

@@ -228,11 +247,12 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
 	} else
 		goto out_no_fs;

+	if (!minix_check_superblock(s))
+		goto out_illegal_sb;
+
 	/*
 	 * Allocate the buffer map to keep the superblock small.
 	 */
-	if (sbi->s_imap_blocks == 0 || sbi->s_zmap_blocks == 0)
-		goto out_illegal_sb;
 	i = (sbi->s_imap_blocks + sbi->s_zmap_blocks) * sizeof(bh);
 	map = kzalloc(i, GFP_KERNEL);
 	if (!map)
|||||||
iget_failed(inode);
|
iget_failed(inode);
|
||||||
return ERR_PTR(-EIO);
|
return ERR_PTR(-EIO);
|
||||||
}
|
}
|
||||||
|
if (raw_inode->i_nlinks == 0) {
|
||||||
|
printk("MINIX-fs: deleted inode referenced: %lu\n",
|
||||||
|
inode->i_ino);
|
||||||
|
brelse(bh);
|
||||||
|
iget_failed(inode);
|
||||||
|
return ERR_PTR(-ESTALE);
|
||||||
|
}
|
||||||
inode->i_mode = raw_inode->i_mode;
|
inode->i_mode = raw_inode->i_mode;
|
||||||
i_uid_write(inode, raw_inode->i_uid);
|
i_uid_write(inode, raw_inode->i_uid);
|
||||||
i_gid_write(inode, raw_inode->i_gid);
|
i_gid_write(inode, raw_inode->i_gid);
|
||||||
@ -501,6 +528,13 @@ static struct inode *V2_minix_iget(struct inode *inode)
|
|||||||
iget_failed(inode);
|
iget_failed(inode);
|
||||||
return ERR_PTR(-EIO);
|
return ERR_PTR(-EIO);
|
||||||
}
|
}
|
||||||
|
if (raw_inode->i_nlinks == 0) {
|
||||||
|
printk("MINIX-fs: deleted inode referenced: %lu\n",
|
||||||
|
inode->i_ino);
|
||||||
|
brelse(bh);
|
||||||
|
iget_failed(inode);
|
||||||
|
return ERR_PTR(-ESTALE);
|
||||||
|
}
|
||||||
inode->i_mode = raw_inode->i_mode;
|
inode->i_mode = raw_inode->i_mode;
|
||||||
i_uid_write(inode, raw_inode->i_uid);
|
i_uid_write(inode, raw_inode->i_uid);
|
||||||
i_gid_write(inode, raw_inode->i_gid);
|
i_gid_write(inode, raw_inode->i_gid);
|
||||||
|
@@ -75,6 +75,7 @@ static int alloc_branch(struct inode *inode,
 	int n = 0;
 	int i;
 	int parent = minix_new_block(inode);
+	int err = -ENOSPC;

 	branch[0].key = cpu_to_block(parent);
 	if (parent) for (n = 1; n < num; n++) {

@@ -85,6 +86,11 @@ static int alloc_branch(struct inode *inode,
 			break;
 		branch[n].key = cpu_to_block(nr);
 		bh = sb_getblk(inode->i_sb, parent);
+		if (!bh) {
+			minix_free_block(inode, nr);
+			err = -ENOMEM;
+			break;
+		}
 		lock_buffer(bh);
 		memset(bh->b_data, 0, bh->b_size);
 		branch[n].bh = bh;

@@ -103,7 +109,7 @@ static int alloc_branch(struct inode *inode,
 		bforget(branch[i].bh);
 	for (i = 0; i < n; i++)
 		minix_free_block(inode, block_to_cpu(branch[i].key));
-	return -ENOSPC;
+	return err;
 }

 static inline int splice_branch(struct inode *inode,
@@ -29,12 +29,12 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH])
 	if (block < 0) {
 		printk("MINIX-fs: block_to_path: block %ld < 0 on dev %pg\n",
 			block, inode->i_sb->s_bdev);
-	} else if (block >= (minix_sb(inode->i_sb)->s_max_size/BLOCK_SIZE)) {
-		if (printk_ratelimit())
-			printk("MINIX-fs: block_to_path: "
-			       "block %ld too big on dev %pg\n",
-				block, inode->i_sb->s_bdev);
-	} else if (block < 7) {
+		return 0;
+	}
+	if ((u64)block * BLOCK_SIZE >= inode->i_sb->s_maxbytes)
+		return 0;
+
+	if (block < 7) {
 		offsets[n++] = block;
 	} else if ((block -= 7) < 512) {
 		offsets[n++] = 7;

@@ -32,13 +32,12 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH])
 	if (block < 0) {
 		printk("MINIX-fs: block_to_path: block %ld < 0 on dev %pg\n",
 			block, sb->s_bdev);
-	} else if ((u64)block * (u64)sb->s_blocksize >=
-			minix_sb(sb)->s_max_size) {
-		if (printk_ratelimit())
-			printk("MINIX-fs: block_to_path: "
-			       "block %ld too big on dev %pg\n",
-				block, sb->s_bdev);
-	} else if (block < DIRCOUNT) {
+		return 0;
+	}
+	if ((u64)block * (u64)sb->s_blocksize >= sb->s_maxbytes)
+		return 0;
+
+	if (block < DIRCOUNT) {
 		offsets[n++] = block;
 	} else if ((block -= DIRCOUNT) < INDIRCOUNT(sb)) {
 		offsets[n++] = DIRCOUNT;

@@ -32,7 +32,6 @@ struct minix_sb_info {
 	unsigned long s_zmap_blocks;
 	unsigned long s_firstdatazone;
 	unsigned long s_log_zone_size;
-	unsigned long s_max_size;
 	int s_dirsize;
 	int s_namelen;
 	struct buffer_head ** s_imap;
fs/namei.c
@@ -2849,18 +2849,24 @@ static int may_open(const struct path *path, int acc_mode, int flag)
 	case S_IFLNK:
 		return -ELOOP;
 	case S_IFDIR:
-		if (acc_mode & MAY_WRITE)
+		if (acc_mode & (MAY_WRITE | MAY_EXEC))
 			return -EISDIR;
 		break;
 	case S_IFBLK:
 	case S_IFCHR:
 		if (!may_open_dev(path))
 			return -EACCES;
-		/*FALLTHRU*/
+		fallthrough;
 	case S_IFIFO:
 	case S_IFSOCK:
+		if (acc_mode & MAY_EXEC)
+			return -EACCES;
 		flag &= ~O_TRUNC;
 		break;
+	case S_IFREG:
+		if ((acc_mode & MAY_EXEC) && path_noexec(path))
+			return -EACCES;
+		break;
 	}

 	error = inode_permission(inode, MAY_OPEN | acc_mode);
@@ -613,10 +613,10 @@ void nilfs_palloc_commit_free_entry(struct inode *inode,
 	lock = nilfs_mdt_bgl_lock(inode, group);

 	if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap))
-		nilfs_msg(inode->i_sb, KERN_WARNING,
+		nilfs_warn(inode->i_sb,
 			  "%s (ino=%lu): entry number %llu already freed",
 			  __func__, inode->i_ino,
 			  (unsigned long long)req->pr_entry_nr);
 	else
 		nilfs_palloc_group_desc_add_entries(desc, lock, 1);

@@ -654,10 +654,10 @@ void nilfs_palloc_abort_alloc_entry(struct inode *inode,
 	lock = nilfs_mdt_bgl_lock(inode, group);

 	if (!nilfs_clear_bit_atomic(lock, group_offset, bitmap))
-		nilfs_msg(inode->i_sb, KERN_WARNING,
+		nilfs_warn(inode->i_sb,
 			  "%s (ino=%lu): entry number %llu already freed",
 			  __func__, inode->i_ino,
 			  (unsigned long long)req->pr_entry_nr);
 	else
 		nilfs_palloc_group_desc_add_entries(desc, lock, 1);

|
|||||||
do {
|
do {
|
||||||
if (!nilfs_clear_bit_atomic(lock, group_offset,
|
if (!nilfs_clear_bit_atomic(lock, group_offset,
|
||||||
bitmap)) {
|
bitmap)) {
|
||||||
nilfs_msg(inode->i_sb, KERN_WARNING,
|
nilfs_warn(inode->i_sb,
|
||||||
"%s (ino=%lu): entry number %llu already freed",
|
"%s (ino=%lu): entry number %llu already freed",
|
||||||
__func__, inode->i_ino,
|
__func__, inode->i_ino,
|
||||||
(unsigned long long)entry_nrs[j]);
|
(unsigned long long)entry_nrs[j]);
|
||||||
} else {
|
} else {
|
||||||
n++;
|
n++;
|
||||||
}
|
}
|
||||||
@ -808,10 +808,10 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
|
|||||||
ret = nilfs_palloc_delete_entry_block(inode,
|
ret = nilfs_palloc_delete_entry_block(inode,
|
||||||
last_nrs[k]);
|
last_nrs[k]);
|
||||||
if (ret && ret != -ENOENT)
|
if (ret && ret != -ENOENT)
|
||||||
nilfs_msg(inode->i_sb, KERN_WARNING,
|
nilfs_warn(inode->i_sb,
|
||||||
"error %d deleting block that object (entry=%llu, ino=%lu) belongs to",
|
"error %d deleting block that object (entry=%llu, ino=%lu) belongs to",
|
||||||
ret, (unsigned long long)last_nrs[k],
|
ret, (unsigned long long)last_nrs[k],
|
||||||
inode->i_ino);
|
inode->i_ino);
|
||||||
}
|
}
|
||||||
|
|
||||||
desc_kaddr = kmap_atomic(desc_bh->b_page);
|
desc_kaddr = kmap_atomic(desc_bh->b_page);
|
||||||
@ -826,9 +826,9 @@ int nilfs_palloc_freev(struct inode *inode, __u64 *entry_nrs, size_t nitems)
|
|||||||
if (nfree == nilfs_palloc_entries_per_group(inode)) {
|
if (nfree == nilfs_palloc_entries_per_group(inode)) {
|
||||||
ret = nilfs_palloc_delete_bitmap_block(inode, group);
|
ret = nilfs_palloc_delete_bitmap_block(inode, group);
|
||||||
if (ret && ret != -ENOENT)
|
if (ret && ret != -ENOENT)
|
||||||
nilfs_msg(inode->i_sb, KERN_WARNING,
|
nilfs_warn(inode->i_sb,
|
||||||
"error %d deleting bitmap block of group=%lu, ino=%lu",
|
"error %d deleting bitmap block of group=%lu, ino=%lu",
|
||||||
ret, group, inode->i_ino);
|
ret, group, inode->i_ino);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
@@ -351,10 +351,10 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
 		     (flags & NILFS_BTREE_NODE_ROOT) ||
 		     nchildren < 0 ||
 		     nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) {
-		nilfs_msg(inode->i_sb, KERN_CRIT,
+		nilfs_crit(inode->i_sb,
 			  "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d",
 			  inode->i_ino, (unsigned long long)blocknr, level,
 			  flags, nchildren);
 		ret = 1;
 	}
 	return ret;

@@ -381,9 +381,9 @@ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
 		     level >= NILFS_BTREE_LEVEL_MAX ||
 		     nchildren < 0 ||
 		     nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
-		nilfs_msg(inode->i_sb, KERN_CRIT,
+		nilfs_crit(inode->i_sb,
 			  "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d",
 			  inode->i_ino, level, flags, nchildren);
 		ret = 1;
 	}
 	return ret;

@@ -450,10 +450,10 @@ static int nilfs_btree_bad_node(const struct nilfs_bmap *btree,
 {
 	if (unlikely(nilfs_btree_node_get_level(node) != level)) {
 		dump_stack();
-		nilfs_msg(btree->b_inode->i_sb, KERN_CRIT,
+		nilfs_crit(btree->b_inode->i_sb,
 			  "btree level mismatch (ino=%lu): %d != %d",
 			  btree->b_inode->i_ino,
 			  nilfs_btree_node_get_level(node), level);
 		return 1;
 	}
 	return 0;
@@ -508,7 +508,7 @@ static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr,

 out_no_wait:
 	if (!buffer_uptodate(bh)) {
-		nilfs_msg(btree->b_inode->i_sb, KERN_ERR,
+		nilfs_err(btree->b_inode->i_sb,
 			  "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)",
 			  btree->b_inode->i_ino, (unsigned long long)ptr);
 		brelse(bh);

@@ -2074,10 +2074,10 @@ static int nilfs_btree_propagate(struct nilfs_bmap *btree,
 	ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0);
 	if (ret < 0) {
 		if (unlikely(ret == -ENOENT))
-			nilfs_msg(btree->b_inode->i_sb, KERN_CRIT,
+			nilfs_crit(btree->b_inode->i_sb,
 				  "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d",
 				  btree->b_inode->i_ino,
 				  (unsigned long long)key, level);
 		goto out;
 	}

@@ -2114,11 +2114,11 @@ static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree,
 	if (level < NILFS_BTREE_LEVEL_NODE_MIN ||
 	    level >= NILFS_BTREE_LEVEL_MAX) {
 		dump_stack();
-		nilfs_msg(btree->b_inode->i_sb, KERN_WARNING,
+		nilfs_warn(btree->b_inode->i_sb,
 			  "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)",
 			  level, (unsigned long long)key,
 			  btree->b_inode->i_ino,
 			  (unsigned long long)bh->b_blocknr);
 		return;
 	}

@@ -322,7 +322,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
 	int ret, ncps, nicps, nss, count, i;

 	if (unlikely(start == 0 || start > end)) {
-		nilfs_msg(cpfile->i_sb, KERN_ERR,
+		nilfs_err(cpfile->i_sb,
 			  "cannot delete checkpoints: invalid range [%llu, %llu)",
 			  (unsigned long long)start, (unsigned long long)end);
 		return -EINVAL;

@@ -376,7 +376,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
 							cpfile, cno);
 					if (ret == 0)
 						continue;
-					nilfs_msg(cpfile->i_sb, KERN_ERR,
+					nilfs_err(cpfile->i_sb,
 						  "error %d deleting checkpoint block",
 						  ret);
 					break;

@@ -981,12 +981,10 @@ int nilfs_cpfile_read(struct super_block *sb, size_t cpsize,
 	int err;

 	if (cpsize > sb->s_blocksize) {
-		nilfs_msg(sb, KERN_ERR,
-			  "too large checkpoint size: %zu bytes", cpsize);
+		nilfs_err(sb, "too large checkpoint size: %zu bytes", cpsize);
 		return -EINVAL;
 	} else if (cpsize < NILFS_MIN_CHECKPOINT_SIZE) {
-		nilfs_msg(sb, KERN_ERR,
-			  "too small checkpoint size: %zu bytes", cpsize);
+		nilfs_err(sb, "too small checkpoint size: %zu bytes", cpsize);
 		return -EINVAL;
 	}

@@ -340,11 +340,11 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
 	kaddr = kmap_atomic(entry_bh->b_page);
 	entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr);
 	if (unlikely(entry->de_blocknr == cpu_to_le64(0))) {
-		nilfs_msg(dat->i_sb, KERN_CRIT,
+		nilfs_crit(dat->i_sb,
 			  "%s: invalid vblocknr = %llu, [%llu, %llu)",
 			  __func__, (unsigned long long)vblocknr,
 			  (unsigned long long)le64_to_cpu(entry->de_start),
 			  (unsigned long long)le64_to_cpu(entry->de_end));
 		kunmap_atomic(kaddr);
 		brelse(entry_bh);
 		return -EINVAL;

@@ -471,11 +471,11 @@ int nilfs_dat_read(struct super_block *sb, size_t entry_size,
 	int err;

 	if (entry_size > sb->s_blocksize) {
-		nilfs_msg(sb, KERN_ERR, "too large DAT entry size: %zu bytes",
+		nilfs_err(sb, "too large DAT entry size: %zu bytes",
 			  entry_size);
 		return -EINVAL;
 	} else if (entry_size < NILFS_MIN_DAT_ENTRY_SIZE) {
-		nilfs_msg(sb, KERN_ERR, "too small DAT entry size: %zu bytes",
+		nilfs_err(sb, "too small DAT entry size: %zu bytes",
 			  entry_size);
 		return -EINVAL;
 	}
@@ -328,16 +328,18 @@ static int nilfs_direct_assign(struct nilfs_bmap *bmap,

 	key = nilfs_bmap_data_get_key(bmap, *bh);
 	if (unlikely(key > NILFS_DIRECT_KEY_MAX)) {
-		nilfs_msg(bmap->b_inode->i_sb, KERN_CRIT,
-			  "%s (ino=%lu): invalid key: %llu", __func__,
-			  bmap->b_inode->i_ino, (unsigned long long)key);
+		nilfs_crit(bmap->b_inode->i_sb,
+			   "%s (ino=%lu): invalid key: %llu",
+			   __func__,
+			   bmap->b_inode->i_ino, (unsigned long long)key);
 		return -EINVAL;
 	}
 	ptr = nilfs_direct_get_ptr(bmap, key);
 	if (unlikely(ptr == NILFS_BMAP_INVALID_PTR)) {
-		nilfs_msg(bmap->b_inode->i_sb, KERN_CRIT,
-			  "%s (ino=%lu): invalid pointer: %llu", __func__,
-			  bmap->b_inode->i_ino, (unsigned long long)ptr);
+		nilfs_crit(bmap->b_inode->i_sb,
+			   "%s (ino=%lu): invalid pointer: %llu",
+			   __func__,
+			   bmap->b_inode->i_ino, (unsigned long long)ptr);
 		return -EINVAL;
 	}

@@ -142,7 +142,7 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
 	if (!buffer_uptodate(bh)) {
 		struct inode *inode = bh->b_page->mapping->host;

-		nilfs_msg(inode->i_sb, KERN_ERR,
+		nilfs_err(inode->i_sb,
 			  "I/O error reading %s block for GC (ino=%lu, vblocknr=%llu)",
 			  buffer_nilfs_node(bh) ? "node" : "data",
 			  inode->i_ino, (unsigned long long)bh->b_blocknr);
@@ -142,8 +142,8 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino,

 	err = nilfs_palloc_get_entry_block(ifile, ino, 0, out_bh);
 	if (unlikely(err))
-		nilfs_msg(sb, KERN_WARNING, "error %d reading inode: ino=%lu",
+		nilfs_warn(sb, "error %d reading inode: ino=%lu",
 			  err, (unsigned long)ino);
 	return err;
 }

@@ -104,10 +104,10 @@ int nilfs_get_block(struct inode *inode, sector_t blkoff,
 			 * However, the page having this block must
 			 * be locked in this case.
 			 */
-			nilfs_msg(inode->i_sb, KERN_WARNING,
+			nilfs_warn(inode->i_sb,
 				  "%s (ino=%lu): a race condition while inserting a data block at offset=%llu",
 				  __func__, inode->i_ino,
 				  (unsigned long long)blkoff);
 			err = 0;
 		}
 		nilfs_transaction_abort(inode->i_sb);
@@ -388,7 +388,8 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)

 failed_after_creation:
 	clear_nlink(inode);
-	unlock_new_inode(inode);
+	if (inode->i_state & I_NEW)
+		unlock_new_inode(inode);
 	iput(inode);	/*
 			 * raw_inode will be deleted through
 			 * nilfs_evict_inode().
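In the nilfs_new_inode() failure path above, unlock_new_inode() is now called only while the inode still carries I_NEW, since the VFS expects that call only on inodes that are still marked new. Below is a minimal sketch of the guarded error-path pattern; the helper name fs_new_inode_failed() and its surrounding context are hypothetical, and only the guard itself mirrors the hunk.

#include <linux/fs.h>

/* Hypothetical helper illustrating the guarded unlock pattern. */
static void fs_new_inode_failed(struct inode *inode)
{
	clear_nlink(inode);			/* drop the link count taken at creation */
	if (inode->i_state & I_NEW)		/* unlock only if still marked new */
		unlock_new_inode(inode);
	iput(inode);				/* final put lets eviction clean up */
}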
@@ -706,9 +707,8 @@ repeat:
 		goto repeat;

 failed:
-	nilfs_msg(ii->vfs_inode.i_sb, KERN_WARNING,
-		  "error %d truncating bmap (ino=%lu)", ret,
-		  ii->vfs_inode.i_ino);
+	nilfs_warn(ii->vfs_inode.i_sb, "error %d truncating bmap (ino=%lu)",
+		   ret, ii->vfs_inode.i_ino);
 }

 void nilfs_truncate(struct inode *inode)
@@ -919,9 +919,9 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty)
 				 * This will happen when somebody is freeing
 				 * this inode.
 				 */
-				nilfs_msg(inode->i_sb, KERN_WARNING,
+				nilfs_warn(inode->i_sb,
 					  "cannot set file dirty (ino=%lu): the file is being freed",
 					  inode->i_ino);
 				spin_unlock(&nilfs->ns_inode_lock);
 				return -EINVAL;	/*
 						 * NILFS_I_DIRTY may remain for
@@ -942,9 +942,9 @@ int __nilfs_mark_inode_dirty(struct inode *inode, int flags)

 	err = nilfs_load_inode_block(inode, &ibh);
 	if (unlikely(err)) {
-		nilfs_msg(inode->i_sb, KERN_WARNING,
+		nilfs_warn(inode->i_sb,
 			  "cannot mark inode dirty (ino=%lu): error %d loading inode block",
 			  inode->i_ino, err);
 		return err;
 	}
 	nilfs_update_inode(inode, ibh, flags);
@@ -970,8 +970,8 @@ void nilfs_dirty_inode(struct inode *inode, int flags)
 	struct nilfs_mdt_info *mdi = NILFS_MDT(inode);

 	if (is_bad_inode(inode)) {
-		nilfs_msg(inode->i_sb, KERN_WARNING,
+		nilfs_warn(inode->i_sb,
 			  "tried to mark bad_inode dirty. ignored.");
 		dump_stack();
 		return;
 	}