Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull low-level x86 updates from Ingo Molnar:
 "In this cycle this topic tree has become one of those 'super topics'
  that accumulated a lot of changes:

   - Add CONFIG_VMAP_STACK=y support to the core kernel and enable it
     on x86 - preceded by an array of changes. v4.8 saw preparatory
     changes in this area already - this is the rest of the work.
     Includes the thread stack caching performance optimization. (Andy
     Lutomirski)

   - switch_to() cleanups and all around enhancements. (Brian Gerst)

   - A large number of dumpstack infrastructure enhancements and an
     unwinder abstraction. The secret long term plan is safe(r) live
     patching plus maybe another attempt at debuginfo based unwinding -
     but all these current bits are standalone enhancements in a frame
     pointer based debug environment as well. (Josh Poimboeuf)

   - More __ro_after_init and const annotations. (Kees Cook)

   - Enable KASLR for the vmemmap memory region. (Thomas Garnier)"

[ The virtually mapped stack changes are pretty fundamental, and not
  x86-specific per se, even if they are only used on x86 right now. ]

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (70 commits)
  x86/asm: Get rid of __read_cr4_safe()
  thread_info: Use unsigned long for flags
  x86/alternatives: Add stack frame dependency to alternative_call_2()
  x86/dumpstack: Fix show_stack() task pointer regression
  x86/dumpstack: Remove dump_trace() and related callbacks
  x86/dumpstack: Convert show_trace_log_lvl() to use the new unwinder
  oprofile/x86: Convert x86_backtrace() to use the new unwinder
  x86/stacktrace: Convert save_stack_trace_*() to use the new unwinder
  perf/x86: Convert perf_callchain_kernel() to use the new unwinder
  x86/unwind: Add new unwind interface and implementations
  x86/dumpstack: Remove NULL task pointer convention
  fork: Optimize task creation by caching two thread stacks per CPU if CONFIG_VMAP_STACK=y
  sched/core: Free the stack early if CONFIG_THREAD_INFO_IN_TASK
  lib/syscall: Pin the task stack in collect_syscall()
  x86/process: Pin the target stack in get_wchan()
  x86/dumpstack: Pin the target stack when dumping it
  kthread: Pin the stack via try_get_task_stack()/put_task_stack() in to_live_kthread() function
  sched/core: Add try_get_task_stack() and put_task_stack()
  x86/entry/64: Fix a minor comment rebase error
  iommu/amd: Don't put completion-wait semaphore on stack
  ...
commit 1a4a2bc460
@@ -203,6 +203,17 @@ along to ftrace_push_return_trace() instead of a stub value of 0.
 
 Similarly, when you call ftrace_return_to_handler(), pass it the frame pointer.
 
+HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+--------------------------------
+
+An arch may pass in a pointer to the return address on the stack.  This
+prevents potential stack unwinding issues where the unwinder gets out of
+sync with ret_stack and the wrong addresses are reported by
+ftrace_graph_ret_addr().
+
+Adding support for it is easy: just define the macro in asm/ftrace.h and
+pass the return address pointer as the 'retp' argument to
+ftrace_push_return_trace().
+
 HAVE_FTRACE_NMI_ENTER
 ---------------------
 
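For a sense of what the conversion looks like on the arch side, here is a
minimal C sketch of a hypothetical prepare_ftrace_return() under the new
API. It is modeled on the x86 version further down in this diff; the
entry-test and error paths are trimmed, and passing 'parent' as the new
'retp' argument is the illustrative point, not a quote from any one arch:

void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
                           unsigned long frame_pointer)
{
        unsigned long old = *parent;
        struct ftrace_graph_ent trace;

        /* Redirect the return site to the tracer's trampoline... */
        *parent = (unsigned long)&return_to_handler;

        /*
         * ...and record the real return address.  Passing 'parent' as
         * the new 'retp' argument lets ftrace_graph_ret_addr() match
         * this stack slot against its ret_stack entry later.
         */
        if (ftrace_push_return_trace(old, self_addr, &trace.depth,
                                     frame_pointer, parent) == -EBUSY)
                *parent = old;  /* ring buffer full: undo the redirect */
}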
diff --git a/arch/Kconfig b/arch/Kconfig
@@ -696,4 +696,38 @@ config ARCH_NO_COHERENT_DMA_MMAP
 config CPU_NO_EFFICIENT_FFS
 	def_bool n
 
+config HAVE_ARCH_VMAP_STACK
+	def_bool n
+	help
+	  An arch should select this symbol if it can support kernel stacks
+	  in vmalloc space.  This means:
+
+	  - vmalloc space must be large enough to hold many kernel stacks.
+	    This may rule out many 32-bit architectures.
+
+	  - Stacks in vmalloc space need to work reliably.  For example, if
+	    vmap page tables are created on demand, either this mechanism
+	    needs to work while the stack points to a virtual address with
+	    unpopulated page tables or arch code (switch_to() and switch_mm(),
+	    most likely) needs to ensure that the stack's page table entries
+	    are populated before running on a possibly unpopulated stack.
+
+	  - If the stack overflows into a guard page, something reasonable
+	    should happen.  The definition of "reasonable" is flexible, but
+	    instantly rebooting without logging anything would be unfriendly.
+
+config VMAP_STACK
+	default y
+	bool "Use a virtually-mapped stack"
+	depends on HAVE_ARCH_VMAP_STACK && !KASAN
+	---help---
+	  Enable this if you want to use virtually-mapped kernel stacks
+	  with guard pages.  This causes kernel stack overflows to be
+	  caught immediately rather than causing difficult-to-diagnose
+	  corruption.
+
+	  This is presently incompatible with KASAN because KASAN expects
+	  the stack to map directly to the KASAN shadow map using a formula
+	  that is incorrect if the stack is in vmalloc space.
+
 source "kernel/gcov/Kconfig"
 
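For context, a sketch of how a vmalloc-backed stack with guard pages can
be allocated. This mirrors what kernel/fork.c does when
CONFIG_VMAP_STACK=y; the helper name is made up for illustration, though
__vmalloc_node_range() and the flags shown are real kernel interfaces:

#ifdef CONFIG_VMAP_STACK
/*
 * Illustrative helper: allocate THREAD_SIZE bytes of stack in vmalloc
 * space.  vmalloc mappings are separated by unmapped guard pages, so an
 * overflow hits a non-present page and faults immediately instead of
 * silently corrupting whatever happens to sit below the stack.
 */
static void *alloc_vmap_stack(int node)
{
        return __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE,
                                    VMALLOC_START, VMALLOC_END,
                                    THREADINFO_GFP, PAGE_KERNEL, 0, node,
                                    __builtin_return_address(0));
}
#endif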
@@ -218,7 +218,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 	}
 
 	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
-				       frame_pointer);
+				       frame_pointer, NULL);
 	if (err == -EBUSY) {
 		*parent = old;
 		return;
@@ -219,7 +219,7 @@ ENDPROC(ftrace_graph_caller)
  *
  * Run ftrace_return_to_handler() before going back to parent.
  * @fp is checked against the value passed by ftrace_graph_caller()
- * only when CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST is enabled.
+ * only when HAVE_FUNCTION_GRAPH_FP_TEST is enabled.
  */
 ENTRY(return_to_handler)
 	save_return_regs
@@ -138,7 +138,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 		return;
 
 	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
-				       frame_pointer);
+				       frame_pointer, NULL);
 	if (err == -EBUSY)
 		return;
 	else
@@ -169,7 +169,7 @@ ENTRY(_ftrace_graph_caller)
 	r0 = sp;	/* unsigned long *parent */
 	r1 = [sp];	/* unsigned long self_addr */
 # endif
-# ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST
+# ifdef HAVE_FUNCTION_GRAPH_FP_TEST
 	r2 = fp;	/* unsigned long frame_pointer */
 # endif
 	r0 += 16;	/* skip the 4 local regs on stack */
@@ -190,7 +190,7 @@ ENTRY(_return_to_handler)
 	[--sp] = r1;
 
 	/* get original return address */
-# ifdef CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST
+# ifdef HAVE_FUNCTION_GRAPH_FP_TEST
 	r0 = fp;	/* Blackfin is sane, so omit this */
 # endif
 	call _ftrace_return_to_handler;
@@ -107,7 +107,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 		return;
 
 	if (ftrace_push_return_trace(*parent, self_addr, &trace.depth,
-				     frame_pointer) == -EBUSY)
+				     frame_pointer, NULL) == -EBUSY)
 		return;
 
 	trace.func = self_addr;
@@ -56,7 +56,7 @@ struct thread_info {
 #define alloc_thread_stack_node(tsk, node)	((unsigned long *) 0)
 #define task_thread_info(tsk)	((struct thread_info *) 0)
 #endif
-#define free_thread_stack(ti)	/* nothing */
+#define free_thread_stack(tsk)	/* nothing */
 #define task_stack_page(tsk)	((void *)(tsk))
 
 #define __HAVE_THREAD_FUNCTIONS
@@ -63,7 +63,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		return;
 	}
 
-	err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0);
+	err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL);
 	if (err == -EBUSY) {
 		*parent = old;
 		return;
@@ -382,8 +382,8 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
 	if (unlikely(faulted))
 		goto out;
 
-	if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp)
-	    == -EBUSY) {
+	if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp,
+				     NULL) == -EBUSY) {
 		*parent_ra_addr = old_parent_ra;
 		return;
 	}
@@ -48,7 +48,7 @@ static void __hot prepare_ftrace_return(unsigned long *parent,
 		return;
 
 	if (ftrace_push_return_trace(old, self_addr, &trace.depth,
-			0 ) == -EBUSY)
+			0, NULL) == -EBUSY)
 		return;
 
 	/* activate parisc_return_to_handler() as return point */
@@ -593,7 +593,8 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
 	if (!ftrace_graph_entry(&trace))
 		goto out;
 
-	if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
+	if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
+				     NULL) == -EBUSY)
 		goto out;
 
 	parent = return_hooker;
@@ -209,7 +209,8 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
 	/* Only trace if the calling function expects to. */
 	if (!ftrace_graph_entry(&trace))
 		goto out;
-	if (ftrace_push_return_trace(parent, ip, &trace.depth, 0) == -EBUSY)
+	if (ftrace_push_return_trace(parent, ip, &trace.depth, 0,
+				     NULL) == -EBUSY)
 		goto out;
 	parent = (unsigned long) return_to_handler;
 out:
@@ -382,7 +382,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
 		return;
 	}
 
-	err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0);
+	err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL);
 	if (err == -EBUSY) {
 		__raw_writel(old, parent);
 		return;
@@ -56,7 +56,6 @@ config SPARC64
 	def_bool 64BIT
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
-	select HAVE_FUNCTION_GRAPH_FP_TEST
 	select HAVE_KRETPROBES
 	select HAVE_KPROBES
 	select HAVE_RCU_TABLE_FREE if SMP
@@ -9,6 +9,10 @@ void _mcount(void);
 #endif
 
 #endif /* CONFIG_MCOUNT */
 
+#if defined(CONFIG_SPARC64) && !defined(CC_USE_FENTRY)
+#define HAVE_FUNCTION_GRAPH_FP_TEST
+#endif
+
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -131,7 +131,7 @@ unsigned long prepare_ftrace_return(unsigned long parent,
 		return parent + 8UL;
 
 	if (ftrace_push_return_trace(parent, self_addr, &trace.depth,
-				     frame_pointer) == -EBUSY)
+				     frame_pointer, NULL) == -EBUSY)
 		return parent + 8UL;
 
 	trace.func = self_addr;
@@ -184,7 +184,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 	*parent = return_hooker;
 
 	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
-				       frame_pointer);
+				       frame_pointer, NULL);
 	if (err == -EBUSY) {
 		*parent = old;
 		return;
@@ -93,6 +93,7 @@ config X86
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_ARCH_WITHIN_STACK_FRAMES
 	select HAVE_EBPF_JIT			if X86_64
+	select HAVE_ARCH_VMAP_STACK		if X86_64
 	select HAVE_CC_STACKPROTECTOR
 	select HAVE_CMPXCHG_DOUBLE
 	select HAVE_CMPXCHG_LOCAL
@@ -109,7 +110,6 @@ config X86
 	select HAVE_EXIT_THREAD
 	select HAVE_FENTRY			if X86_64
 	select HAVE_FTRACE_MCOUNT_RECORD
-	select HAVE_FUNCTION_GRAPH_FP_TEST
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACER
 	select HAVE_GCC_PLUGINS
@@ -157,6 +157,7 @@ config X86
 	select SPARSE_IRQ
 	select SRCU
 	select SYSCTL_EXCEPTION_TRACE
+	select THREAD_INFO_IN_TASK
 	select USER_STACKTRACE_SUPPORT
 	select VIRT_TO_BUS
 	select X86_DEV_DMA_OPS if X86_64
@@ -31,13 +31,6 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
 
-static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
-{
-	unsigned long top_of_stack =
-		(unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING;
-	return (struct thread_info *)(top_of_stack - THREAD_SIZE);
-}
-
 #ifdef CONFIG_CONTEXT_TRACKING
 /* Called on entry from user mode with IRQs off. */
 __visible inline void enter_from_user_mode(void)
@@ -71,7 +64,7 @@ static long syscall_trace_enter(struct pt_regs *regs)
 {
 	u32 arch = in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
 
-	struct thread_info *ti = pt_regs_to_thread_info(regs);
+	struct thread_info *ti = current_thread_info();
 	unsigned long ret = 0;
 	bool emulated = false;
 	u32 work;
@@ -173,18 +166,17 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
 		/* Disable IRQs and retry */
 		local_irq_disable();
 
-		cached_flags = READ_ONCE(pt_regs_to_thread_info(regs)->flags);
+		cached_flags = READ_ONCE(current_thread_info()->flags);
 
 		if (!(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
 			break;
-
 	}
 }
 
 /* Called with IRQs disabled. */
 __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 {
-	struct thread_info *ti = pt_regs_to_thread_info(regs);
+	struct thread_info *ti = current_thread_info();
 	u32 cached_flags;
 
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING) && WARN_ON(!irqs_disabled()))
@@ -209,7 +201,7 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
 	 * special case only applies after poking regs and before the
 	 * very next return to user mode.
 	 */
-	ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
+	current->thread.status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
 #endif
 
 	user_enter_irqoff();
@@ -247,7 +239,7 @@ static void syscall_slow_exit_work(struct pt_regs *regs, u32 cached_flags)
  */
 __visible inline void syscall_return_slowpath(struct pt_regs *regs)
 {
-	struct thread_info *ti = pt_regs_to_thread_info(regs);
+	struct thread_info *ti = current_thread_info();
 	u32 cached_flags = READ_ONCE(ti->flags);
 
 	CT_WARN_ON(ct_state() != CONTEXT_KERNEL);
@@ -270,7 +262,7 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
 #ifdef CONFIG_X86_64
 __visible void do_syscall_64(struct pt_regs *regs)
 {
-	struct thread_info *ti = pt_regs_to_thread_info(regs);
+	struct thread_info *ti = current_thread_info();
 	unsigned long nr = regs->orig_ax;
 
 	enter_from_user_mode();
@@ -303,11 +295,11 @@ __visible void do_syscall_64(struct pt_regs *regs)
  */
 static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
 {
-	struct thread_info *ti = pt_regs_to_thread_info(regs);
+	struct thread_info *ti = current_thread_info();
 	unsigned int nr = (unsigned int)regs->orig_ax;
 
 #ifdef CONFIG_IA32_EMULATION
-	ti->status |= TS_COMPAT;
+	current->thread.status |= TS_COMPAT;
 #endif
 
 	if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) {
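The pt_regs_to_thread_info() removals above are possible because
THREAD_INFO_IN_TASK, selected for x86 earlier in this diff, embeds
thread_info at the start of task_struct instead of at the bottom of the
kernel stack. Roughly, the generic header then reduces
current_thread_info() to:

/*
 * With CONFIG_THREAD_INFO_IN_TASK, thread_info is the first member of
 * task_struct, so looking it up is a cast of 'current' rather than
 * stack-pointer arithmetic.  That arithmetic would be wrong once the
 * stack lives in vmalloc space or is freed before the task struct.
 */
#define current_thread_info() ((struct thread_info *)current)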
@@ -204,34 +204,70 @@
 	POP_GS_EX
 .endm
 
+/*
+ * %eax: prev task
+ * %edx: next task
+ */
+ENTRY(__switch_to_asm)
+	/*
+	 * Save callee-saved registers
+	 * This must match the order in struct inactive_task_frame
+	 */
+	pushl	%ebp
+	pushl	%ebx
+	pushl	%edi
+	pushl	%esi
+
+	/* switch stack */
+	movl	%esp, TASK_threadsp(%eax)
+	movl	TASK_threadsp(%edx), %esp
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+	movl	TASK_stack_canary(%edx), %ebx
+	movl	%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
+#endif
+
+	/* restore callee-saved registers */
+	popl	%esi
+	popl	%edi
+	popl	%ebx
+	popl	%ebp
+
+	jmp	__switch_to
+END(__switch_to_asm)
+
 /*
  * A newly forked process directly context switches into this address.
+ *
+ * eax: prev task we switched from
+ * ebx: kernel thread func (NULL for user thread)
+ * edi: kernel thread arg
  */
 ENTRY(ret_from_fork)
 	pushl	%eax
 	call	schedule_tail
 	popl	%eax
 
+	testl	%ebx, %ebx
+	jnz	1f		/* kernel threads are uncommon */
+
+2:
 	/* When we fork, we trace the syscall return in the child, too. */
 	movl	%esp, %eax
 	call	syscall_return_slowpath
 	jmp	restore_all
-END(ret_from_fork)
-
-ENTRY(ret_from_kernel_thread)
-	pushl	%eax
-	call	schedule_tail
-	popl	%eax
-	movl	PT_EBP(%esp), %eax
-	call	*PT_EBX(%esp)
-	movl	$0, PT_EAX(%esp)
 
+	/* kernel thread */
+1:	movl	%edi, %eax
+	call	*%ebx
 	/*
-	 * Kernel threads return to userspace as if returning from a syscall.
-	 * We should check whether anything actually uses this path and, if so,
-	 * consider switching it over to ret_from_fork.
+	 * A kernel thread is allowed to return here after successfully
+	 * calling do_execve().  Exit to userspace to complete the execve()
+	 * syscall.
 	 */
-	movl	%esp, %eax
-	call	syscall_return_slowpath
-	jmp	restore_all
-ENDPROC(ret_from_kernel_thread)
+	movl	$0, PT_EAX(%esp)
+	jmp	2b
 END(ret_from_fork)
 
 /*
  * Return to user mode is not as complex as all this looks,
@@ -179,7 +179,8 @@ GLOBAL(entry_SYSCALL_64_after_swapgs)
 	 * If we need to do entry work or if we guess we'll need to do
 	 * exit work, go straight to the slow path.
 	 */
-	testl	$_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+	movq	PER_CPU_VAR(current_task), %r11
+	testl	$_TIF_WORK_SYSCALL_ENTRY|_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
 	jnz	entry_SYSCALL64_slow_path
 
 entry_SYSCALL_64_fastpath:
@@ -217,7 +218,8 @@ entry_SYSCALL_64_fastpath:
 	 */
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl	$_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+	movq	PER_CPU_VAR(current_task), %r11
+	testl	$_TIF_ALLWORK_MASK, TASK_TI_flags(%r11)
 	jnz	1f
 
 	LOCKDEP_SYS_EXIT
@@ -351,8 +353,7 @@ ENTRY(stub_ptregs_64)
 	jmp	entry_SYSCALL64_slow_path
 
 1:
-	/* Called from C */
-	jmp	*%rax				/* called from C */
+	jmp	*%rax				/* Called from C */
 END(stub_ptregs_64)
 
 .macro ptregs_stub func
@@ -368,42 +369,74 @@ END(ptregs_\func)
 #define __SYSCALL_64(nr, sym, qual) __SYSCALL_64_QUAL_##qual(sym)
 #include <asm/syscalls_64.h>
 
+/*
+ * %rdi: prev task
+ * %rsi: next task
+ */
+ENTRY(__switch_to_asm)
+	/*
+	 * Save callee-saved registers
+	 * This must match the order in inactive_task_frame
+	 */
+	pushq	%rbp
+	pushq	%rbx
+	pushq	%r12
+	pushq	%r13
+	pushq	%r14
+	pushq	%r15
+
+	/* switch stack */
+	movq	%rsp, TASK_threadsp(%rdi)
+	movq	TASK_threadsp(%rsi), %rsp
+
+#ifdef CONFIG_CC_STACKPROTECTOR
+	movq	TASK_stack_canary(%rsi), %rbx
+	movq	%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
+#endif
+
+	/* restore callee-saved registers */
+	popq	%r15
+	popq	%r14
+	popq	%r13
+	popq	%r12
+	popq	%rbx
+	popq	%rbp
+
+	jmp	__switch_to
+END(__switch_to_asm)
+
 /*
  * A newly forked process directly context switches into this address.
  *
- * rdi: prev task we switched from
+ * rax: prev task we switched from
+ * rbx: kernel thread func (NULL for user thread)
+ * r12: kernel thread arg
  */
 ENTRY(ret_from_fork)
-	LOCK ; btr $TIF_FORK, TI_flags(%r8)
-
+	movq	%rax, %rdi
 	call	schedule_tail			/* rdi: 'prev' task parameter */
 
-	testb	$3, CS(%rsp)			/* from kernel_thread? */
-	jnz	1f
+	testq	%rbx, %rbx			/* from kernel_thread? */
+	jnz	1f				/* kernel threads are uncommon */
 
-	/*
-	 * We came from kernel_thread.  This code path is quite twisted, and
-	 * someone should clean it up.
-	 *
-	 * copy_thread_tls stashes the function pointer in RBX and the
-	 * parameter to be passed in RBP.  The called function is permitted
-	 * to call do_execve and thereby jump to user mode.
-	 */
-	movq	RBP(%rsp), %rdi
-	call	*RBX(%rsp)
-	movl	$0, RAX(%rsp)
-
-	/*
-	 * Fall through as though we're exiting a syscall.  This makes a
-	 * twisted sort of sense if we just called do_execve.
-	 */
-
-1:
+2:
 	movq	%rsp, %rdi
 	call	syscall_return_slowpath	/* returns with IRQs disabled */
 	TRACE_IRQS_ON			/* user mode is traced as IRQS on */
 	SWAPGS
 	jmp	restore_regs_and_iret
+
+1:
+	/* kernel thread */
+	movq	%r12, %rdi
+	call	*%rbx
+	/*
+	 * A kernel thread is allowed to return here after successfully
+	 * calling do_execve().  Exit to userspace to complete the execve()
+	 * syscall.
	 */
+	movq	$0, RAX(%rsp)
+	jmp	2b
 END(ret_from_fork)
 
 /*
@@ -555,27 +588,69 @@ native_irq_return_iret:
 
 #ifdef CONFIG_X86_ESPFIX64
 native_irq_return_ldt:
-	pushq	%rax
-	pushq	%rdi
+	/*
+	 * We are running with user GSBASE.  All GPRs contain their user
+	 * values.  We have a percpu ESPFIX stack that is eight slots
+	 * long (see ESPFIX_STACK_SIZE).  espfix_waddr points to the bottom
+	 * of the ESPFIX stack.
+	 *
+	 * We clobber RAX and RDI in this code.  We stash RDI on the
+	 * normal stack and RAX on the ESPFIX stack.
+	 *
+	 * The ESPFIX stack layout we set up looks like this:
+	 *
+	 * --- top of ESPFIX stack ---
+	 * SS
+	 * RSP
+	 * RFLAGS
+	 * CS
+	 * RIP  <-- RSP points here when we're done
+	 * RAX  <-- espfix_waddr points here
+	 * --- bottom of ESPFIX stack ---
+	 */
+
+	pushq	%rdi				/* Stash user RDI */
 	SWAPGS
 	movq	PER_CPU_VAR(espfix_waddr), %rdi
-	movq	%rax, (0*8)(%rdi)		/* RAX */
-	movq	(2*8)(%rsp), %rax		/* RIP */
+	movq	%rax, (0*8)(%rdi)		/* user RAX */
+	movq	(1*8)(%rsp), %rax		/* user RIP */
 	movq	%rax, (1*8)(%rdi)
-	movq	(3*8)(%rsp), %rax		/* CS */
+	movq	(2*8)(%rsp), %rax		/* user CS */
 	movq	%rax, (2*8)(%rdi)
-	movq	(4*8)(%rsp), %rax		/* RFLAGS */
+	movq	(3*8)(%rsp), %rax		/* user RFLAGS */
 	movq	%rax, (3*8)(%rdi)
-	movq	(6*8)(%rsp), %rax		/* SS */
+	movq	(5*8)(%rsp), %rax		/* user SS */
 	movq	%rax, (5*8)(%rdi)
-	movq	(5*8)(%rsp), %rax		/* RSP */
+	movq	(4*8)(%rsp), %rax		/* user RSP */
 	movq	%rax, (4*8)(%rdi)
-	andl	$0xffff0000, %eax
-	popq	%rdi
+	/* Now RAX == RSP. */
+
+	andl	$0xffff0000, %eax		/* RAX = (RSP & 0xffff0000) */
+	popq	%rdi				/* Restore user RDI */
+
+	/*
+	 * espfix_stack[31:16] == 0.  The page tables are set up such that
+	 * (espfix_stack | (X & 0xffff0000)) points to a read-only alias of
+	 * espfix_waddr for any X.  That is, there are 65536 RO aliases of
+	 * the same page.  Set up RSP so that RSP[31:16] contains the
+	 * respective 16 bits of the /userspace/ RSP and RSP nonetheless
+	 * still points to an RO alias of the ESPFIX stack.
+	 */
 	orq	PER_CPU_VAR(espfix_stack), %rax
 	SWAPGS
 	movq	%rax, %rsp
-	popq	%rax
+
+	/*
+	 * At this point, we cannot write to the stack any more, but we can
+	 * still read.
+	 */
+	popq	%rax				/* Restore user RAX */
+
+	/*
+	 * RSP now points to an ordinary IRET frame, except that the page
+	 * is read-only and RSP[31:16] are preloaded with the userspace
+	 * values.  We can now IRET back to userspace.
+	 */
 	jmp	native_irq_return_iret
 #endif
 END(common_interrupt)
@@ -37,6 +37,7 @@
 #include <asm/timer.h>
 #include <asm/desc.h>
 #include <asm/ldt.h>
+#include <asm/unwind.h>
 
 #include "perf_event.h"
 
@@ -2267,39 +2268,26 @@ void arch_perf_update_userpage(struct perf_event *event,
 	cyc2ns_read_end(data);
 }
 
 /*
  * callchain support
  */
 
-static int backtrace_stack(void *data, char *name)
-{
-	return 0;
-}
-
-static int backtrace_address(void *data, unsigned long addr, int reliable)
-{
-	struct perf_callchain_entry_ctx *entry = data;
-
-	return perf_callchain_store(entry, addr);
-}
-
-static const struct stacktrace_ops backtrace_ops = {
-	.stack			= backtrace_stack,
-	.address		= backtrace_address,
-	.walk_stack		= print_context_stack_bp,
-};
-
 void
 perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
 {
+	struct unwind_state state;
+	unsigned long addr;
+
 	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
 		/* TODO: We don't support guest os callchain now */
 		return;
 	}
 
-	perf_callchain_store(entry, regs->ip);
+	if (perf_callchain_store(entry, regs->ip))
+		return;
 
-	dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry);
+	for (unwind_start(&state, current, regs, NULL); !unwind_done(&state);
+	     unwind_next_frame(&state)) {
+		addr = unwind_get_return_address(&state);
+		if (!addr || perf_callchain_store(entry, addr))
+			return;
+	}
 }
 
 static inline int
@@ -217,10 +217,14 @@ static inline int alternatives_text_reserved(void *start, void *end)
  */
 #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2,   \
 			   output, input...)				      \
+{									      \
+	register void *__sp asm(_ASM_SP);				      \
 	asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\
 		"call %P[new2]", feature2)				      \
-		: output : [old] "i" (oldfunc), [new1] "i" (newfunc1),	      \
-		[new2] "i" (newfunc2), ## input)
+		: output, "+r" (__sp)					      \
+		: [old] "i" (oldfunc), [new1] "i" (newfunc1),		      \
+		  [new2] "i" (newfunc2), ## input);			      \
+}
 
 /*
  * use this macro(s) if you need more than one output parameter
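The new "+r" (__sp) operand is the stack-frame-dependency idiom: binding a
register variable to the stack pointer and listing it as an input/output
makes the compiler treat the stack as live, so it sets up a frame before
emitting the call. A reduced sketch of the same idiom outside the
alternatives machinery (hypothetical helper, not from this diff):

static inline void indirect_call(void (*func)(void))
{
        register void *__sp asm(_ASM_SP);

        /*
         * Without the "+r" (__sp) constraint, GCC may emit the call
         * before the function prologue has established a stack frame,
         * which breaks frame-pointer unwinding through this call site.
         */
        asm volatile("call *%1" : "+r" (__sp) : "r" (func) : "memory");
}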
@@ -36,7 +36,7 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
 
 extern struct desc_ptr idt_descr;
 extern gate_desc idt_table[];
-extern struct desc_ptr debug_idt_descr;
+extern const struct desc_ptr debug_idt_descr;
 extern gate_desc debug_idt_table[];
 
 struct gdt_page {
@@ -45,7 +45,8 @@
 extern u64 xfeatures_mask;
 extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];
 
-extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);
+extern void __init update_regset_xstate_info(unsigned int size,
+					     u64 xstate_mask);
 
 void fpu__xstate_clear_all_cpu_caps(void);
 void *get_xsave_addr(struct xregs_state *xsave, int xstate);
@@ -6,6 +6,7 @@
 # define MCOUNT_ADDR		((unsigned long)(__fentry__))
 #else
 # define MCOUNT_ADDR		((unsigned long)(mcount))
+# define HAVE_FUNCTION_GRAPH_FP_TEST
 #endif
 #define MCOUNT_INSN_SIZE	5 /* sizeof mcount call */
 
@@ -13,6 +14,8 @@
 #define ARCH_SUPPORTS_FTRACE_OPS 1
 #endif
 
+#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+
 #ifndef __ASSEMBLY__
 extern void mcount(void);
 extern atomic_t modifying_ftrace_code;
@@ -6,6 +6,7 @@ unsigned long kaslr_get_random_long(const char *purpose);
 #ifdef CONFIG_RANDOMIZE_MEMORY
 extern unsigned long page_offset_base;
 extern unsigned long vmalloc_base;
+extern unsigned long vmemmap_base;
 
 void kernel_randomize_memory(void);
 #else
@@ -24,8 +24,6 @@ enum die_val {
 extern void printk_address(unsigned long address);
 extern void die(const char *, struct pt_regs *,long);
 extern int __must_check __die(const char *, struct pt_regs *, long);
-extern void show_trace(struct task_struct *t, struct pt_regs *regs,
-		       unsigned long *sp, unsigned long bp);
 extern void show_stack_regs(struct pt_regs *regs);
 extern void __show_regs(struct pt_regs *regs, int all);
 extern unsigned long oops_begin(void);
@@ -80,10 +80,6 @@ static inline unsigned long __read_cr4(void)
 {
 	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4);
 }
-static inline unsigned long __read_cr4_safe(void)
-{
-	return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe);
-}
 
 static inline void __write_cr4(unsigned long x)
 {
@@ -108,7 +108,6 @@ struct pv_cpu_ops {
 	unsigned long (*read_cr0)(void);
 	void (*write_cr0)(unsigned long);
 
-	unsigned long (*read_cr4_safe)(void);
 	unsigned long (*read_cr4)(void);
 	void (*write_cr4)(unsigned long);
 
@@ -57,11 +57,13 @@ typedef struct { pteval_t pte; } pte_t;
 #define MAXMEM		_AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
 #define VMALLOC_SIZE_TB	_AC(32, UL)
 #define __VMALLOC_BASE	_AC(0xffffc90000000000, UL)
-#define VMEMMAP_START	_AC(0xffffea0000000000, UL)
+#define __VMEMMAP_BASE	_AC(0xffffea0000000000, UL)
 #ifdef CONFIG_RANDOMIZE_MEMORY
 #define VMALLOC_START	vmalloc_base
+#define VMEMMAP_START	vmemmap_base
 #else
 #define VMALLOC_START	__VMALLOC_BASE
+#define VMEMMAP_START	__VMEMMAP_BASE
 #endif /* CONFIG_RANDOMIZE_MEMORY */
 #define VMALLOC_END	(VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
 #define MODULES_VADDR	(__START_KERNEL_map + KERNEL_IMAGE_SIZE)
@@ -389,9 +389,9 @@ struct thread_struct {
 	unsigned short		fsindex;
 	unsigned short		gsindex;
 #endif
-#ifdef CONFIG_X86_32
-	unsigned long		ip;
-#endif
+
+	u32			status;		/* thread synchronous flags */
+
 #ifdef CONFIG_X86_64
 	unsigned long		fsbase;
 	unsigned long		gsbase;
@@ -437,6 +437,15 @@ struct thread_struct {
 	 */
 };
 
+/*
+ * Thread-synchronous status.
+ *
+ * This is different from the flags in that nobody else
+ * ever touches our thread-synchronous status, so we don't
+ * have to worry about atomic accesses.
+ */
+#define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
+
 /*
  * Set IOPL bits in EFLAGS from given mask
  */
@@ -724,8 +733,6 @@ static inline void spin_lock_prefetch(const void *x)
 	.addr_limit		= KERNEL_DS,				  \
 }
 
-extern unsigned long thread_saved_pc(struct task_struct *tsk);
-
 /*
  * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
  * This is necessary to guarantee that the entire "struct pt_regs"
@@ -776,17 +783,13 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
 	.addr_limit		= KERNEL_DS,				  \
 }
 
-/*
- * Return saved PC of a blocked thread.
- * What is this good for? it will be always the scheduler or ret_from_fork.
- */
-#define thread_saved_pc(t)	READ_ONCE_NOCHECK(*(unsigned long *)((t)->thread.sp - 8))
-
 #define task_pt_regs(tsk)	((struct pt_regs *)(tsk)->thread.sp0 - 1)
 extern unsigned long KSTK_ESP(struct task_struct *task);
 
 #endif /* CONFIG_X86_64 */
 
+extern unsigned long thread_saved_pc(struct task_struct *tsk);
+
 extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
 					       unsigned long new_sp);
 
@@ -44,9 +44,9 @@ struct trampoline_header {
 extern struct real_mode_header *real_mode_header;
 extern unsigned char real_mode_blob_end[];
 
-extern unsigned long init_rsp;
 extern unsigned long initial_code;
 extern unsigned long initial_gs;
+extern unsigned long initial_stack;
 
 extern unsigned char real_mode_blob[];
 extern unsigned char real_mode_relocs[];
@@ -39,9 +39,6 @@ DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_bios_cpu_apicid);
 DECLARE_EARLY_PER_CPU_READ_MOSTLY(int, x86_cpu_to_logical_apicid);
 #endif
 
-/* Static state in head.S used to set up a CPU */
-extern unsigned long stack_start; /* Initial stack pointer address */
-
 struct task_struct;
 
 struct smp_ops {
@@ -59,22 +59,19 @@ static inline void native_write_cr3(unsigned long val)
 static inline unsigned long native_read_cr4(void)
 {
 	unsigned long val;
-	asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
-	return val;
-}
-
-static inline unsigned long native_read_cr4_safe(void)
-{
-	unsigned long val;
-	/* This could fault if %cr4 does not exist. In x86_64, a cr4 always
-	 * exists, so it will never fail. */
 #ifdef CONFIG_X86_32
+	/*
+	 * This could fault if CR4 does not exist.  Non-existent CR4
+	 * is functionally equivalent to CR4 == 0.  Keep it simple and pretend
+	 * that CR4 == 0 on CPUs that don't have CR4.
+	 */
 	asm volatile("1: mov %%cr4, %0\n"
 		     "2:\n"
 		     _ASM_EXTABLE(1b, 2b)
 		     : "=r" (val), "=m" (__force_order) : "0" (0));
 #else
-	val = native_read_cr4();
+	/* CR4 always exists on x86_64. */
+	asm volatile("mov %%cr4,%0\n\t" : "=r" (val), "=m" (__force_order));
 #endif
 	return val;
 }
@@ -182,11 +179,6 @@ static inline unsigned long __read_cr4(void)
 	return native_read_cr4();
 }
 
-static inline unsigned long __read_cr4_safe(void)
-{
-	return native_read_cr4_safe();
-}
-
 static inline void __write_cr4(unsigned long x)
 {
 	native_write_cr4(x);
@@ -8,86 +8,86 @@
 
 #include <linux/uaccess.h>
 #include <linux/ptrace.h>
+#include <asm/switch_to.h>
+
+enum stack_type {
+	STACK_TYPE_UNKNOWN,
+	STACK_TYPE_TASK,
+	STACK_TYPE_IRQ,
+	STACK_TYPE_SOFTIRQ,
+	STACK_TYPE_EXCEPTION,
+	STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
+};
+
+struct stack_info {
+	enum stack_type type;
+	unsigned long *begin, *end, *next_sp;
+};
+
+bool in_task_stack(unsigned long *stack, struct task_struct *task,
+		   struct stack_info *info);
+
+int get_stack_info(unsigned long *stack, struct task_struct *task,
+		   struct stack_info *info, unsigned long *visit_mask);
+
+void stack_type_str(enum stack_type type, const char **begin,
+		    const char **end);
+
+static inline bool on_stack(struct stack_info *info, void *addr, size_t len)
+{
+	void *begin = info->begin;
+	void *end   = info->end;
+
+	return (info->type != STACK_TYPE_UNKNOWN &&
+		addr >= begin && addr < end &&
+		addr + len > begin && addr + len <= end);
+}
 
 extern int kstack_depth_to_print;
 
-struct thread_info;
-struct stacktrace_ops;
-
-typedef unsigned long (*walk_stack_t)(struct task_struct *task,
-				      unsigned long *stack,
-				      unsigned long bp,
-				      const struct stacktrace_ops *ops,
-				      void *data,
-				      unsigned long *end,
-				      int *graph);
-
-extern unsigned long
-print_context_stack(struct task_struct *task,
-		    unsigned long *stack, unsigned long bp,
-		    const struct stacktrace_ops *ops, void *data,
-		    unsigned long *end, int *graph);
-
-extern unsigned long
-print_context_stack_bp(struct task_struct *task,
-		       unsigned long *stack, unsigned long bp,
-		       const struct stacktrace_ops *ops, void *data,
-		       unsigned long *end, int *graph);
-
-/* Generic stack tracer with callbacks */
-
-struct stacktrace_ops {
-	int (*address)(void *data, unsigned long address, int reliable);
-	/* On negative return stop dumping */
-	int (*stack)(void *data, char *name);
-	walk_stack_t	walk_stack;
-};
-
-void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
-		unsigned long *stack, unsigned long bp,
-		const struct stacktrace_ops *ops, void *data);
-
 #ifdef CONFIG_X86_32
 #define STACKSLOTS_PER_LINE 8
-#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
 #else
 #define STACKSLOTS_PER_LINE 4
-#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
 #endif
 
 #ifdef CONFIG_FRAME_POINTER
-static inline unsigned long
-stack_frame(struct task_struct *task, struct pt_regs *regs)
+static inline unsigned long *
+get_frame_pointer(struct task_struct *task, struct pt_regs *regs)
 {
-	unsigned long bp;
-
 	if (regs)
-		return regs->bp;
+		return (unsigned long *)regs->bp;
 
-	if (task == current) {
-		/* Grab bp right from our regs */
-		get_bp(bp);
-		return bp;
-	}
+	if (task == current)
+		return __builtin_frame_address(0);
 
-	/* bp is the last reg pushed by switch_to */
-	return *(unsigned long *)task->thread.sp;
+	return (unsigned long *)((struct inactive_task_frame *)task->thread.sp)->bp;
 }
 #else
-static inline unsigned long
-stack_frame(struct task_struct *task, struct pt_regs *regs)
+static inline unsigned long *
+get_frame_pointer(struct task_struct *task, struct pt_regs *regs)
 {
-	return 0;
+	return NULL;
 }
-#endif
+#endif /* CONFIG_FRAME_POINTER */
 
-extern void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		   unsigned long *stack, unsigned long bp, char *log_lvl);
+static inline unsigned long *
+get_stack_pointer(struct task_struct *task, struct pt_regs *regs)
+{
+	if (regs)
+		return (unsigned long *)kernel_stack_pointer(regs);
 
-extern void
-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
-		   unsigned long *sp, unsigned long bp, char *log_lvl);
+	if (task == current)
+		return __builtin_frame_address(0);
+
+	return (unsigned long *)task->thread.sp;
+}
+
+void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+			unsigned long *stack, char *log_lvl);
+
+void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+			unsigned long *sp, char *log_lvl);
 
 extern unsigned int code_bytes;
 
@@ -106,7 +106,7 @@ static inline unsigned long caller_frame_pointer(void)
 {
 	struct stack_frame *frame;
 
-	get_bp(frame);
+	frame = __builtin_frame_address(0);
 
 #ifdef CONFIG_FRAME_POINTER
 	frame = frame->next_frame;
@@ -2,130 +2,66 @@
 #define _ASM_X86_SWITCH_TO_H
 
 struct task_struct; /* one of the stranger aspects of C forward declarations */
+
+struct task_struct *__switch_to_asm(struct task_struct *prev,
+				    struct task_struct *next);
+
 __visible struct task_struct *__switch_to(struct task_struct *prev,
-					   struct task_struct *next);
+					  struct task_struct *next);
 struct tss_struct;
 void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 		      struct tss_struct *tss);
 
-#ifdef CONFIG_X86_32
+/* This runs on the previous thread's stack. */
+static inline void prepare_switch_to(struct task_struct *prev,
+				     struct task_struct *next)
+{
+#ifdef CONFIG_VMAP_STACK
+	/*
+	 * If we switch to a stack that has a top-level paging entry
+	 * that is not present in the current mm, the resulting #PF
+	 * will be promoted to a double-fault and we'll panic.  Probe
+	 * the new stack now so that vmalloc_fault can fix up the page
+	 * tables if needed.  This can only happen if we use a stack
+	 * in vmap space.
+	 *
+	 * We assume that the stack is aligned so that it never spans
+	 * more than one top-level paging entry.
+	 *
+	 * To minimize cache pollution, just follow the stack pointer.
+	 */
+	READ_ONCE(*(unsigned char *)next->thread.sp);
+#endif
+}
 
-#ifdef CONFIG_CC_STACKPROTECTOR
-#define __switch_canary						\
-	"movl %P[task_canary](%[next]), %%ebx\n\t"		\
-	"movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
-#define __switch_canary_oparam					\
-	, [stack_canary] "=m" (stack_canary.canary)
-#define __switch_canary_iparam					\
-	, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
-#else	/* CC_STACKPROTECTOR */
-#define __switch_canary
-#define __switch_canary_oparam
-#define __switch_canary_iparam
-#endif	/* CC_STACKPROTECTOR */
+asmlinkage void ret_from_fork(void);
+
+/* data that is pointed to by thread.sp */
+struct inactive_task_frame {
+#ifdef CONFIG_X86_64
+	unsigned long r15;
+	unsigned long r14;
+	unsigned long r13;
+	unsigned long r12;
+#else
+	unsigned long si;
+	unsigned long di;
+#endif
+	unsigned long bx;
+	unsigned long bp;
+	unsigned long ret_addr;
+};
+
+struct fork_frame {
+	struct inactive_task_frame frame;
+	struct pt_regs regs;
+};
 
-/*
- * Saving eflags is important. It switches not only IOPL between tasks,
- * it also protects other tasks from NT leaking through sysenter etc.
- */
 #define switch_to(prev, next, last)					\
 do {									\
-	/*								\
-	 * Context-switching clobbers all registers, so we clobber	\
-	 * them explicitly, via unused output variables.		\
-	 * (EAX and EBP is not listed because EBP is saved/restored	\
-	 * explicitly for wchan access and EAX is the return value of	\
-	 * __switch_to())						\
-	 */								\
-	unsigned long ebx, ecx, edx, esi, edi;				\
+	prepare_switch_to(prev, next);					\
 									\
-	asm volatile("pushl %%ebp\n\t"		/* save    EBP   */	\
-		     "movl %%esp,%[prev_sp]\n\t"  /* save    ESP   */	\
-		     "movl %[next_sp],%%esp\n\t"  /* restore ESP   */	\
-		     "movl $1f,%[prev_ip]\n\t"	/* save    EIP   */	\
-		     "pushl %[next_ip]\n\t"	/* restore EIP   */	\
-		     __switch_canary					\
-		     "jmp __switch_to\n"	/* regparm call  */	\
-		     "1:\t"						\
-		     "popl %%ebp\n\t"		/* restore EBP   */	\
-									\
-		     /* output parameters */				\
-		     : [prev_sp] "=m" (prev->thread.sp),		\
-		       [prev_ip] "=m" (prev->thread.ip),		\
-		       "=a" (last),					\
-									\
-		       /* clobbered output registers: */		\
-		       "=b" (ebx), "=c" (ecx), "=d" (edx),		\
-		       "=S" (esi), "=D" (edi)				\
-									\
-		       __switch_canary_oparam				\
-									\
-		       /* input parameters: */				\
-		     : [next_sp]  "m" (next->thread.sp),		\
-		       [next_ip]  "m" (next->thread.ip),		\
-									\
-		       /* regparm parameters for __switch_to(): */	\
-		       [prev]     "a" (prev),				\
-		       [next]     "d" (next)				\
-									\
-		       __switch_canary_iparam				\
-									\
-		     : /* reloaded segment registers */			\
-			"memory");					\
+	((last) = __switch_to_asm((prev), (next)));			\
 } while (0)
 
-#else /* CONFIG_X86_32 */
-
-/* frame pointer must be last for get_wchan */
-#define SAVE_CONTEXT    "pushq %%rbp ; movq %%rsi,%%rbp\n\t"
-#define RESTORE_CONTEXT "movq %%rbp,%%rsi ; popq %%rbp\t"
-
-#define __EXTRA_CLOBBER  \
-	, "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
-	  "r12", "r13", "r14", "r15", "flags"
-
-#ifdef CONFIG_CC_STACKPROTECTOR
-#define __switch_canary						  \
-	"movq %P[task_canary](%%rsi),%%r8\n\t"			  \
-	"movq %%r8,"__percpu_arg([gs_canary])"\n\t"
-#define __switch_canary_oparam					  \
-	, [gs_canary] "=m" (irq_stack_union.stack_canary)
-#define __switch_canary_iparam					  \
-	, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
-#else	/* CC_STACKPROTECTOR */
-#define __switch_canary
-#define __switch_canary_oparam
-#define __switch_canary_iparam
-#endif	/* CC_STACKPROTECTOR */
-
-/*
- * There is no need to save or restore flags, because flags are always
- * clean in kernel mode, with the possible exception of IOPL.  Kernel IOPL
- * has no effect.
- */
-#define switch_to(prev, next, last) \
-	asm volatile(SAVE_CONTEXT					  \
-	     "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */	  \
-	     "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */	  \
-	     "call __switch_to\n\t"					  \
-	     "movq "__percpu_arg([current_task])",%%rsi\n\t"		  \
-	     __switch_canary						  \
-	     "movq %P[thread_info](%%rsi),%%r8\n\t"			  \
-	     "movq %%rax,%%rdi\n\t"					  \
-	     "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t"		  \
-	     "jnz ret_from_fork\n\t"					  \
-	     RESTORE_CONTEXT						  \
-	     : "=a" (last)						  \
-	       __switch_canary_oparam					  \
-	     : [next] "S" (next), [prev] "D" (prev),			  \
-	       [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
-	       [ti_flags] "i" (offsetof(struct thread_info, flags)),	  \
-	       [_tif_fork] "i" (_TIF_FORK),				  \
-	       [thread_info] "i" (offsetof(struct task_struct, stack)),  \
-	       [current_task] "m" (current_task)			  \
-	       __switch_canary_iparam					  \
-	     : "memory", "cc" __EXTRA_CLOBBER)
-
-#endif /* CONFIG_X86_32 */
-
 #endif /* _ASM_X86_SWITCH_TO_H */
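The "must match the order" comments in the __switch_to_asm implementations
above are load-bearing: the asm pushes the callee-saved registers and then
stores %rsp/%esp into thread.sp, so C code can overlay struct
inactive_task_frame on a sleeping task's stack. get_frame_pointer() in
stacktrace.h above does exactly that; a condensed sketch of the pattern:

/*
 * Read the saved frame pointer of a task that is not running:
 * thread.sp points at the registers __switch_to_asm pushed, laid out
 * exactly as struct inactive_task_frame describes.
 */
static unsigned long *saved_frame_pointer(struct task_struct *task)
{
        struct inactive_task_frame *frame =
                (struct inactive_task_frame *)task->thread.sp;

        return (unsigned long *)frame->bp;
}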
@@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task,
 	 * TS_COMPAT is set for 32-bit syscall entries and then
 	 * remains set until we return to user mode.
 	 */
-	if (task_thread_info(task)->status & (TS_COMPAT|TS_I386_REGS_POKED))
+	if (task->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
 		/*
 		 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
 		 * and will match correctly in comparisons.
@@ -116,7 +116,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
 					 unsigned long *args)
 {
 # ifdef CONFIG_IA32_EMULATION
-	if (task_thread_info(task)->status & TS_COMPAT)
+	if (task->thread.status & TS_COMPAT)
 		switch (i) {
 		case 0:
 			if (!n--) break;
@@ -177,7 +177,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
 					 const unsigned long *args)
 {
 # ifdef CONFIG_IA32_EMULATION
-	if (task_thread_info(task)->status & TS_COMPAT)
+	if (task->thread.status & TS_COMPAT)
 		switch (i) {
 		case 0:
 			if (!n--) break;
@@ -234,18 +234,8 @@ static inline void syscall_set_arguments(struct task_struct *task,
 
 static inline int syscall_get_arch(void)
 {
-#ifdef CONFIG_IA32_EMULATION
-	/*
-	 * TS_COMPAT is set for 32-bit syscall entry and then
-	 * remains set until we return to user mode.
-	 *
-	 * x32 tasks should be considered AUDIT_ARCH_X86_64.
-	 */
-	if (task_thread_info(current)->status & TS_COMPAT)
-		return AUDIT_ARCH_I386;
-#endif
-	/* Both x32 and x86_64 are considered "64-bit". */
-	return AUDIT_ARCH_X86_64;
+	/* x32 tasks should be considered AUDIT_ARCH_X86_64. */
+	return in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
 }
 #endif	/* CONFIG_X86_32 */
 
@@ -52,21 +52,6 @@ struct task_struct;
 #include <asm/cpufeature.h>
 #include <linux/atomic.h>
 
-struct thread_info {
-	struct task_struct	*task;		/* main task structure */
-	__u32			flags;		/* low level flags */
-	__u32			status;		/* thread synchronous flags */
-	__u32			cpu;		/* current CPU */
-};
-
-#define INIT_THREAD_INFO(tsk)			\
-{						\
-	.task		= &tsk,			\
-	.flags		= 0,			\
-	.cpu		= 0,			\
-}
-
-#define init_thread_info	(init_thread_union.thread_info)
 #define init_stack		(init_thread_union.stack)
 
 #else /* !__ASSEMBLY__ */
@@ -95,7 +80,6 @@ struct thread_info {
 #define TIF_UPROBE		12	/* breakpointed or singlestepping */
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_IA32		17	/* IA32 compatibility process */
-#define TIF_FORK		18	/* ret_from_fork */
 #define TIF_NOHZ		19	/* in adaptive nohz mode */
 #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
 #define TIF_POLLING_NRFLAG	21	/* idle is polling for TIF_NEED_RESCHED */
@@ -119,7 +103,6 @@ struct thread_info {
 #define _TIF_UPROBE		(1 << TIF_UPROBE)
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_IA32		(1 << TIF_IA32)
-#define _TIF_FORK		(1 << TIF_FORK)
 #define _TIF_NOHZ		(1 << TIF_NOHZ)
 #define _TIF_POLLING_NRFLAG	(1 << TIF_POLLING_NRFLAG)
 #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
@@ -160,11 +143,6 @@ struct thread_info {
  */
 #ifndef __ASSEMBLY__
 
-static inline struct thread_info *current_thread_info(void)
-{
-	return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
-}
-
 static inline unsigned long current_stack_pointer(void)
 {
 	unsigned long sp;
@@ -226,60 +204,19 @@ static inline int arch_within_stack_frames(const void * const stack,
 # define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
 #endif
 
-/*
- * ASM operand which evaluates to a 'thread_info' address of
- * the current task, if it is known that "reg" is exactly "off"
- * bytes below the top of the stack currently.
- *
- * ( The kernel stack's size is known at build time, it is usually
- *   2 or 4 pages, and the bottom of the kernel stack contains
- *   the thread_info structure. So to access the thread_info very
- *   quickly from assembly code we can calculate down from the
- *   top of the kernel stack to the bottom, using constant,
- *   build-time calculations only. )
- *
- * For example, to fetch the current thread_info->flags value into %eax
- * on x86-64 defconfig kernels, in syscall entry code where RSP is
- * currently at exactly SIZEOF_PTREGS bytes away from the top of the
- * stack:
- *
- *      mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax
- *
- * will translate to:
- *
- *      8b 84 24 b8 c0 ff ff      mov -0x3f48(%rsp), %eax
- *
- * which is below the current RSP by almost 16K.
- */
-#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg)
-
 #endif
 
-/*
- * Thread-synchronous status.
- *
- * This is different from the flags in that nobody else
- * ever touches our thread-synchronous status, so we don't
- * have to worry about atomic accesses.
- */
-#define TS_COMPAT		0x0002	/* 32bit syscall active (64BIT)*/
 #ifdef CONFIG_COMPAT
 #define TS_I386_REGS_POKED	0x0004	/* regs poked by 32-bit ptracer */
 #endif
 
-#ifndef __ASSEMBLY__
-
-static inline bool in_ia32_syscall(void)
-{
 #ifdef CONFIG_X86_32
-	return true;
+#define in_ia32_syscall() true
+#else
+#define in_ia32_syscall() (IS_ENABLED(CONFIG_IA32_EMULATION) && \
+			   current->thread.status & TS_COMPAT)
 #endif
-#ifdef CONFIG_IA32_EMULATION
-	if (current_thread_info()->status & TS_COMPAT)
-		return true;
-#endif
-	return false;
-}
 
 /*
  * Force syscall return via IRET by making it look as if there was
@@ -81,7 +81,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
 /* Initialize cr4 shadow for this CPU. */
 static inline void cr4_init_shadow(void)
 {
-	this_cpu_write(cpu_tlbstate.cr4, __read_cr4_safe());
+	this_cpu_write(cpu_tlbstate.cr4, __read_cr4());
 }
 
 /* Set in this cpu's CR4. */
@@ -117,6 +117,12 @@ extern void ist_exit(struct pt_regs *regs);
 extern void ist_begin_non_atomic(struct pt_regs *regs);
 extern void ist_end_non_atomic(void);
 
+#ifdef CONFIG_VMAP_STACK
+void __noreturn handle_stack_overflow(const char *message,
+				      struct pt_regs *regs,
+				      unsigned long fault_address);
+#endif
+
 /* Interrupts/Exceptions */
 enum {
 	X86_TRAP_DE	=  0,	/*  0, Divide-by-zero */
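handle_stack_overflow() is called from the double-fault path elsewhere in
this series (the traps.c side is not shown in this excerpt). A hedged
sketch of what such a caller looks like; the bounds test here is
simplified from the real check:

/*
 * Sketch only: if a double fault's CR2 lands within a page of the
 * task stack, treat it as an overflow into the guard page and report
 * it as such instead of dying with an uninformative double fault.
 */
static void maybe_report_overflow(struct pt_regs *regs, unsigned long cr2)
{
        unsigned long stk = (unsigned long)task_stack_page(current);

        if (cr2 >= stk - PAGE_SIZE && cr2 < stk + THREAD_SIZE + PAGE_SIZE)
                handle_stack_overflow("kernel stack overflow (double-fault)",
                                      regs, cr2);
}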
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
new file mode 100644
@@ -0,0 +1,73 @@
+#ifndef _ASM_X86_UNWIND_H
+#define _ASM_X86_UNWIND_H
+
+#include <linux/sched.h>
+#include <linux/ftrace.h>
+#include <asm/ptrace.h>
+#include <asm/stacktrace.h>
+
+struct unwind_state {
+	struct stack_info stack_info;
+	unsigned long stack_mask;
+	struct task_struct *task;
+	int graph_idx;
+#ifdef CONFIG_FRAME_POINTER
+	unsigned long *bp;
+#else
+	unsigned long *sp;
+#endif
+};
+
+void __unwind_start(struct unwind_state *state, struct task_struct *task,
+		    struct pt_regs *regs, unsigned long *first_frame);
+
+bool unwind_next_frame(struct unwind_state *state);
+
+static inline bool unwind_done(struct unwind_state *state)
+{
+	return state->stack_info.type == STACK_TYPE_UNKNOWN;
+}
+
+static inline
+void unwind_start(struct unwind_state *state, struct task_struct *task,
+		  struct pt_regs *regs, unsigned long *first_frame)
+{
+	first_frame = first_frame ? : get_stack_pointer(task, regs);
+
+	__unwind_start(state, task, regs, first_frame);
+}
+
+#ifdef CONFIG_FRAME_POINTER
+
+static inline
+unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
+{
+	if (unwind_done(state))
+		return NULL;
+
+	return state->bp + 1;
+}
+
+unsigned long unwind_get_return_address(struct unwind_state *state);
+
+#else /* !CONFIG_FRAME_POINTER */
+
+static inline
+unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
+{
+	return NULL;
+}
+
+static inline
+unsigned long unwind_get_return_address(struct unwind_state *state)
+{
+	if (unwind_done(state))
+		return 0;
+
+	return ftrace_graph_ret_addr(state->task, &state->graph_idx,
+				     *state->sp, state->sp);
+}
+
+#endif /* CONFIG_FRAME_POINTER */
+
+#endif /* _ASM_X86_UNWIND_H */
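Typical use of the new interface is a start/done/next loop; the
perf_callchain_kernel() conversion earlier in this diff is the canonical
caller. A minimal walk over the current task's stack looks like this
(sketch, printing instead of recording):

/*
 * Walk the current kernel stack and print each return address.
 * Mirrors the loop added to perf_callchain_kernel() above.
 */
static void print_kernel_backtrace(void)
{
        struct unwind_state state;

        for (unwind_start(&state, current, NULL, NULL);
             !unwind_done(&state); unwind_next_frame(&state)) {
                unsigned long addr = unwind_get_return_address(&state);

                if (!addr)
                        break;
                printk(KERN_INFO "%pS\n", (void *)addr);
        }
}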
@@ -125,6 +125,12 @@ obj-$(CONFIG_EFI)			+= sysfb_efi.o
 obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o
 obj-$(CONFIG_TRACING)			+= tracepoint.o
 
+ifdef CONFIG_FRAME_POINTER
+obj-y					+= unwind_frame.o
+else
+obj-y					+= unwind_guess.o
+endif
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
@@ -99,7 +99,7 @@ int x86_acpi_suspend_lowlevel(void)
 	saved_magic = 0x12345678;
 #else /* CONFIG_64BIT */
 #ifdef CONFIG_SMP
-	stack_start = (unsigned long)temp_stack + sizeof(temp_stack);
+	initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
 	early_gdt_descr.address =
 			(unsigned long)get_cpu_gdt_table(smp_processor_id());
 	initial_gs = per_cpu_offset(smp_processor_id());
@@ -25,7 +25,7 @@
 static struct apic apic_physflat;
 static struct apic apic_flat;
 
-struct apic __read_mostly *apic = &apic_flat;
+struct apic *apic __ro_after_init = &apic_flat;
 EXPORT_SYMBOL_GPL(apic);
 
 static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
@@ -154,7 +154,7 @@ static int flat_probe(void)
 	return 1;
 }
 
-static struct apic apic_flat = {
+static struct apic apic_flat __ro_after_init = {
 	.name				= "flat",
 	.probe				= flat_probe,
 	.acpi_madt_oem_check		= flat_acpi_madt_oem_check,
@@ -248,7 +248,7 @@ static int physflat_probe(void)
 	return 0;
 }
 
-static struct apic apic_physflat = {
+static struct apic apic_physflat __ro_after_init = {
 
 	.name				= "physical flat",
 	.probe				= physflat_probe,
@@ -108,7 +108,7 @@ static void noop_apic_write(u32 reg, u32 v)
 	WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_APIC) && !disable_apic);
 }
 
-struct apic apic_noop = {
+struct apic apic_noop __ro_after_init = {
 	.name				= "noop",
 	.probe				= noop_probe,
 	.acpi_madt_oem_check		= NULL,
@ -142,7 +142,7 @@ static int probe_bigsmp(void)
|
||||
return dmi_bigsmp;
|
||||
}
|
||||
|
||||
static struct apic apic_bigsmp = {
|
||||
static struct apic apic_bigsmp __ro_after_init = {
|
||||
|
||||
.name = "bigsmp",
|
||||
.probe = probe_bigsmp,
|
||||
|
@ -269,7 +269,7 @@ static void hpet_msi_write_msg(struct irq_data *data, struct msi_msg *msg)
|
||||
hpet_msi_write(irq_data_get_irq_handler_data(data), msg);
|
||||
}
|
||||
|
||||
static struct irq_chip hpet_msi_controller = {
|
||||
static struct irq_chip hpet_msi_controller __ro_after_init = {
|
||||
.name = "HPET-MSI",
|
||||
.irq_unmask = hpet_msi_unmask,
|
||||
.irq_mask = hpet_msi_mask,
|
||||
|
@ -72,7 +72,7 @@ static int probe_default(void)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static struct apic apic_default = {
|
||||
static struct apic apic_default __ro_after_init = {
|
||||
|
||||
.name = "default",
|
||||
.probe = probe_default,
|
||||
@ -126,7 +126,7 @@ static struct apic apic_default = {
|
||||
|
||||
apic_driver(apic_default);
|
||||
|
||||
struct apic *apic = &apic_default;
|
||||
struct apic *apic __ro_after_init = &apic_default;
|
||||
EXPORT_SYMBOL_GPL(apic);
|
||||
|
||||
static int cmdline_apic __initdata;
|
||||
|
@ -227,7 +227,7 @@ static void cluster_vector_allocation_domain(int cpu, struct cpumask *retmask,
|
||||
cpumask_and(retmask, mask, per_cpu(cpus_in_cluster, cpu));
|
||||
}
|
||||
|
||||
static struct apic apic_x2apic_cluster = {
|
||||
static struct apic apic_x2apic_cluster __ro_after_init = {
|
||||
|
||||
.name = "cluster x2apic",
|
||||
.probe = x2apic_cluster_probe,
|
||||
|
@ -98,7 +98,7 @@ static int x2apic_phys_probe(void)
|
||||
return apic == &apic_x2apic_phys;
|
||||
}
|
||||
|
||||
static struct apic apic_x2apic_phys = {
|
||||
static struct apic apic_x2apic_phys __ro_after_init = {
|
||||
|
||||
.name = "physical x2apic",
|
||||
.probe = x2apic_phys_probe,
|
||||
|
@ -560,7 +560,7 @@ static int uv_probe(void)
|
||||
return apic == &apic_x2apic_uv_x;
|
||||
}
|
||||
|
||||
static struct apic __refdata apic_x2apic_uv_x = {
|
||||
static struct apic apic_x2apic_uv_x __ro_after_init = {
|
||||
|
||||
.name = "UV large system",
|
||||
.probe = uv_probe,
|
||||
|
@ -29,10 +29,13 @@
|
||||
|
||||
void common(void) {
|
||||
BLANK();
|
||||
OFFSET(TI_flags, thread_info, flags);
|
||||
OFFSET(TI_status, thread_info, status);
|
||||
OFFSET(TASK_threadsp, task_struct, thread.sp);
|
||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||
OFFSET(TASK_stack_canary, task_struct, stack_canary);
|
||||
#endif
|
||||
|
||||
BLANK();
|
||||
OFFSET(TASK_TI_flags, task_struct, thread_info.flags);
|
||||
OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
|
||||
|
||||
BLANK();
|
||||
|
@ -57,6 +57,11 @@ void foo(void)
|
||||
/* Size of SYSENTER_stack */
|
||||
DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
|
||||
|
||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||
BLANK();
|
||||
OFFSET(stack_canary_offset, stack_canary, canary);
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
|
||||
BLANK();
|
||||
OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled);
|
||||
|
@ -56,6 +56,11 @@ int main(void)
|
||||
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
|
||||
BLANK();
|
||||
|
||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||
DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary));
|
||||
BLANK();
|
||||
#endif
|
||||
|
||||
DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
|
||||
DEFINE(NR_syscalls, sizeof(syscalls_64));
|
||||
|
||||
|
@ -1264,9 +1264,14 @@ static __init int setup_disablecpuid(char *arg)
|
||||
__setup("clearcpuid=", setup_disablecpuid);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
|
||||
struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1,
|
||||
(unsigned long) debug_idt_table };
|
||||
struct desc_ptr idt_descr __ro_after_init = {
|
||||
.size = NR_VECTORS * 16 - 1,
|
||||
.address = (unsigned long) idt_table,
|
||||
};
|
||||
const struct desc_ptr debug_idt_descr = {
|
||||
.size = NR_VECTORS * 16 - 1,
|
||||
.address = (unsigned long) debug_idt_table,
|
||||
};
|
||||
|
||||
DEFINE_PER_CPU_FIRST(union irq_stack_union,
|
||||
irq_stack_union) __aligned(PAGE_SIZE) __visible;
|
||||
@ -1280,7 +1285,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
|
||||
EXPORT_PER_CPU_SYMBOL(current_task);
|
||||
|
||||
DEFINE_PER_CPU(char *, irq_stack_ptr) =
|
||||
init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
|
||||
init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE;
|
||||
|
||||
DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
|
||||
|
||||
@ -1304,11 +1309,6 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
|
||||
/* May not be marked __init: used by software suspend */
|
||||
void syscall_init(void)
|
||||
{
|
||||
/*
|
||||
* LSTAR and STAR live in a bit strange symbiosis.
|
||||
* They both write to the same internal register. STAR allows to
|
||||
* set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
|
||||
*/
|
||||
wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
|
||||
wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
|
||||
|
||||
|
@ -72,14 +72,14 @@ static DEFINE_MUTEX(mtrr_mutex);
|
||||
u64 size_or_mask, size_and_mask;
|
||||
static bool mtrr_aps_delayed_init;
|
||||
|
||||
static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
|
||||
static const struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM] __ro_after_init;
|
||||
|
||||
const struct mtrr_ops *mtrr_if;
|
||||
|
||||
static void set_mtrr(unsigned int reg, unsigned long base,
|
||||
unsigned long size, mtrr_type type);
|
||||
|
||||
void set_mtrr_ops(const struct mtrr_ops *ops)
|
||||
void __init set_mtrr_ops(const struct mtrr_ops *ops)
|
||||
{
|
||||
if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
|
||||
mtrr_ops[ops->vendor] = ops;
|
||||
|
@ -54,7 +54,7 @@ void fill_mtrr_var_range(unsigned int index,
|
||||
bool get_mtrr_state(void);
|
||||
void mtrr_bp_pat_init(void);
|
||||
|
||||
extern void set_mtrr_ops(const struct mtrr_ops *ops);
|
||||
extern void __init set_mtrr_ops(const struct mtrr_ops *ops);
|
||||
|
||||
extern u64 size_or_mask, size_and_mask;
|
||||
extern const struct mtrr_ops *mtrr_if;
|
||||
|
@ -17,7 +17,7 @@
|
||||
#include <linux/sysfs.h>
|
||||
|
||||
#include <asm/stacktrace.h>
|
||||
|
||||
#include <asm/unwind.h>
|
||||
|
||||
int panic_on_unrecovered_nmi;
|
||||
int panic_on_io_nmi;
|
||||
@ -25,11 +25,29 @@ unsigned int code_bytes = 64;
|
||||
int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE;
|
||||
static int die_counter;
|
||||
|
||||
static void printk_stack_address(unsigned long address, int reliable,
|
||||
void *data)
|
||||
bool in_task_stack(unsigned long *stack, struct task_struct *task,
|
||||
struct stack_info *info)
|
||||
{
|
||||
unsigned long *begin = task_stack_page(task);
|
||||
unsigned long *end = task_stack_page(task) + THREAD_SIZE;
|
||||
|
||||
if (stack < begin || stack >= end)
|
||||
return false;
|
||||
|
||||
info->type = STACK_TYPE_TASK;
|
||||
info->begin = begin;
|
||||
info->end = end;
|
||||
info->next_sp = NULL;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void printk_stack_address(unsigned long address, int reliable,
|
||||
char *log_lvl)
|
||||
{
|
||||
touch_nmi_watchdog();
|
||||
printk("%s [<%p>] %s%pB\n",
|
||||
(char *)data, (void *)address, reliable ? "" : "? ",
|
||||
log_lvl, (void *)address, reliable ? "" : "? ",
|
||||
(void *)address);
|
||||
}
|
||||
|
||||
@ -38,176 +56,120 @@ void printk_address(unsigned long address)
|
||||
pr_cont(" [<%p>] %pS\n", (void *)address, (void *)address);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
static void
|
||||
print_ftrace_graph_addr(unsigned long addr, void *data,
|
||||
const struct stacktrace_ops *ops,
|
||||
struct task_struct *task, int *graph)
|
||||
void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
||||
unsigned long *stack, char *log_lvl)
|
||||
{
|
||||
unsigned long ret_addr;
|
||||
int index;
|
||||
struct unwind_state state;
|
||||
struct stack_info stack_info = {0};
|
||||
unsigned long visit_mask = 0;
|
||||
int graph_idx = 0;
|
||||
|
||||
if (addr != (unsigned long)return_to_handler)
|
||||
return;
|
||||
printk("%sCall Trace:\n", log_lvl);
|
||||
|
||||
index = task->curr_ret_stack;
|
||||
|
||||
if (!task->ret_stack || index < *graph)
|
||||
return;
|
||||
|
||||
index -= *graph;
|
||||
ret_addr = task->ret_stack[index].ret;
|
||||
|
||||
ops->address(data, ret_addr, 1);
|
||||
|
||||
(*graph)++;
|
||||
}
|
||||
#else
|
||||
static inline void
|
||||
print_ftrace_graph_addr(unsigned long addr, void *data,
|
||||
const struct stacktrace_ops *ops,
|
||||
struct task_struct *task, int *graph)
|
||||
{ }
|
||||
#endif
|
||||
|
||||
/*
|
||||
* x86-64 can have up to three kernel stacks:
|
||||
* process stack
|
||||
* interrupt stack
|
||||
* severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
|
||||
*/
|
||||
|
||||
static inline int valid_stack_ptr(struct task_struct *task,
|
||||
void *p, unsigned int size, void *end)
|
||||
{
|
||||
void *t = task_stack_page(task);
|
||||
if (end) {
|
||||
if (p < end && p >= (end-THREAD_SIZE))
|
||||
return 1;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
return p >= t && p < t + THREAD_SIZE - size;
|
||||
}
|
||||
|
||||
unsigned long
|
||||
print_context_stack(struct task_struct *task,
|
||||
unsigned long *stack, unsigned long bp,
|
||||
const struct stacktrace_ops *ops, void *data,
|
||||
unsigned long *end, int *graph)
|
||||
{
|
||||
struct stack_frame *frame = (struct stack_frame *)bp;
|
||||
unwind_start(&state, task, regs, stack);
|
||||
|
||||
/*
|
||||
* If we overflowed the stack into a guard page, jump back to the
|
||||
* bottom of the usable stack.
|
||||
* Iterate through the stacks, starting with the current stack pointer.
|
||||
* Each stack has a pointer to the next one.
|
||||
*
|
||||
* x86-64 can have several stacks:
|
||||
* - task stack
|
||||
* - interrupt stack
|
||||
* - HW exception stacks (double fault, nmi, debug, mce)
|
||||
*
|
||||
* x86-32 can have up to three stacks:
|
||||
* - task stack
|
||||
* - softirq stack
|
||||
* - hardirq stack
|
||||
*/
|
||||
if ((unsigned long)task_stack_page(task) - (unsigned long)stack <
|
||||
PAGE_SIZE)
|
||||
stack = (unsigned long *)task_stack_page(task);
|
||||
for (; stack; stack = stack_info.next_sp) {
|
||||
const char *str_begin, *str_end;
|
||||
|
||||
while (valid_stack_ptr(task, stack, sizeof(*stack), end)) {
|
||||
unsigned long addr;
|
||||
/*
|
||||
* If we overflowed the task stack into a guard page, jump back
|
||||
* to the bottom of the usable stack.
|
||||
*/
|
||||
if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
|
||||
stack = task_stack_page(task);
|
||||
|
||||
addr = *stack;
|
||||
if (__kernel_text_address(addr)) {
|
||||
if ((unsigned long) stack == bp + sizeof(long)) {
|
||||
ops->address(data, addr, 1);
|
||||
frame = frame->next_frame;
|
||||
bp = (unsigned long) frame;
|
||||
} else {
|
||||
ops->address(data, addr, 0);
|
||||
}
|
||||
print_ftrace_graph_addr(addr, data, ops, task, graph);
|
||||
if (get_stack_info(stack, task, &stack_info, &visit_mask))
|
||||
break;
|
||||
|
||||
stack_type_str(stack_info.type, &str_begin, &str_end);
|
||||
if (str_begin)
|
||||
printk("%s <%s> ", log_lvl, str_begin);
|
||||
|
||||
/*
|
||||
* Scan the stack, printing any text addresses we find. At the
|
||||
* same time, follow proper stack frames with the unwinder.
|
||||
*
|
||||
* Addresses found during the scan which are not reported by
|
||||
* the unwinder are considered to be additional clues which are
|
||||
* sometimes useful for debugging and are prefixed with '?'.
|
||||
* This also serves as a failsafe option in case the unwinder
|
||||
* goes off in the weeds.
|
||||
*/
|
||||
for (; stack < stack_info.end; stack++) {
|
||||
unsigned long real_addr;
|
||||
int reliable = 0;
|
||||
unsigned long addr = *stack;
|
||||
unsigned long *ret_addr_p =
|
||||
unwind_get_return_address_ptr(&state);
|
||||
|
||||
if (!__kernel_text_address(addr))
|
||||
continue;
|
||||
|
||||
if (stack == ret_addr_p)
|
||||
reliable = 1;
|
||||
|
||||
/*
|
||||
* When function graph tracing is enabled for a
|
||||
* function, its return address on the stack is
|
||||
* replaced with the address of an ftrace handler
|
||||
* (return_to_handler). In that case, before printing
|
||||
* the "real" address, we want to print the handler
|
||||
* address as an "unreliable" hint that function graph
|
||||
* tracing was involved.
|
||||
*/
|
||||
real_addr = ftrace_graph_ret_addr(task, &graph_idx,
|
||||
addr, stack);
|
||||
if (real_addr != addr)
|
||||
printk_stack_address(addr, 0, log_lvl);
|
||||
printk_stack_address(real_addr, reliable, log_lvl);
|
||||
|
||||
if (!reliable)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Get the next frame from the unwinder. No need to
|
||||
* check for an error: if anything goes wrong, the rest
|
||||
* of the addresses will just be printed as unreliable.
|
||||
*/
|
||||
unwind_next_frame(&state);
|
||||
}
|
||||
stack++;
|
||||
|
||||
if (str_end)
|
||||
printk("%s <%s> ", log_lvl, str_end);
|
||||
}
|
||||
return bp;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(print_context_stack);
|
||||
|
||||
unsigned long
|
||||
print_context_stack_bp(struct task_struct *task,
|
||||
unsigned long *stack, unsigned long bp,
|
||||
const struct stacktrace_ops *ops, void *data,
|
||||
unsigned long *end, int *graph)
|
||||
{
|
||||
struct stack_frame *frame = (struct stack_frame *)bp;
|
||||
unsigned long *ret_addr = &frame->return_address;
|
||||
|
||||
while (valid_stack_ptr(task, ret_addr, sizeof(*ret_addr), end)) {
|
||||
unsigned long addr = *ret_addr;
|
||||
|
||||
if (!__kernel_text_address(addr))
|
||||
break;
|
||||
|
||||
if (ops->address(data, addr, 1))
|
||||
break;
|
||||
frame = frame->next_frame;
|
||||
ret_addr = &frame->return_address;
|
||||
print_ftrace_graph_addr(addr, data, ops, task, graph);
|
||||
}
|
||||
|
||||
return (unsigned long)frame;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(print_context_stack_bp);
|
||||
|
||||
static int print_trace_stack(void *data, char *name)
|
||||
{
|
||||
printk("%s <%s> ", (char *)data, name);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Print one address/symbol entries per line.
|
||||
*/
|
||||
static int print_trace_address(void *data, unsigned long addr, int reliable)
|
||||
{
|
||||
touch_nmi_watchdog();
|
||||
printk_stack_address(addr, reliable, data);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct stacktrace_ops print_trace_ops = {
|
||||
.stack = print_trace_stack,
|
||||
.address = print_trace_address,
|
||||
.walk_stack = print_context_stack,
|
||||
};
|
||||
|
||||
void
|
||||
show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
||||
unsigned long *stack, unsigned long bp, char *log_lvl)
|
||||
{
|
||||
printk("%sCall Trace:\n", log_lvl);
|
||||
dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
|
||||
}
|
||||
|
||||
void show_trace(struct task_struct *task, struct pt_regs *regs,
|
||||
unsigned long *stack, unsigned long bp)
|
||||
{
|
||||
show_trace_log_lvl(task, regs, stack, bp, "");
|
||||
}
|
||||
|
||||
void show_stack(struct task_struct *task, unsigned long *sp)
|
||||
{
|
||||
unsigned long bp = 0;
|
||||
unsigned long stack;
|
||||
task = task ? : current;
|
||||
|
||||
/*
|
||||
* Stack frames below this one aren't interesting. Don't show them
|
||||
* if we're printing for %current.
|
||||
*/
|
||||
if (!sp && (!task || task == current)) {
|
||||
sp = &stack;
|
||||
bp = stack_frame(current, NULL);
|
||||
}
|
||||
if (!sp && task == current)
|
||||
sp = get_stack_pointer(current, NULL);
|
||||
|
||||
show_stack_log_lvl(task, NULL, sp, bp, "");
|
||||
show_stack_log_lvl(task, NULL, sp, "");
|
||||
}
|
||||
|
||||
void show_stack_regs(struct pt_regs *regs)
|
||||
{
|
||||
show_stack_log_lvl(current, regs, (unsigned long *)regs->sp, regs->bp, "");
|
||||
show_stack_log_lvl(current, regs, NULL, "");
|
||||
}
|
||||
|
||||
static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED;
|
||||
|
@ -16,93 +16,121 @@
|
||||
|
||||
#include <asm/stacktrace.h>
|
||||
|
||||
static void *is_irq_stack(void *p, void *irq)
|
||||
void stack_type_str(enum stack_type type, const char **begin, const char **end)
|
||||
{
|
||||
if (p < irq || p >= (irq + THREAD_SIZE))
|
||||
return NULL;
|
||||
return irq + THREAD_SIZE;
|
||||
switch (type) {
|
||||
case STACK_TYPE_IRQ:
|
||||
case STACK_TYPE_SOFTIRQ:
|
||||
*begin = "IRQ";
|
||||
*end = "EOI";
|
||||
break;
|
||||
default:
|
||||
*begin = NULL;
|
||||
*end = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void *is_hardirq_stack(unsigned long *stack, int cpu)
|
||||
static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
|
||||
{
|
||||
void *irq = per_cpu(hardirq_stack, cpu);
|
||||
unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack);
|
||||
unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
|
||||
|
||||
return is_irq_stack(stack, irq);
|
||||
/*
|
||||
* This is a software stack, so 'end' can be a valid stack pointer.
|
||||
* It just means the stack is empty.
|
||||
*/
|
||||
if (stack < begin || stack > end)
|
||||
return false;
|
||||
|
||||
info->type = STACK_TYPE_IRQ;
|
||||
info->begin = begin;
|
||||
info->end = end;
|
||||
|
||||
/*
|
||||
* See irq_32.c -- the next stack pointer is stored at the beginning of
|
||||
* the stack.
|
||||
*/
|
||||
info->next_sp = (unsigned long *)*begin;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void *is_softirq_stack(unsigned long *stack, int cpu)
|
||||
static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
|
||||
{
|
||||
void *irq = per_cpu(softirq_stack, cpu);
|
||||
unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack);
|
||||
unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
|
||||
|
||||
return is_irq_stack(stack, irq);
|
||||
/*
|
||||
* This is a software stack, so 'end' can be a valid stack pointer.
|
||||
* It just means the stack is empty.
|
||||
*/
|
||||
if (stack < begin || stack > end)
|
||||
return false;
|
||||
|
||||
info->type = STACK_TYPE_SOFTIRQ;
|
||||
info->begin = begin;
|
||||
info->end = end;
|
||||
|
||||
/*
|
||||
* The next stack pointer is stored at the beginning of the stack.
|
||||
* See irq_32.c.
|
||||
*/
|
||||
info->next_sp = (unsigned long *)*begin;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void dump_trace(struct task_struct *task, struct pt_regs *regs,
|
||||
unsigned long *stack, unsigned long bp,
|
||||
const struct stacktrace_ops *ops, void *data)
|
||||
int get_stack_info(unsigned long *stack, struct task_struct *task,
|
||||
struct stack_info *info, unsigned long *visit_mask)
|
||||
{
|
||||
const unsigned cpu = get_cpu();
|
||||
int graph = 0;
|
||||
u32 *prev_esp;
|
||||
if (!stack)
|
||||
goto unknown;
|
||||
|
||||
if (!task)
|
||||
task = current;
|
||||
task = task ? : current;
|
||||
|
||||
if (!stack) {
|
||||
unsigned long dummy;
|
||||
if (in_task_stack(stack, task, info))
|
||||
goto recursion_check;
|
||||
|
||||
stack = &dummy;
|
||||
if (task != current)
|
||||
stack = (unsigned long *)task->thread.sp;
|
||||
if (task != current)
|
||||
goto unknown;
|
||||
|
||||
if (in_hardirq_stack(stack, info))
|
||||
goto recursion_check;
|
||||
|
||||
if (in_softirq_stack(stack, info))
|
||||
goto recursion_check;
|
||||
|
||||
goto unknown;
|
||||
|
||||
recursion_check:
|
||||
/*
|
||||
* Make sure we don't iterate through any given stack more than once.
|
||||
* If it comes up a second time then there's something wrong going on:
|
||||
* just break out and report an unknown stack type.
|
||||
*/
|
||||
if (visit_mask) {
|
||||
if (*visit_mask & (1UL << info->type))
|
||||
goto unknown;
|
||||
*visit_mask |= 1UL << info->type;
|
||||
}
|
||||
|
||||
if (!bp)
|
||||
bp = stack_frame(task, regs);
|
||||
return 0;
|
||||
|
||||
for (;;) {
|
||||
void *end_stack;
|
||||
|
||||
end_stack = is_hardirq_stack(stack, cpu);
|
||||
if (!end_stack)
|
||||
end_stack = is_softirq_stack(stack, cpu);
|
||||
|
||||
bp = ops->walk_stack(task, stack, bp, ops, data,
|
||||
end_stack, &graph);
|
||||
|
||||
/* Stop if not on irq stack */
|
||||
if (!end_stack)
|
||||
break;
|
||||
|
||||
/* The previous esp is saved on the bottom of the stack */
|
||||
prev_esp = (u32 *)(end_stack - THREAD_SIZE);
|
||||
stack = (unsigned long *)*prev_esp;
|
||||
if (!stack)
|
||||
break;
|
||||
|
||||
if (ops->stack(data, "IRQ") < 0)
|
||||
break;
|
||||
touch_nmi_watchdog();
|
||||
}
|
||||
put_cpu();
|
||||
unknown:
|
||||
info->type = STACK_TYPE_UNKNOWN;
|
||||
return -EINVAL;
|
||||
}
|
||||
EXPORT_SYMBOL(dump_trace);
|
||||
|
||||
void
|
||||
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
||||
unsigned long *sp, unsigned long bp, char *log_lvl)
|
||||
void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
||||
unsigned long *sp, char *log_lvl)
|
||||
{
|
||||
unsigned long *stack;
|
||||
int i;
|
||||
|
||||
if (sp == NULL) {
|
||||
if (regs)
|
||||
sp = (unsigned long *)regs->sp;
|
||||
else if (task)
|
||||
sp = (unsigned long *)task->thread.sp;
|
||||
else
|
||||
sp = (unsigned long *)&sp;
|
||||
}
|
||||
if (!try_get_task_stack(task))
|
||||
return;
|
||||
|
||||
sp = sp ? : get_stack_pointer(task, regs);
|
||||
|
||||
stack = sp;
|
||||
for (i = 0; i < kstack_depth_to_print; i++) {
|
||||
@ -117,7 +145,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
||||
touch_nmi_watchdog();
|
||||
}
|
||||
pr_cont("\n");
|
||||
show_trace_log_lvl(task, regs, sp, bp, log_lvl);
|
||||
show_trace_log_lvl(task, regs, sp, log_lvl);
|
||||
|
||||
put_task_stack(task);
|
||||
}
|
||||
|
||||
|
||||
@ -139,7 +169,7 @@ void show_regs(struct pt_regs *regs)
|
||||
u8 *ip;
|
||||
|
||||
pr_emerg("Stack:\n");
|
||||
show_stack_log_lvl(NULL, regs, ®s->sp, 0, KERN_EMERG);
|
||||
show_stack_log_lvl(current, regs, NULL, KERN_EMERG);
|
||||
|
||||
pr_emerg("Code:");
|
||||
|
||||
|
@ -16,261 +16,145 @@
|
||||
|
||||
#include <asm/stacktrace.h>
|
||||
|
||||
|
||||
#define N_EXCEPTION_STACKS_END \
|
||||
(N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
|
||||
|
||||
static char x86_stack_ids[][8] = {
|
||||
[ DEBUG_STACK-1 ] = "#DB",
|
||||
[ NMI_STACK-1 ] = "NMI",
|
||||
[ DOUBLEFAULT_STACK-1 ] = "#DF",
|
||||
[ MCE_STACK-1 ] = "#MC",
|
||||
#if DEBUG_STKSZ > EXCEPTION_STKSZ
|
||||
[ N_EXCEPTION_STACKS ...
|
||||
N_EXCEPTION_STACKS_END ] = "#DB[?]"
|
||||
#endif
|
||||
static char *exception_stack_names[N_EXCEPTION_STACKS] = {
|
||||
[ DOUBLEFAULT_STACK-1 ] = "#DF",
|
||||
[ NMI_STACK-1 ] = "NMI",
|
||||
[ DEBUG_STACK-1 ] = "#DB",
|
||||
[ MCE_STACK-1 ] = "#MC",
|
||||
};
|
||||
|
||||
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
|
||||
unsigned *usedp, char **idp)
|
||||
static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = {
|
||||
[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
|
||||
[DEBUG_STACK - 1] = DEBUG_STKSZ
|
||||
};
|
||||
|
||||
void stack_type_str(enum stack_type type, const char **begin, const char **end)
|
||||
{
|
||||
BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
|
||||
|
||||
switch (type) {
|
||||
case STACK_TYPE_IRQ:
|
||||
*begin = "IRQ";
|
||||
*end = "EOI";
|
||||
break;
|
||||
case STACK_TYPE_EXCEPTION ... STACK_TYPE_EXCEPTION_LAST:
|
||||
*begin = exception_stack_names[type - STACK_TYPE_EXCEPTION];
|
||||
*end = "EOE";
|
||||
break;
|
||||
default:
|
||||
*begin = NULL;
|
||||
*end = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
|
||||
{
|
||||
unsigned long *begin, *end;
|
||||
struct pt_regs *regs;
|
||||
unsigned k;
|
||||
|
||||
/*
|
||||
* Iterate over all exception stacks, and figure out whether
|
||||
* 'stack' is in one of them:
|
||||
*/
|
||||
BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
|
||||
|
||||
for (k = 0; k < N_EXCEPTION_STACKS; k++) {
|
||||
unsigned long end = per_cpu(orig_ist, cpu).ist[k];
|
||||
/*
|
||||
* Is 'stack' above this exception frame's end?
|
||||
* If yes then skip to the next frame.
|
||||
*/
|
||||
if (stack >= end)
|
||||
end = (unsigned long *)raw_cpu_ptr(&orig_ist)->ist[k];
|
||||
begin = end - (exception_stack_sizes[k] / sizeof(long));
|
||||
regs = (struct pt_regs *)end - 1;
|
||||
|
||||
if (stack < begin || stack >= end)
|
||||
continue;
|
||||
/*
|
||||
* Is 'stack' above this exception frame's start address?
|
||||
* If yes then we found the right frame.
|
||||
*/
|
||||
if (stack >= end - EXCEPTION_STKSZ) {
|
||||
/*
|
||||
* Make sure we only iterate through an exception
|
||||
* stack once. If it comes up for the second time
|
||||
* then there's something wrong going on - just
|
||||
* break out and return NULL:
|
||||
*/
|
||||
if (*usedp & (1U << k))
|
||||
break;
|
||||
*usedp |= 1U << k;
|
||||
*idp = x86_stack_ids[k];
|
||||
return (unsigned long *)end;
|
||||
}
|
||||
/*
|
||||
* If this is a debug stack, and if it has a larger size than
|
||||
* the usual exception stacks, then 'stack' might still
|
||||
* be within the lower portion of the debug stack:
|
||||
*/
|
||||
#if DEBUG_STKSZ > EXCEPTION_STKSZ
|
||||
if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
|
||||
unsigned j = N_EXCEPTION_STACKS - 1;
|
||||
|
||||
/*
|
||||
* Black magic. A large debug stack is composed of
|
||||
* multiple exception stack entries, which we
|
||||
* iterate through now. Dont look:
|
||||
*/
|
||||
do {
|
||||
++j;
|
||||
end -= EXCEPTION_STKSZ;
|
||||
x86_stack_ids[j][4] = '1' +
|
||||
(j - N_EXCEPTION_STACKS);
|
||||
} while (stack < end - EXCEPTION_STKSZ);
|
||||
if (*usedp & (1U << j))
|
||||
break;
|
||||
*usedp |= 1U << j;
|
||||
*idp = x86_stack_ids[j];
|
||||
return (unsigned long *)end;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
info->type = STACK_TYPE_EXCEPTION + k;
|
||||
info->begin = begin;
|
||||
info->end = end;
|
||||
info->next_sp = (unsigned long *)regs->sp;
|
||||
|
||||
static inline int
|
||||
in_irq_stack(unsigned long *stack, unsigned long *irq_stack,
|
||||
unsigned long *irq_stack_end)
|
||||
{
|
||||
return (stack >= irq_stack && stack < irq_stack_end);
|
||||
}
|
||||
|
||||
static const unsigned long irq_stack_size =
|
||||
(IRQ_STACK_SIZE - 64) / sizeof(unsigned long);
|
||||
|
||||
enum stack_type {
|
||||
STACK_IS_UNKNOWN,
|
||||
STACK_IS_NORMAL,
|
||||
STACK_IS_EXCEPTION,
|
||||
STACK_IS_IRQ,
|
||||
};
|
||||
|
||||
static enum stack_type
|
||||
analyze_stack(int cpu, struct task_struct *task, unsigned long *stack,
|
||||
unsigned long **stack_end, unsigned long *irq_stack,
|
||||
unsigned *used, char **id)
|
||||
{
|
||||
unsigned long addr;
|
||||
|
||||
addr = ((unsigned long)stack & (~(THREAD_SIZE - 1)));
|
||||
if ((unsigned long)task_stack_page(task) == addr)
|
||||
return STACK_IS_NORMAL;
|
||||
|
||||
*stack_end = in_exception_stack(cpu, (unsigned long)stack,
|
||||
used, id);
|
||||
if (*stack_end)
|
||||
return STACK_IS_EXCEPTION;
|
||||
|
||||
if (!irq_stack)
|
||||
return STACK_IS_NORMAL;
|
||||
|
||||
*stack_end = irq_stack;
|
||||
irq_stack = irq_stack - irq_stack_size;
|
||||
|
||||
if (in_irq_stack(stack, irq_stack, *stack_end))
|
||||
return STACK_IS_IRQ;
|
||||
|
||||
return STACK_IS_UNKNOWN;
|
||||
}
|
||||
|
||||
/*
|
||||
* x86-64 can have up to three kernel stacks:
|
||||
* process stack
|
||||
* interrupt stack
|
||||
* severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
|
||||
*/
|
||||
|
||||
void dump_trace(struct task_struct *task, struct pt_regs *regs,
|
||||
unsigned long *stack, unsigned long bp,
|
||||
const struct stacktrace_ops *ops, void *data)
|
||||
{
|
||||
const unsigned cpu = get_cpu();
|
||||
unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu);
|
||||
unsigned long dummy;
|
||||
unsigned used = 0;
|
||||
int graph = 0;
|
||||
int done = 0;
|
||||
|
||||
if (!task)
|
||||
task = current;
|
||||
|
||||
if (!stack) {
|
||||
if (regs)
|
||||
stack = (unsigned long *)regs->sp;
|
||||
else if (task != current)
|
||||
stack = (unsigned long *)task->thread.sp;
|
||||
else
|
||||
stack = &dummy;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!bp)
|
||||
bp = stack_frame(task, regs);
|
||||
/*
|
||||
* Print function call entries in all stacks, starting at the
|
||||
* current stack address. If the stacks consist of nested
|
||||
* exceptions
|
||||
*/
|
||||
while (!done) {
|
||||
unsigned long *stack_end;
|
||||
enum stack_type stype;
|
||||
char *id;
|
||||
return false;
|
||||
}
|
||||
|
||||
stype = analyze_stack(cpu, task, stack, &stack_end,
|
||||
irq_stack, &used, &id);
|
||||
|
||||
/* Default finish unless specified to continue */
|
||||
done = 1;
|
||||
|
||||
switch (stype) {
|
||||
|
||||
/* Break out early if we are on the thread stack */
|
||||
case STACK_IS_NORMAL:
|
||||
break;
|
||||
|
||||
case STACK_IS_EXCEPTION:
|
||||
|
||||
if (ops->stack(data, id) < 0)
|
||||
break;
|
||||
|
||||
bp = ops->walk_stack(task, stack, bp, ops,
|
||||
data, stack_end, &graph);
|
||||
ops->stack(data, "<EOE>");
|
||||
/*
|
||||
* We link to the next stack via the
|
||||
* second-to-last pointer (index -2 to end) in the
|
||||
* exception stack:
|
||||
*/
|
||||
stack = (unsigned long *) stack_end[-2];
|
||||
done = 0;
|
||||
break;
|
||||
|
||||
case STACK_IS_IRQ:
|
||||
|
||||
if (ops->stack(data, "IRQ") < 0)
|
||||
break;
|
||||
bp = ops->walk_stack(task, stack, bp,
|
||||
ops, data, stack_end, &graph);
|
||||
/*
|
||||
* We link to the next stack (which would be
|
||||
* the process stack normally) the last
|
||||
* pointer (index -1 to end) in the IRQ stack:
|
||||
*/
|
||||
stack = (unsigned long *) (stack_end[-1]);
|
||||
irq_stack = NULL;
|
||||
ops->stack(data, "EOI");
|
||||
done = 0;
|
||||
break;
|
||||
|
||||
case STACK_IS_UNKNOWN:
|
||||
ops->stack(data, "UNK");
|
||||
break;
|
||||
}
|
||||
}
|
||||
static bool in_irq_stack(unsigned long *stack, struct stack_info *info)
|
||||
{
|
||||
unsigned long *end = (unsigned long *)this_cpu_read(irq_stack_ptr);
|
||||
unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long));
|
||||
|
||||
/*
|
||||
* This handles the process stack:
|
||||
* This is a software stack, so 'end' can be a valid stack pointer.
|
||||
* It just means the stack is empty.
|
||||
*/
|
||||
bp = ops->walk_stack(task, stack, bp, ops, data, NULL, &graph);
|
||||
put_cpu();
|
||||
}
|
||||
EXPORT_SYMBOL(dump_trace);
|
||||
if (stack < begin || stack > end)
|
||||
return false;
|
||||
|
||||
void
|
||||
show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
||||
unsigned long *sp, unsigned long bp, char *log_lvl)
|
||||
info->type = STACK_TYPE_IRQ;
|
||||
info->begin = begin;
|
||||
info->end = end;
|
||||
|
||||
/*
|
||||
* The next stack pointer is the first thing pushed by the entry code
|
||||
* after switching to the irq stack.
|
||||
*/
|
||||
info->next_sp = (unsigned long *)*(end - 1);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int get_stack_info(unsigned long *stack, struct task_struct *task,
|
||||
struct stack_info *info, unsigned long *visit_mask)
|
||||
{
|
||||
if (!stack)
|
||||
goto unknown;
|
||||
|
||||
task = task ? : current;
|
||||
|
||||
if (in_task_stack(stack, task, info))
|
||||
goto recursion_check;
|
||||
|
||||
if (task != current)
|
||||
goto unknown;
|
||||
|
||||
if (in_exception_stack(stack, info))
|
||||
goto recursion_check;
|
||||
|
||||
if (in_irq_stack(stack, info))
|
||||
goto recursion_check;
|
||||
|
||||
goto unknown;
|
||||
|
||||
recursion_check:
|
||||
/*
|
||||
* Make sure we don't iterate through any given stack more than once.
|
||||
* If it comes up a second time then there's something wrong going on:
|
||||
* just break out and report an unknown stack type.
|
||||
*/
|
||||
if (visit_mask) {
|
||||
if (*visit_mask & (1UL << info->type))
|
||||
goto unknown;
|
||||
*visit_mask |= 1UL << info->type;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
unknown:
|
||||
info->type = STACK_TYPE_UNKNOWN;
|
||||
return -EINVAL;
|
||||
}
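For orientation, here is a minimal sketch (not from the commit) of the contract get_stack_info() establishes: a walker hops from stack to stack through the next_sp link until an unknown type stops it, which is the same pattern show_trace_log_lvl() uses above. The helper name is invented and the inspection body is left empty:

	static void walk_all_stacks_sketch(unsigned long *stack,
					   struct task_struct *task)
	{
		struct stack_info info;
		unsigned long visit_mask = 0;

		for (; stack; stack = info.next_sp) {
			if (get_stack_info(stack, task, &info, &visit_mask))
				break;	/* unknown stack type: stop */

			for (; stack < info.end; stack++) {
				/* examine *stack here */
			}
		}
	}

The per-type bit in visit_mask is what guarantees termination even if a corrupted next_sp points back into a stack that was already walked.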
void show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
			unsigned long *sp, char *log_lvl)
{
	unsigned long *irq_stack_end;
	unsigned long *irq_stack;
	unsigned long *stack;
	int cpu;
	int i;

	preempt_disable();
	cpu = smp_processor_id();
	if (!try_get_task_stack(task))
		return;

	irq_stack_end = (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
	irq_stack = (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
	irq_stack_end = (unsigned long *)this_cpu_read(irq_stack_ptr);
	irq_stack = irq_stack_end - (IRQ_STACK_SIZE / sizeof(long));

	/*
	 * Debugging aid: "show_stack(NULL, NULL);" prints the
	 * back trace for this cpu:
	 */
	if (sp == NULL) {
		if (regs)
			sp = (unsigned long *)regs->sp;
		else if (task)
			sp = (unsigned long *)task->thread.sp;
		else
			sp = (unsigned long *)&sp;
	}
	sp = sp ? : get_stack_pointer(task, regs);

	stack = sp;
	for (i = 0; i < kstack_depth_to_print; i++) {

@ -299,18 +183,17 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
		stack++;
		touch_nmi_watchdog();
	}
	preempt_enable();

	pr_cont("\n");
	show_trace_log_lvl(task, regs, sp, bp, log_lvl);
	show_trace_log_lvl(task, regs, sp, log_lvl);

	put_task_stack(task);
}

void show_regs(struct pt_regs *regs)
{
	int i;
	unsigned long sp;

	sp = regs->sp;
	show_regs_print_info(KERN_DEFAULT);
	__show_regs(regs, 1);

@ -325,8 +208,7 @@ void show_regs(struct pt_regs *regs)
		u8 *ip;

		printk(KERN_DEFAULT "Stack:\n");
		show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
				   0, KERN_DEFAULT);
		show_stack_log_lvl(current, regs, NULL, KERN_DEFAULT);

		printk(KERN_DEFAULT "Code: ");


@ -317,7 +317,6 @@ static void __init fpu__init_system_ctx_switch(void)
	on_boot_cpu = 0;

	WARN_ON_FPU(current->thread.fpu.fpstate_active);
	current_thread_info()->status = 0;

	if (boot_cpu_has(X86_FEATURE_XSAVEOPT) && eagerfpu != DISABLE)
		eagerfpu = ENABLE;

@ -1029,7 +1029,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
	}

	if (ftrace_push_return_trace(old, self_addr, &trace.depth,
				     frame_pointer) == -EBUSY) {
				     frame_pointer, parent) == -EBUSY) {
		*parent = old;
		return;
	}

@ -94,7 +94,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE)
 */
__HEAD
ENTRY(startup_32)
	movl pa(stack_start),%ecx
	movl pa(initial_stack),%ecx

	/* test KEEP_SEGMENTS flag to see if the bootloader is asking
		us to not reload segments */

@ -286,7 +286,7 @@ num_subarch_entries = (. - subarch_entries) / 4
 * start_secondary().
 */
ENTRY(start_cpu0)
	movl stack_start, %ecx
	movl initial_stack, %ecx
	movl %ecx, %esp
	jmp *(initial_code)
ENDPROC(start_cpu0)

@ -307,7 +307,7 @@ ENTRY(startup_32_smp)
	movl %eax,%es
	movl %eax,%fs
	movl %eax,%gs
	movl pa(stack_start),%ecx
	movl pa(initial_stack),%ecx
	movl %eax,%ss
	leal -__PAGE_OFFSET(%ecx),%esp

@ -703,7 +703,7 @@ ENTRY(initial_page_table)

.data
.balign 4
ENTRY(stack_start)
ENTRY(initial_stack)
	.long init_thread_union+THREAD_SIZE

__INITRODATA

@ -66,7 +66,7 @@ startup_64:
	 */

	/*
	 * Setup stack for verify_cpu(). "-8" because stack_start is defined
	 * Setup stack for verify_cpu(). "-8" because initial_stack is defined
	 * this way, see below. Our best guess is a NULL ptr for stack
	 * termination heuristics and we don't want to break anything which
	 * might depend on it (kgdb, ...).

@ -226,7 +226,7 @@ ENTRY(secondary_startup_64)
	movq %rax, %cr0

	/* Setup a boot time stack */
	movq stack_start(%rip), %rsp
	movq initial_stack(%rip), %rsp

	/* zero EFLAGS after setting rsp */
	pushq $0

@ -310,7 +310,7 @@ ENDPROC(secondary_startup_64)
 * start_secondary().
 */
ENTRY(start_cpu0)
	movq stack_start(%rip),%rsp
	movq initial_stack(%rip),%rsp
	movq initial_code(%rip),%rax
	pushq $0		# fake return address to stop unwinder
	pushq $__KERNEL_CS	# set correct cs

@ -319,17 +319,15 @@ ENTRY(start_cpu0)
ENDPROC(start_cpu0)
#endif

	/* SMP bootup changes these two */
	/* Both SMP bootup and ACPI suspend change these variables */
	__REFDATA
	.balign	8
	GLOBAL(initial_code)
	.quad	x86_64_start_kernel
	GLOBAL(initial_gs)
	.quad	INIT_PER_CPU_VAR(irq_stack_union)

	GLOBAL(stack_start)
	GLOBAL(initial_stack)
	.quad  init_thread_union+THREAD_SIZE-8
	.word  0
	__FINITDATA

bad_address:

@ -40,8 +40,7 @@ static inline void stack_overflow_check(struct pt_regs *regs)
	if (user_mode(regs))
		return;

	if (regs->sp >= curbase + sizeof(struct thread_info) +
			sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
	if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
	    regs->sp <= curbase + THREAD_SIZE)
		return;


@ -50,6 +50,7 @@
#include <asm/apicdef.h>
#include <asm/apic.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>

struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
{

@ -166,21 +167,19 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
	gdb_regs[GDB_DX] = 0;
	gdb_regs[GDB_SI] = 0;
	gdb_regs[GDB_DI] = 0;
	gdb_regs[GDB_BP] = *(unsigned long *)p->thread.sp;
	gdb_regs[GDB_BP] = ((struct inactive_task_frame *)p->thread.sp)->bp;
#ifdef CONFIG_X86_32
	gdb_regs[GDB_DS] = __KERNEL_DS;
	gdb_regs[GDB_ES] = __KERNEL_DS;
	gdb_regs[GDB_PS] = 0;
	gdb_regs[GDB_CS] = __KERNEL_CS;
	gdb_regs[GDB_PC] = p->thread.ip;
	gdb_regs[GDB_SS] = __KERNEL_DS;
	gdb_regs[GDB_FS] = 0xFFFF;
	gdb_regs[GDB_GS] = 0xFFFF;
#else
	gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8);
	gdb_regs32[GDB_PS] = 0;
	gdb_regs32[GDB_CS] = __KERNEL_CS;
	gdb_regs32[GDB_SS] = __KERNEL_DS;
	gdb_regs[GDB_PC] = 0;
	gdb_regs[GDB_R8] = 0;
	gdb_regs[GDB_R9] = 0;
	gdb_regs[GDB_R10] = 0;

@ -190,6 +189,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
	gdb_regs[GDB_R14] = 0;
	gdb_regs[GDB_R15] = 0;
#endif
	gdb_regs[GDB_PC] = 0;
	gdb_regs[GDB_SP] = p->thread.sp;
}


@ -184,7 +184,7 @@ out:

static struct kobj_attribute type_attr = __ATTR_RO(type);

static struct bin_attribute data_attr = {
static struct bin_attribute data_attr __ro_after_init = {
	.attr = {
		.name = "data",
		.mode = S_IRUGO,

@ -29,7 +29,7 @@
#include <asm/x86_init.h>
#include <asm/reboot.h>

static int kvmclock = 1;
static int kvmclock __ro_after_init = 1;
static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME;
static int msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK;
static cycle_t kvm_sched_clock_offset;

@ -332,7 +332,6 @@ __visible struct pv_cpu_ops pv_cpu_ops = {
	.read_cr0 = native_read_cr0,
	.write_cr0 = native_write_cr0,
	.read_cr4 = native_read_cr4,
	.read_cr4_safe = native_read_cr4_safe,
	.write_cr4 = native_write_cr4,
#ifdef CONFIG_X86_64
	.read_cr8 = native_read_cr8,

@ -389,7 +388,7 @@ NOKPROBE_SYMBOL(native_load_idt);
#define PTE_IDENT	__PV_IS_CALLEE_SAVE(_paravirt_ident_64)
#endif

struct pv_mmu_ops pv_mmu_ops = {
struct pv_mmu_ops pv_mmu_ops __ro_after_init = {

	.read_cr2 = native_read_cr2,
	.write_cr2 = native_write_cr2,

@ -32,6 +32,7 @@
#include <asm/tlbflush.h>
#include <asm/mce.h>
#include <asm/vm86.h>
#include <asm/switch_to.h>

/*
 * per-CPU TSS segments. Threads are completely 'soft' on Linux,

@ -512,6 +513,17 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
}

/*
 * Return saved PC of a blocked thread.
 * What is this good for? it will be always the scheduler or ret_from_fork.
 */
unsigned long thread_saved_pc(struct task_struct *tsk)
{
	struct inactive_task_frame *frame =
		(struct inactive_task_frame *) READ_ONCE(tsk->thread.sp);
	return READ_ONCE_NOCHECK(frame->ret_addr);
}

/*
 * Called from fs/proc with a reference on @p to find the function
 * which called into schedule(). This needs to be done carefully

@ -520,15 +532,18 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
 */
unsigned long get_wchan(struct task_struct *p)
{
	unsigned long start, bottom, top, sp, fp, ip;
	unsigned long start, bottom, top, sp, fp, ip, ret = 0;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;

	if (!try_get_task_stack(p))
		return 0;

	start = (unsigned long)task_stack_page(p);
	if (!start)
		return 0;
		goto out;

	/*
	 * Layout of the stack page:

@ -537,9 +552,7 @@ unsigned long get_wchan(struct task_struct *p)
	 * PADDING
	 * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING
	 * stack
	 * ----------- bottom = start + sizeof(thread_info)
	 * thread_info
	 * ----------- start
	 * ----------- bottom = start
	 *
	 * The tasks stack pointer points at the location where the
	 * framepointer is stored. The data on the stack is:

@ -550,20 +563,25 @@ unsigned long get_wchan(struct task_struct *p)
	 */
	top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
	top -= 2 * sizeof(unsigned long);
	bottom = start + sizeof(struct thread_info);
	bottom = start;

	sp = READ_ONCE(p->thread.sp);
	if (sp < bottom || sp > top)
		return 0;
		goto out;

	fp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
	fp = READ_ONCE_NOCHECK(((struct inactive_task_frame *)sp)->bp);
	do {
		if (fp < bottom || fp > top)
			return 0;
			goto out;
		ip = READ_ONCE_NOCHECK(*(unsigned long *)(fp + sizeof(unsigned long)));
		if (!in_sched_functions(ip))
			return ip;
		if (!in_sched_functions(ip)) {
			ret = ip;
			goto out;
		}
		fp = READ_ONCE_NOCHECK(*(unsigned long *)fp);
	} while (count++ < 16 && p->state != TASK_RUNNING);
	return 0;

out:
	put_task_stack(p);
	return ret;
}
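The casts of p->thread.sp in the kgdb and process.c hunks above all rely on the frame that the switch_to() rework in this merge leaves at the top of a sleeping task's stack. As a reference, a sketch of that layout; the authoritative definition lives in the x86 switch_to.h of this series, so treat the exact field list here as an assumption:

	struct inactive_task_frame {
	#ifdef CONFIG_X86_64
		unsigned long r15;
		unsigned long r14;
		unsigned long r13;
		unsigned long r12;
	#else
		unsigned long si;
		unsigned long di;
	#endif
		unsigned long bx;
		unsigned long bp;	/* what kgdb and get_wchan() read */
		unsigned long ret_addr;	/* what thread_saved_pc() reads */
	};

	struct fork_frame {
		struct inactive_task_frame frame;
		struct pt_regs regs;	/* built by copy_thread_tls() below */
	};

bp and ret_addr sit together so the pair looks like an ordinary frame-pointer stack frame to the unwinder.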
@ -55,17 +55,6 @@
#include <asm/switch_to.h>
#include <asm/vm86.h>

asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");

/*
 * Return saved PC of a blocked thread.
 */
unsigned long thread_saved_pc(struct task_struct *tsk)
{
	return ((unsigned long *)tsk->thread.sp)[3];
}

void __show_regs(struct pt_regs *regs, int all)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;

@ -101,7 +90,7 @@ void __show_regs(struct pt_regs *regs, int all)
	cr0 = read_cr0();
	cr2 = read_cr2();
	cr3 = read_cr3();
	cr4 = __read_cr4_safe();
	cr4 = __read_cr4();
	printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
			cr0, cr2, cr3, cr4);

@ -133,35 +122,31 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
	unsigned long arg, struct task_struct *p, unsigned long tls)
{
	struct pt_regs *childregs = task_pt_regs(p);
	struct fork_frame *fork_frame = container_of(childregs, struct fork_frame, regs);
	struct inactive_task_frame *frame = &fork_frame->frame;
	struct task_struct *tsk;
	int err;

	p->thread.sp = (unsigned long) childregs;
	frame->bp = 0;
	frame->ret_addr = (unsigned long) ret_from_fork;
	p->thread.sp = (unsigned long) fork_frame;
	p->thread.sp0 = (unsigned long) (childregs+1);
	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));

	if (unlikely(p->flags & PF_KTHREAD)) {
		/* kernel thread */
		memset(childregs, 0, sizeof(struct pt_regs));
		p->thread.ip = (unsigned long) ret_from_kernel_thread;
		task_user_gs(p) = __KERNEL_STACK_CANARY;
		childregs->ds = __USER_DS;
		childregs->es = __USER_DS;
		childregs->fs = __KERNEL_PERCPU;
		childregs->bx = sp;	/* function */
		childregs->bp = arg;
		childregs->orig_ax = -1;
		childregs->cs = __KERNEL_CS | get_kernel_rpl();
		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
		frame->bx = sp;		/* function */
		frame->di = arg;
		p->thread.io_bitmap_ptr = NULL;
		return 0;
	}
	frame->bx = 0;
	*childregs = *current_pt_regs();
	childregs->ax = 0;
	if (sp)
		childregs->sp = sp;

	p->thread.ip = (unsigned long) ret_from_fork;
	task_user_gs(p) = get_user_gs(current_pt_regs());

	p->thread.io_bitmap_ptr = NULL;

@ -50,8 +50,6 @@
#include <asm/switch_to.h>
#include <asm/xen/hypervisor.h>

asmlinkage extern void ret_from_fork(void);

__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);

/* Prints also some state that isn't saved in the pt_regs */

@ -141,12 +139,17 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
{
	int err;
	struct pt_regs *childregs;
	struct fork_frame *fork_frame;
	struct inactive_task_frame *frame;
	struct task_struct *me = current;

	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
	childregs = task_pt_regs(p);
	p->thread.sp = (unsigned long) childregs;
	set_tsk_thread_flag(p, TIF_FORK);
	fork_frame = container_of(childregs, struct fork_frame, regs);
	frame = &fork_frame->frame;
	frame->bp = 0;
	frame->ret_addr = (unsigned long) ret_from_fork;
	p->thread.sp = (unsigned long) fork_frame;
	p->thread.io_bitmap_ptr = NULL;

	savesegment(gs, p->thread.gsindex);

@ -160,15 +163,11 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
	if (unlikely(p->flags & PF_KTHREAD)) {
		/* kernel thread */
		memset(childregs, 0, sizeof(struct pt_regs));
		childregs->sp = (unsigned long)childregs;
		childregs->ss = __KERNEL_DS;
		childregs->bx = sp;	/* function */
		childregs->bp = arg;
		childregs->orig_ax = -1;
		childregs->cs = __KERNEL_CS | get_kernel_rpl();
		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
		frame->bx = sp;		/* function */
		frame->r12 = arg;
		return 0;
	}
	frame->bx = 0;
	*childregs = *current_pt_regs();

	childregs->ax = 0;

@ -511,7 +510,7 @@ void set_personality_ia32(bool x32)
		current->personality &= ~READ_IMPLIES_EXEC;
		/* in_compat_syscall() uses the presence of the x32
		   syscall bit flag to determine compat status */
		current_thread_info()->status &= ~TS_COMPAT;
		current->thread.status &= ~TS_COMPAT;
	} else {
		set_thread_flag(TIF_IA32);
		clear_thread_flag(TIF_X32);

@ -519,7 +518,7 @@ void set_personality_ia32(bool x32)
			current->mm->context.ia32_compat = TIF_IA32;
		current->personality |= force_personality32;
		/* Prepare the first "return" to user space */
		current_thread_info()->status |= TS_COMPAT;
		current->thread.status |= TS_COMPAT;
	}
}
EXPORT_SYMBOL_GPL(set_personality_ia32);

@ -173,8 +173,8 @@ unsigned long kernel_stack_pointer(struct pt_regs *regs)
		return sp;

	prev_esp = (u32 *)(context);
	if (prev_esp)
		return (unsigned long)prev_esp;
	if (*prev_esp)
		return (unsigned long)*prev_esp;

	return (unsigned long)regs;
}

@ -934,7 +934,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
		 */
		regs->orig_ax = value;
		if (syscall_get_nr(child, regs) >= 0)
			task_thread_info(child)->status |= TS_I386_REGS_POKED;
			child->thread.status |= TS_I386_REGS_POKED;
		break;

	case offsetof(struct user32, regs.eflags):

@ -1250,7 +1250,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,

#ifdef CONFIG_X86_64

static struct user_regset x86_64_regsets[] __read_mostly = {
static struct user_regset x86_64_regsets[] __ro_after_init = {
	[REGSET_GENERAL] = {
		.core_note_type = NT_PRSTATUS,
		.n = sizeof(struct user_regs_struct) / sizeof(long),

@ -1291,7 +1291,7 @@ static const struct user_regset_view user_x86_64_view = {
#endif	/* CONFIG_X86_64 */

#if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
static struct user_regset x86_32_regsets[] __read_mostly = {
static struct user_regset x86_32_regsets[] __ro_after_init = {
	[REGSET_GENERAL] = {
		.core_note_type = NT_PRSTATUS,
		.n = sizeof(struct user_regs_struct32) / sizeof(u32),

@ -1344,7 +1344,7 @@ static const struct user_regset_view user_x86_32_view = {
 */
u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];

void update_regset_xstate_info(unsigned int size, u64 xstate_mask)
void __init update_regset_xstate_info(unsigned int size, u64 xstate_mask)
{
#ifdef CONFIG_X86_64
	x86_64_regsets[REGSET_XSTATE].n = size / sizeof(u64);

@ -705,7 +705,7 @@ static void native_machine_power_off(void)
	tboot_shutdown(TB_SHUTDOWN_HALT);
}

struct machine_ops machine_ops = {
struct machine_ops machine_ops __ro_after_init = {
	.power_off = native_machine_power_off,
	.shutdown = native_machine_shutdown,
	.emergency_restart = native_machine_emergency_restart,

@ -210,9 +210,9 @@ EXPORT_SYMBOL(boot_cpu_data);


#if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
__visible unsigned long mmu_cr4_features;
__visible unsigned long mmu_cr4_features __ro_after_init;
#else
__visible unsigned long mmu_cr4_features = X86_CR4_PAE;
__visible unsigned long mmu_cr4_features __ro_after_init = X86_CR4_PAE;
#endif

/* Boot loader ID and version as integers, for the benefit of proc_dointvec */

@ -1137,7 +1137,7 @@ void __init setup_arch(char **cmdline_p)
	 * auditing all the early-boot CR4 manipulation would be needed to
	 * rule it out.
	 */
	mmu_cr4_features = __read_cr4_safe();
	mmu_cr4_features = __read_cr4();

	memblock_set_current_limit(get_max_mapped());


@ -33,7 +33,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number);
DEFINE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
EXPORT_PER_CPU_SYMBOL(this_cpu_off);

unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
unsigned long __per_cpu_offset[NR_CPUS] __ro_after_init = {
	[0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
};
EXPORT_SYMBOL(__per_cpu_offset);

@ -246,7 +246,7 @@ void __init setup_per_cpu_areas(void)
#ifdef CONFIG_X86_64
		per_cpu(irq_stack_ptr, cpu) =
			per_cpu(irq_stack_union.irq_stack, cpu) +
			IRQ_STACK_SIZE - 64;
			IRQ_STACK_SIZE;
#endif
#ifdef CONFIG_NUMA
		per_cpu(x86_cpu_to_node_map, cpu) =

@ -783,7 +783,7 @@ static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
	 * than the tracee.
	 */
#ifdef CONFIG_IA32_EMULATION
	if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
	if (current->thread.status & (TS_COMPAT|TS_I386_REGS_POKED))
		return __NR_ia32_restart_syscall;
#endif
#ifdef CONFIG_X86_X32_ABI

@ -943,7 +943,6 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
	per_cpu(cpu_current_top_of_stack, cpu) =
		(unsigned long)task_stack_page(idle) + THREAD_SIZE;
#else
	clear_tsk_thread_flag(idle, TIF_FORK);
	initial_gs = per_cpu_offset(cpu);
#endif
}

@ -970,7 +969,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)

	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
	initial_code = (unsigned long)start_secondary;
	stack_start = idle->thread.sp;
	initial_stack = idle->thread.sp;

	/*
	 * Enable the espfix hack for this CPU

@ -8,80 +8,69 @@
#include <linux/export.h>
#include <linux/uaccess.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>

static int save_stack_stack(void *data, char *name)
static int save_stack_address(struct stack_trace *trace, unsigned long addr,
			      bool nosched)
{
	return 0;
}

static int
__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched)
{
	struct stack_trace *trace = data;
#ifdef CONFIG_FRAME_POINTER
	if (!reliable)
		return 0;
#endif
	if (nosched && in_sched_functions(addr))
		return 0;

	if (trace->skip > 0) {
		trace->skip--;
		return 0;
	}
	if (trace->nr_entries < trace->max_entries) {
		trace->entries[trace->nr_entries++] = addr;
		return 0;
	} else {
		return -1; /* no more room, stop walking the stack */

	if (trace->nr_entries >= trace->max_entries)
		return -1;

	trace->entries[trace->nr_entries++] = addr;
	return 0;
}

static void __save_stack_trace(struct stack_trace *trace,
			       struct task_struct *task, struct pt_regs *regs,
			       bool nosched)
{
	struct unwind_state state;
	unsigned long addr;

	if (regs)
		save_stack_address(trace, regs->ip, nosched);

	for (unwind_start(&state, task, regs, NULL); !unwind_done(&state);
	     unwind_next_frame(&state)) {
		addr = unwind_get_return_address(&state);
		if (!addr || save_stack_address(trace, addr, nosched))
			break;
	}

	if (trace->nr_entries < trace->max_entries)
		trace->entries[trace->nr_entries++] = ULONG_MAX;
}

static int save_stack_address(void *data, unsigned long addr, int reliable)
{
	return __save_stack_address(data, addr, reliable, false);
}

static int
save_stack_address_nosched(void *data, unsigned long addr, int reliable)
{
	return __save_stack_address(data, addr, reliable, true);
}

static const struct stacktrace_ops save_stack_ops = {
	.stack = save_stack_stack,
	.address = save_stack_address,
	.walk_stack = print_context_stack,
};

static const struct stacktrace_ops save_stack_ops_nosched = {
	.stack = save_stack_stack,
	.address = save_stack_address_nosched,
	.walk_stack = print_context_stack,
};

/*
 * Save stack-backtrace addresses into a stack_trace buffer.
 */
void save_stack_trace(struct stack_trace *trace)
{
	dump_trace(current, NULL, NULL, 0, &save_stack_ops, trace);
	if (trace->nr_entries < trace->max_entries)
		trace->entries[trace->nr_entries++] = ULONG_MAX;
	__save_stack_trace(trace, current, NULL, false);
}
EXPORT_SYMBOL_GPL(save_stack_trace);

void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
{
	dump_trace(current, regs, NULL, 0, &save_stack_ops, trace);
	if (trace->nr_entries < trace->max_entries)
		trace->entries[trace->nr_entries++] = ULONG_MAX;
	__save_stack_trace(trace, current, regs, false);
}

void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
	dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace);
	if (trace->nr_entries < trace->max_entries)
		trace->entries[trace->nr_entries++] = ULONG_MAX;
	if (!try_get_task_stack(tsk))
		return;

	__save_stack_trace(trace, tsk, NULL, true);

	put_task_stack(tsk);
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
|
||||
|
||||
|
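For orientation, here is a minimal sketch of how a caller drives the save_stack_trace() API shown above; the function name and buffer size are illustrative, not part of this patch:

#include <linux/kernel.h>
#include <linux/stacktrace.h>

/* Illustrative caller: snapshot the current task's stack. */
static void example_dump_current_stack(void)
{
	unsigned long entries[16];
	struct stack_trace trace = {
		.entries     = entries,
		.max_entries = ARRAY_SIZE(entries),
		.skip        = 0,	/* keep the innermost frames */
	};
	unsigned int i;

	/* Note: __save_stack_trace() may append a ULONG_MAX terminator. */
	save_stack_trace(&trace);

	/* %pB attributes each return address to its call site. */
	for (i = 0; i < trace.nr_entries; i++)
		pr_info("[<%p>] %pB\n",
			(void *)entries[i], (void *)entries[i]);
}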
@ -292,12 +292,30 @@ DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present)
DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment)
DO_ERROR(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check)

#ifdef CONFIG_VMAP_STACK
__visible void __noreturn handle_stack_overflow(const char *message,
struct pt_regs *regs,
unsigned long fault_address)
{
printk(KERN_EMERG "BUG: stack guard page was hit at %p (stack is %p..%p)\n",
(void *)fault_address, current->stack,
(char *)current->stack + THREAD_SIZE - 1);
die(message, regs, 0);

/* Be absolutely certain we don't return. */
panic(message);
}
#endif

#ifdef CONFIG_X86_64
/* Runs on IST stack */
dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
{
static const char str[] = "double fault";
struct task_struct *tsk = current;
#ifdef CONFIG_VMAP_STACK
unsigned long cr2;
#endif

#ifdef CONFIG_X86_ESPFIX64
extern unsigned char native_irq_return_iret[];
@ -332,6 +350,49 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_DF;

#ifdef CONFIG_VMAP_STACK
/*
* If we overflow the stack into a guard page, the CPU will fail
* to deliver #PF and will send #DF instead. Similarly, if we
* take any non-IST exception while too close to the bottom of
* the stack, the processor will get a page fault while
* delivering the exception and will generate a double fault.
*
* According to the SDM (footnote in 6.15 under "Interrupt 14 -
* Page-Fault Exception (#PF)"):
*
* Processors update CR2 whenever a page fault is detected. If a
* second page fault occurs while an earlier page fault is being
* delivered, the faulting linear address of the second fault will
* overwrite the contents of CR2 (replacing the previous
* address). These updates to CR2 occur even if the page fault
* results in a double fault or occurs during the delivery of a
* double fault.
*
* The logic below has a small possibility of incorrectly diagnosing
* some errors as stack overflows. For example, if the IDT or GDT
* gets corrupted such that #GP delivery fails due to a bad descriptor
* causing #GP and we hit this condition while CR2 coincidentally
* points to the stack guard page, we'll think we overflowed the
* stack. Given that we're going to panic one way or another
* if this happens, this isn't necessarily worth fixing.
*
* If necessary, we could improve the test by only diagnosing
* a stack overflow if the saved RSP points within 47 bytes of
* the bottom of the stack: if RSP == tsk_stack + 48 and we
* take an exception, the stack is already aligned and there
* will be enough room for SS, RSP, RFLAGS, CS, RIP, and a
* possible error code, so a stack overflow would *not* double
* fault. With any less space left, exception delivery could
* fail, and, as a practical matter, we've overflowed the
* stack even if the actual trigger for the double fault was
* something else.
*/
cr2 = read_cr2();
if ((unsigned long)task_stack_page(tsk) - 1 - cr2 < PAGE_SIZE)
handle_stack_overflow("kernel stack overflow (double-fault)", regs, cr2);
#endif

#ifdef CONFIG_DOUBLEFAULT
df_debug(regs, error_code);
#endif
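The guard-page check above relies on unsigned wraparound, so a single comparison covers exactly the one page below the stack allocation. Restated as a standalone helper (a sketch for illustration only; the name is assumed):

/*
 * With unsigned arithmetic, base - 1 - cr2 < PAGE_SIZE holds only
 * when cr2 lies in [base - PAGE_SIZE, base - 1], i.e. inside the
 * guard page directly below the stack; any cr2 >= base wraps to a
 * huge value and fails the comparison.
 */
static bool hit_stack_guard_page(unsigned long stack_base, unsigned long cr2)
{
	return stack_base - 1 - cr2 < PAGE_SIZE;
}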
arch/x86/kernel/unwind_frame.c (new file, 93 lines)
@ -0,0 +1,93 @@
#include <linux/sched.h>
#include <asm/ptrace.h>
#include <asm/bitops.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>

#define FRAME_HEADER_SIZE (sizeof(long) * 2)

unsigned long unwind_get_return_address(struct unwind_state *state)
{
unsigned long addr;
unsigned long *addr_p = unwind_get_return_address_ptr(state);

if (unwind_done(state))
return 0;

addr = ftrace_graph_ret_addr(state->task, &state->graph_idx, *addr_p,
addr_p);

return __kernel_text_address(addr) ? addr : 0;
}
EXPORT_SYMBOL_GPL(unwind_get_return_address);

static bool update_stack_state(struct unwind_state *state, void *addr,
size_t len)
{
struct stack_info *info = &state->stack_info;

/*
* If addr isn't on the current stack, switch to the next one.
*
* We may have to traverse multiple stacks to deal with the possibility
* that 'info->next_sp' could point to an empty stack and 'addr' could
* be on a subsequent stack.
*/
while (!on_stack(info, addr, len))
if (get_stack_info(info->next_sp, state->task, info,
&state->stack_mask))
return false;

return true;
}

bool unwind_next_frame(struct unwind_state *state)
{
unsigned long *next_bp;

if (unwind_done(state))
return false;

next_bp = (unsigned long *)*state->bp;

/* make sure the next frame's data is accessible */
if (!update_stack_state(state, next_bp, FRAME_HEADER_SIZE))
return false;

/* move to the next frame */
state->bp = next_bp;
return true;
}
EXPORT_SYMBOL_GPL(unwind_next_frame);

void __unwind_start(struct unwind_state *state, struct task_struct *task,
struct pt_regs *regs, unsigned long *first_frame)
{
memset(state, 0, sizeof(*state));
state->task = task;

/* don't even attempt to start from user mode regs */
if (regs && user_mode(regs)) {
state->stack_info.type = STACK_TYPE_UNKNOWN;
return;
}

/* set up the starting stack frame */
state->bp = get_frame_pointer(task, regs);

/* initialize stack info and make sure the frame data is accessible */
get_stack_info(state->bp, state->task, &state->stack_info,
&state->stack_mask);
update_stack_state(state, state->bp, FRAME_HEADER_SIZE);

/*
* The caller can provide the address of the first frame directly
* (first_frame) or indirectly (regs->sp) to indicate which stack frame
* to start unwinding at. Skip ahead until we reach it.
*/
while (!unwind_done(state) &&
(!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
state->bp < first_frame))
unwind_next_frame(state);
}
EXPORT_SYMBOL_GPL(__unwind_start);
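The consumer-facing pattern for this new interface is a simple iteration, as the stacktrace and oprofile conversions elsewhere in this series use. A self-contained sketch (the function name is illustrative):

#include <linux/sched.h>
#include <asm/unwind.h>

/* Walk a task's stack and print each resolvable return address. */
static void example_walk_stack(struct task_struct *task)
{
	struct unwind_state state;
	unsigned long addr;

	for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state);
	     unwind_next_frame(&state)) {
		/* returns 0 if the slot doesn't hold a kernel text address */
		addr = unwind_get_return_address(&state);
		if (!addr)
			break;
		pr_info("[<%p>] %pB\n", (void *)addr, (void *)addr);
	}
}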
arch/x86/kernel/unwind_guess.c (new file, 43 lines)
@ -0,0 +1,43 @@
#include <linux/sched.h>
#include <linux/ftrace.h>
#include <asm/ptrace.h>
#include <asm/bitops.h>
#include <asm/stacktrace.h>
#include <asm/unwind.h>

bool unwind_next_frame(struct unwind_state *state)
{
struct stack_info *info = &state->stack_info;

if (unwind_done(state))
return false;

do {
for (state->sp++; state->sp < info->end; state->sp++)
if (__kernel_text_address(*state->sp))
return true;

state->sp = info->next_sp;

} while (!get_stack_info(state->sp, state->task, info,
&state->stack_mask));

return false;
}
EXPORT_SYMBOL_GPL(unwind_next_frame);

void __unwind_start(struct unwind_state *state, struct task_struct *task,
struct pt_regs *regs, unsigned long *first_frame)
{
memset(state, 0, sizeof(*state));

state->task = task;
state->sp = first_frame;

get_stack_info(first_frame, state->task, &state->stack_info,
&state->stack_mask);

if (!__kernel_text_address(*first_frame))
unwind_next_frame(state);
}
EXPORT_SYMBOL_GPL(__unwind_start);
@ -91,7 +91,7 @@ struct x86_cpuinit_ops x86_cpuinit = {
static void default_nmi_init(void) { };
static int default_i8042_detect(void) { return 1; };

struct x86_platform_ops x86_platform = {
struct x86_platform_ops x86_platform __ro_after_init = {
.calibrate_cpu = native_calibrate_cpu,
.calibrate_tsc = native_calibrate_tsc,
.get_wallclock = mach_get_cmos_time,
@ -108,7 +108,7 @@ struct x86_platform_ops x86_platform = {
EXPORT_SYMBOL_GPL(x86_platform);

#if defined(CONFIG_PCI_MSI)
struct x86_msi_ops x86_msi = {
struct x86_msi_ops x86_msi __ro_after_init = {
.setup_msi_irqs = native_setup_msi_irqs,
.teardown_msi_irq = native_teardown_msi_irq,
.teardown_msi_irqs = default_teardown_msi_irqs,
@ -137,7 +137,7 @@ void arch_restore_msi_irqs(struct pci_dev *dev)
}
#endif

struct x86_io_apic_ops x86_io_apic_ops = {
struct x86_io_apic_ops x86_io_apic_ops __ro_after_init = {
.read = native_io_apic_read,
.disable = native_disable_io_apic,
};
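For readers unfamiliar with the annotation these hunks add: __ro_after_init (from <linux/cache.h>) places a variable in a section that stays writable while init code runs and is remapped read-only once mark_rodata_ro() has executed, so ops tables like these can still be patched during early boot but cannot be rewritten by an exploit afterwards. A minimal sketch with a hypothetical variable:

#include <linux/cache.h>
#include <linux/init.h>

/* Hypothetical example: written once during __init, read-only after. */
static unsigned long example_setting __ro_after_init;

static int __init example_init(void)
{
	example_setting = 42;	/* fine: init sections are still writable */
	return 0;
}
early_initcall(example_init);

/* After init, any write to example_setting faults on the read-only mapping. */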
@ -4961,7 +4961,7 @@ static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
avic_handle_ldr_update(vcpu);
}

static struct kvm_x86_ops svm_x86_ops = {
static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
.hardware_setup = svm_hardware_setup,
@ -11177,7 +11177,7 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
~FEATURE_CONTROL_LMCE;
}

static struct kvm_x86_ops vmx_x86_ops = {
static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.cpu_has_kvm_support = cpu_has_kvm_support,
.disabled_by_bios = vmx_disabled_by_bios,
.hardware_setup = hardware_setup,
@ -753,6 +753,38 @@ no_context(struct pt_regs *regs, unsigned long error_code,
return;
}

#ifdef CONFIG_VMAP_STACK
/*
* Stack overflow? During boot, we can fault near the initial
* stack in the direct map, but that's not an overflow -- check
* that we're in vmalloc space to avoid this.
*/
if (is_vmalloc_addr((void *)address) &&
(((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
register void *__sp asm("rsp");
unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
/*
* We're likely to be running with very little stack space
* left. It's plausible that we'd hit this condition but
* double-fault even before we get this far, in which case
* we're fine: the double-fault handler will deal with it.
*
* We don't want to make it all the way into the oops code
* and then double-fault, though, because we're likely to
* break the console driver and lose most of the stack dump.
*/
asm volatile ("movq %[stack], %%rsp\n\t"
"call handle_stack_overflow\n\t"
"1: jmp 1b"
: "+r" (__sp)
: "D" ("kernel stack overflow (page fault)"),
"S" (regs), "d" (address),
[stack] "rm" (stack));
unreachable();
}
#endif

/*
* 32-bit:
*
@ -40,17 +40,26 @@
* You need to add an if/def entry if you introduce a new memory region
* compatible with KASLR. Your entry must be in logical order with memory
* layout. For example, ESPFIX is before EFI because its virtual address is
* before. You also need to add a BUILD_BUG_ON in kernel_randomize_memory to
* before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to
* ensure that this order is correct and won't be changed.
*/
static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
static const unsigned long vaddr_end = VMEMMAP_START;

#if defined(CONFIG_X86_ESPFIX64)
static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
#elif defined(CONFIG_EFI)
static const unsigned long vaddr_end = EFI_VA_START;
#else
static const unsigned long vaddr_end = __START_KERNEL_map;
#endif

/* Default values */
unsigned long page_offset_base = __PAGE_OFFSET_BASE;
EXPORT_SYMBOL(page_offset_base);
unsigned long vmalloc_base = __VMALLOC_BASE;
EXPORT_SYMBOL(vmalloc_base);
unsigned long vmemmap_base = __VMEMMAP_BASE;
EXPORT_SYMBOL(vmemmap_base);

/*
* Memory regions randomized by KASLR (except modules that use a separate logic
@ -63,6 +72,7 @@ static __initdata struct kaslr_memory_region {
} kaslr_regions[] = {
{ &page_offset_base, 64/* Maximum */ },
{ &vmalloc_base, VMALLOC_SIZE_TB },
{ &vmemmap_base, 1 },
};

/* Get size in bytes used by the memory region */
@ -89,6 +99,18 @@ void __init kernel_randomize_memory(void)
struct rnd_state rand_state;
unsigned long remain_entropy;

/*
* All these BUILD_BUG_ON checks ensure the memory layout is
* consistent with the vaddr_start/vaddr_end variables.
*/
BUILD_BUG_ON(vaddr_start >= vaddr_end);
BUILD_BUG_ON(config_enabled(CONFIG_X86_ESPFIX64) &&
vaddr_end >= EFI_VA_START);
BUILD_BUG_ON((config_enabled(CONFIG_X86_ESPFIX64) ||
config_enabled(CONFIG_EFI)) &&
vaddr_end >= __START_KERNEL_map);
BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);

if (!kaslr_memory_enabled())
return;
@ -77,10 +77,25 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
unsigned cpu = smp_processor_id();

if (likely(prev != next)) {
if (IS_ENABLED(CONFIG_VMAP_STACK)) {
/*
* If our current stack is in vmalloc space and isn't
* mapped in the new pgd, we'll double-fault. Forcibly
* map it.
*/
unsigned int stack_pgd_index = pgd_index(current_stack_pointer());

pgd_t *pgd = next->pgd + stack_pgd_index;

if (unlikely(pgd_none(*pgd)))
set_pgd(pgd, init_mm.pgd[stack_pgd_index]);
}

#ifdef CONFIG_SMP
this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
this_cpu_write(cpu_tlbstate.active_mm, next);
#endif

cpumask_set_cpu(cpu, mm_cpumask(next));

/*
@ -16,27 +16,7 @@

#include <asm/ptrace.h>
#include <asm/stacktrace.h>

static int backtrace_stack(void *data, char *name)
{
/* Yes, we want all stacks */
return 0;
}

static int backtrace_address(void *data, unsigned long addr, int reliable)
{
unsigned int *depth = data;

if ((*depth)--)
oprofile_add_trace(addr);
return 0;
}

static struct stacktrace_ops backtrace_ops = {
.stack = backtrace_stack,
.address = backtrace_address,
.walk_stack = print_context_stack,
};
#include <asm/unwind.h>

#ifdef CONFIG_COMPAT
static struct stack_frame_ia32 *
@ -113,10 +93,29 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
struct stack_frame *head = (struct stack_frame *)frame_pointer(regs);

if (!user_mode(regs)) {
unsigned long stack = kernel_stack_pointer(regs);
if (depth)
dump_trace(NULL, regs, (unsigned long *)stack, 0,
&backtrace_ops, &depth);
struct unwind_state state;
unsigned long addr;

if (!depth)
return;

oprofile_add_trace(regs->ip);

if (!--depth)
return;

for (unwind_start(&state, current, regs, NULL);
!unwind_done(&state); unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);
if (!addr)
break;

oprofile_add_trace(addr);

if (!--depth)
break;
}

return;
}
@ -120,9 +120,12 @@ static unsigned long __init bios32_service(unsigned long service)
static struct {
unsigned long address;
unsigned short segment;
} pci_indirect = { 0, __KERNEL_CS };
} pci_indirect __ro_after_init = {
.address = 0,
.segment = __KERNEL_CS,
};

static int pci_bios_present;
static int pci_bios_present __ro_after_init;

static int __init check_pcibios(void)
{
@ -130,7 +130,7 @@ static void __save_processor_state(struct saved_context *ctxt)
ctxt->cr0 = read_cr0();
ctxt->cr2 = read_cr2();
ctxt->cr3 = read_cr3();
ctxt->cr4 = __read_cr4_safe();
ctxt->cr4 = __read_cr4();
#ifdef CONFIG_X86_64
ctxt->cr8 = read_cr8();
#endif
@ -194,7 +194,7 @@ int peek_user(struct task_struct *child, long addr, long data)

static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
{
int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
int err, n, cpu = task_cpu(child);
struct user_i387_struct fpregs;

err = save_i387_registers(userspace_pid[cpu],
@ -211,7 +211,7 @@ static int get_fpregs(struct user_i387_struct __user *buf, struct task_struct *c

static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *child)
{
int n, cpu = ((struct thread_info *) child->stack)->cpu;
int n, cpu = task_cpu(child);
struct user_i387_struct fpregs;

n = copy_from_user(&fpregs, buf, sizeof(fpregs));
@ -224,7 +224,7 @@ static int set_fpregs(struct user_i387_struct __user *buf, struct task_struct *c

static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
{
int err, n, cpu = ((struct thread_info *) child->stack)->cpu;
int err, n, cpu = task_cpu(child);
struct user_fxsr_struct fpregs;

err = save_fpx_registers(userspace_pid[cpu], (unsigned long *) &fpregs);
@ -240,7 +240,7 @@ static int get_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *

static int set_fpxregs(struct user_fxsr_struct __user *buf, struct task_struct *child)
{
int n, cpu = ((struct thread_info *) child->stack)->cpu;
int n, cpu = task_cpu(child);
struct user_fxsr_struct fpregs;

n = copy_from_user(&fpregs, buf, sizeof(fpregs));
@ -1237,7 +1237,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.write_cr0 = xen_write_cr0,

.read_cr4 = native_read_cr4,
.read_cr4_safe = native_read_cr4_safe,
.write_cr4 = xen_write_cr4,

#ifdef CONFIG_X86_64
@ -940,15 +940,13 @@ static void build_inv_irt(struct iommu_cmd *cmd, u16 devid)
* Writes the command to the IOMMU's command buffer and informs the
* hardware about the new command.
*/
static int iommu_queue_command_sync(struct amd_iommu *iommu,
struct iommu_cmd *cmd,
bool sync)
static int __iommu_queue_command_sync(struct amd_iommu *iommu,
struct iommu_cmd *cmd,
bool sync)
{
u32 left, tail, head, next_tail;
unsigned long flags;

again:
spin_lock_irqsave(&iommu->lock, flags);

head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET);
tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET);
@ -957,15 +955,14 @@ again:

if (left <= 2) {
struct iommu_cmd sync_cmd;
volatile u64 sem = 0;
int ret;

build_completion_wait(&sync_cmd, (u64)&sem);
iommu->cmd_sem = 0;

build_completion_wait(&sync_cmd, (u64)&iommu->cmd_sem);
copy_cmd_to_buffer(iommu, &sync_cmd, tail);

spin_unlock_irqrestore(&iommu->lock, flags);

if ((ret = wait_on_sem(&sem)) != 0)
if ((ret = wait_on_sem(&iommu->cmd_sem)) != 0)
return ret;

goto again;
@ -976,9 +973,21 @@ again:
/* We need to sync now to make sure all commands are processed */
iommu->need_sync = sync;

return 0;
}

static int iommu_queue_command_sync(struct amd_iommu *iommu,
struct iommu_cmd *cmd,
bool sync)
{
unsigned long flags;
int ret;

spin_lock_irqsave(&iommu->lock, flags);
ret = __iommu_queue_command_sync(iommu, cmd, sync);
spin_unlock_irqrestore(&iommu->lock, flags);

return 0;
return ret;
}

static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
@ -993,19 +1002,29 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
static int iommu_completion_wait(struct amd_iommu *iommu)
{
struct iommu_cmd cmd;
volatile u64 sem = 0;
unsigned long flags;
int ret;

if (!iommu->need_sync)
return 0;

build_completion_wait(&cmd, (u64)&sem);

ret = iommu_queue_command_sync(iommu, &cmd, false);
build_completion_wait(&cmd, (u64)&iommu->cmd_sem);

spin_lock_irqsave(&iommu->lock, flags);

iommu->cmd_sem = 0;

ret = __iommu_queue_command_sync(iommu, &cmd, false);
if (ret)
return ret;
goto out_unlock;

return wait_on_sem(&sem);
ret = wait_on_sem(&iommu->cmd_sem);

out_unlock:
spin_unlock_irqrestore(&iommu->lock, flags);

return ret;
}

static int iommu_flush_dte(struct amd_iommu *iommu, u16 devid)
@ -524,6 +524,8 @@ struct amd_iommu {
struct irq_domain *ir_domain;
struct irq_domain *msi_domain;
#endif

volatile u64 __aligned(8) cmd_sem;
};

#define ACPIHID_UID_LEN 256
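Why this driver change sits in a VMAP_STACK series: build_completion_wait() hands the semaphore's address to the IOMMU, which completes the COMPLETION_WAIT command by writing that location via DMA. Once kernel stacks are vmalloc'd they are no longer guaranteed to be DMA-addressable through the direct mapping, so a stack-resident u64 stops being a safe target and the semaphore moves into struct amd_iommu. The polling side then just spins on that field; roughly (a sketch, not the exact driver code; LOOP_TIMEOUT stands in for the driver's timeout constant):

#include <linux/delay.h>
#include <linux/errno.h>

static int example_wait_on_sem(volatile u64 *sem)
{
	int i = 0;

	/* The IOMMU sets *sem non-zero via DMA when the command retires. */
	while (*sem == 0 && i < LOOP_TIMEOUT) {
		udelay(10);
		i += 1;
	}

	return (i == LOOP_TIMEOUT) ? -EIO : 0;
}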
@ -483,7 +483,7 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
save_stack_trace_tsk(task, &trace);

for (i = 0; i < trace.nr_entries; i++) {
seq_printf(m, "[<%pK>] %pS\n",
seq_printf(m, "[<%pK>] %pB\n",
(void *)entries[i], (void *)entries[i]);
}
unlock_trace(task);
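The %pS to %pB switch matters for saved traces: %pS resolves an address as-is, while %pB treats it as a return address and backs it up before symbol lookup, so an entry pointing just past a call is attributed to the caller rather than to whatever symbol happens to begin at that address. Illustrative comparison:

/* addr is a return address saved by the unwinder */
pr_info("[<%p>] %pS\n", (void *)addr, (void *)addr); /* may name the *next* function */
pr_info("[<%p>] %pB\n", (void *)addr, (void *)addr); /* names the calling function */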
@ -795,7 +795,12 @@ struct ftrace_ret_stack {
unsigned long func;
unsigned long long calltime;
unsigned long long subtime;
#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
unsigned long fp;
#endif
#ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
unsigned long *retp;
#endif
};

/*
@ -807,7 +812,10 @@ extern void return_to_handler(void);

extern int
ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth,
unsigned long frame_pointer);
unsigned long frame_pointer, unsigned long *retp);

unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
unsigned long ret, unsigned long *retp);

/*
* Sometimes we don't want to trace a function with the function
@ -870,6 +878,13 @@ static inline int task_curr_ret_stack(struct task_struct *tsk)
return -1;
}

static inline unsigned long
ftrace_graph_ret_addr(struct task_struct *task, int *idx, unsigned long ret,
unsigned long *retp)
{
return ret;
}

static inline void pause_graph_tracing(void) { }
static inline void unpause_graph_tracing(void) { }
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
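On the arch side, adopting HAVE_FUNCTION_GRAPH_RET_ADDR_PTR means passing the location of the saved return address when hooking a function's return. A hedged sketch of the shape of an arch's prepare_ftrace_return() with the new 'retp' argument, simplified from the x86 pattern (the ftrace_graph_ent hook and fault-safe accessors are omitted):

void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
			   unsigned long frame_pointer)
{
	unsigned long old = *parent;	/* the real return address */
	int depth = 0;

	/* Divert the return through the tracer's trampoline... */
	*parent = (unsigned long)&return_to_handler;

	/* ...recording the slot that held it as the 'retp' cookie. */
	if (ftrace_push_return_trace(old, self_addr, &depth,
				     frame_pointer, parent) == -EBUSY)
		*parent = old;	/* ret_stack full: undo the diversion */
}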
@ -15,6 +15,8 @@
#include <net/net_namespace.h>
#include <linux/sched/rt.h>

#include <asm/thread_info.h>

#ifdef CONFIG_SMP
# define INIT_PUSHABLE_TASKS(tsk) \
.pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO),
@ -183,12 +185,21 @@ extern struct task_group root_task_group;
# define INIT_KASAN(tsk)
#endif

#ifdef CONFIG_THREAD_INFO_IN_TASK
# define INIT_TASK_TI(tsk) \
.thread_info = INIT_THREAD_INFO(tsk), \
.stack_refcount = ATOMIC_INIT(1),
#else
# define INIT_TASK_TI(tsk)
#endif

/*
* INIT_TASK is used to set up the first task table, touch at
* your own risk! Base=0, limit=0x1fffff (=2MB)
*/
#define INIT_TASK(tsk) \
{ \
INIT_TASK_TI(tsk) \
.state = 0, \
.stack = init_stack, \
.usage = ATOMIC_INIT(2), \
Some files were not shown because too many files have changed in this diff.