From 3780578761921f094179c6289072a74b2228c602 Mon Sep 17 00:00:00 2001 From: Rob Landley Date: Sat, 20 May 2017 15:03:29 -0500 Subject: [PATCH 1/8] x86/boot: Use CROSS_COMPILE prefix for readelf The boot code Makefile contains a straight 'readelf' invocation. This causes build warnings in cross compile environments, when there is no unprefixed readelf accessible via $PATH. Add the missing $(CROSS_COMPILE) prefix. [ tglx: Rewrote changelog ] Fixes: 98f78525371b ("x86/boot: Refuse to build with data relocations") Signed-off-by: Rob Landley Acked-by: Kees Cook Cc: Jiri Kosina Cc: Paul Bolle Cc: "H.J. Lu" Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/ced18878-693a-9576-a024-113ef39a22c0@landley.net Signed-off-by: Thomas Gleixner --- arch/x86/boot/compressed/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 44163e8c3868..2c860ad4fe06 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -94,7 +94,7 @@ vmlinux-objs-$(CONFIG_EFI_MIXED) += $(obj)/efi_thunk_$(BITS).o quiet_cmd_check_data_rel = DATAREL $@ define cmd_check_data_rel for obj in $(filter %.o,$^); do \ - readelf -S $$obj | grep -qF .rel.local && { \ + ${CROSS_COMPILE}readelf -S $$obj | grep -qF .rel.local && { \ echo "error: $$obj has data relocations!" >&2; \ exit 1; \ } || true; \ From ebd574994c63164d538a197172157318f58ac647 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 23 May 2017 10:37:29 -0500 Subject: [PATCH 2/8] Revert "x86/entry: Fix the end of the stack for newly forked tasks" Petr Mladek reported the following warning when loading the livepatch sample module: WARNING: CPU: 1 PID: 3699 at arch/x86/kernel/stacktrace.c:132 save_stack_trace_tsk_reliable+0x133/0x1a0 ... Call Trace: __schedule+0x273/0x820 schedule+0x36/0x80 kthreadd+0x305/0x310 ? kthread_create_on_cpu+0x80/0x80 ? icmp_echo.part.32+0x50/0x50 ret_from_fork+0x2c/0x40 That warning means the end of the stack is no longer recognized as such for newly forked tasks. The problem was introduced with the following commit: ff3f7e2475bb ("x86/entry: Fix the end of the stack for newly forked tasks") ... which was completely misguided. It only partially fixed the reported issue, and it introduced another bug in the process. None of the other entry code saves the frame pointer before calling into C code, so it doesn't make sense for ret_from_fork to do so either. Contrary to what I originally thought, the original issue wasn't related to newly forked tasks. It was actually related to ftrace. When entry code calls into a function which then calls into an ftrace handler, the stack frame looks different than normal. The original issue will be fixed in the unwinder, in a subsequent patch. Reported-by: Petr Mladek Signed-off-by: Josh Poimboeuf Acked-by: Thomas Gleixner Cc: Dave Jones Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: live-patching@vger.kernel.org Fixes: ff3f7e2475bb ("x86/entry: Fix the end of the stack for newly forked tasks") Link: http://lkml.kernel.org/r/f350760f7e82f0750c8d1dd093456eb212751caa.1495553739.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/entry/entry_32.S | 30 +++++++++++++++++++----------- arch/x86/entry/entry_64.S | 11 ++++------- 2 files changed, 23 insertions(+), 18 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 50bc26949e9e..48ef7bb32c42 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -251,6 +251,23 @@ ENTRY(__switch_to_asm) jmp __switch_to END(__switch_to_asm) +/* + * The unwinder expects the last frame on the stack to always be at the same + * offset from the end of the page, which allows it to validate the stack. + * Calling schedule_tail() directly would break that convention because its an + * asmlinkage function so its argument has to be pushed on the stack. This + * wrapper creates a proper "end of stack" frame header before the call. + */ +ENTRY(schedule_tail_wrapper) + FRAME_BEGIN + + pushl %eax + call schedule_tail + popl %eax + + FRAME_END + ret +ENDPROC(schedule_tail_wrapper) /* * A newly forked process directly context switches into this address. * @@ -259,24 +276,15 @@ END(__switch_to_asm) * edi: kernel thread arg */ ENTRY(ret_from_fork) - FRAME_BEGIN /* help unwinder find end of stack */ - - /* - * schedule_tail() is asmlinkage so we have to put its 'prev' argument - * on the stack. - */ - pushl %eax - call schedule_tail - popl %eax + call schedule_tail_wrapper testl %ebx, %ebx jnz 1f /* kernel threads are uncommon */ 2: /* When we fork, we trace the syscall return in the child, too. */ - leal FRAME_OFFSET(%esp), %eax + movl %esp, %eax call syscall_return_slowpath - FRAME_END jmp restore_all /* kernel thread */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 607d72c4a485..4a4c0834f965 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -36,7 +36,6 @@ #include #include #include -#include #include .code64 @@ -406,19 +405,17 @@ END(__switch_to_asm) * r12: kernel thread arg */ ENTRY(ret_from_fork) - FRAME_BEGIN /* help unwinder find end of stack */ movq %rax, %rdi - call schedule_tail /* rdi: 'prev' task parameter */ + call schedule_tail /* rdi: 'prev' task parameter */ - testq %rbx, %rbx /* from kernel_thread? */ - jnz 1f /* kernel threads are uncommon */ + testq %rbx, %rbx /* from kernel_thread? */ + jnz 1f /* kernel threads are uncommon */ 2: - leaq FRAME_OFFSET(%rsp),%rdi /* pt_regs pointer */ + movq %rsp, %rdi call syscall_return_slowpath /* returns with IRQs disabled */ TRACE_IRQS_ON /* user mode is traced as IRQS on */ SWAPGS - FRAME_END jmp restore_regs_and_iret 1: From 519fb5c3350d1b5225b27b1cac55144f79351718 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 23 May 2017 10:37:30 -0500 Subject: [PATCH 3/8] x86/unwind: Add end-of-stack check for ftrace handlers Dave Jones and Steven Rostedt reported unwinder warnings like the following: WARNING: kernel stack frame pointer at ffff8800bda0ff30 in sshd:1090 has bad value 000055b32abf1fa8 In both cases, the unwinder was attempting to unwind from an ftrace handler into entry code. The callchain was something like: syscall entry code C function ftrace handler save_stack_trace() The problem is that the unwinder's end-of-stack logic gets confused by the way ftrace lays out the stack frame (with fentry enabled). I was able to recreate this warning with: echo call_usermodehelper_exec_async:stacktrace > /sys/kernel/debug/tracing/set_ftrace_filter (exit login session) I considered fixing this by changing the ftrace code to rewrite the stack to make the unwinder happy. But that seemed too intrusive after I implemented it. Instead, just add another check to the unwinder's end-of-stack logic to detect this special case. Side note: We could probably get rid of these end-of-stack checks by encoding the frame pointer for syscall entry just like we do for interrupt entry. That would be simpler, but it would also be a lot more intrusive since it would slightly affect the performance of every syscall. Reported-by: Dave Jones Reported-by: Steven Rostedt Signed-off-by: Josh Poimboeuf Acked-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Petr Mladek Cc: live-patching@vger.kernel.org Fixes: c32c47c68a0a ("x86/unwind: Warn on bad frame pointer") Link: http://lkml.kernel.org/r/671ba22fbc0156b8f7e0cfa5ab2a795e08bc37e1.1495553739.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/unwind_frame.c | 49 +++++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 82c6d7f1fd73..b9389d72b2f7 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -104,6 +104,11 @@ static inline unsigned long *last_frame(struct unwind_state *state) return (unsigned long *)task_pt_regs(state->task) - 2; } +static bool is_last_frame(struct unwind_state *state) +{ + return state->bp == last_frame(state); +} + #ifdef CONFIG_X86_32 #define GCC_REALIGN_WORDS 3 #else @@ -115,16 +120,15 @@ static inline unsigned long *last_aligned_frame(struct unwind_state *state) return last_frame(state) - GCC_REALIGN_WORDS; } -static bool is_last_task_frame(struct unwind_state *state) +static bool is_last_aligned_frame(struct unwind_state *state) { unsigned long *last_bp = last_frame(state); unsigned long *aligned_bp = last_aligned_frame(state); /* - * We have to check for the last task frame at two different locations - * because gcc can occasionally decide to realign the stack pointer and - * change the offset of the stack frame in the prologue of a function - * called by head/entry code. Examples: + * GCC can occasionally decide to realign the stack pointer and change + * the offset of the stack frame in the prologue of a function called + * by head/entry code. Examples: * * : * push %edi @@ -141,11 +145,38 @@ static bool is_last_task_frame(struct unwind_state *state) * push %rbp * mov %rsp,%rbp * - * Note that after aligning the stack, it pushes a duplicate copy of - * the return address before pushing the frame pointer. + * After aligning the stack, it pushes a duplicate copy of the return + * address before pushing the frame pointer. */ - return (state->bp == last_bp || - (state->bp == aligned_bp && *(aligned_bp+1) == *(last_bp+1))); + return (state->bp == aligned_bp && *(aligned_bp + 1) == *(last_bp + 1)); +} + +static bool is_last_ftrace_frame(struct unwind_state *state) +{ + unsigned long *last_bp = last_frame(state); + unsigned long *last_ftrace_bp = last_bp - 3; + + /* + * When unwinding from an ftrace handler of a function called by entry + * code, the stack layout of the last frame is: + * + * bp + * parent ret addr + * bp + * function ret addr + * parent ret addr + * pt_regs + * ----------------- + */ + return (state->bp == last_ftrace_bp && + *state->bp == *(state->bp + 2) && + *(state->bp + 1) == *(state->bp + 4)); +} + +static bool is_last_task_frame(struct unwind_state *state) +{ + return is_last_frame(state) || is_last_aligned_frame(state) || + is_last_ftrace_frame(state); } /* From 7e6091209f7f73e2a81943020793b5ad26d645c6 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 23 May 2017 18:27:54 +0200 Subject: [PATCH 4/8] x86/build: Permit building with old make versions At least Make 3.82 dislikes the tab in front of the $(warning) function: arch/x86/Makefile:162: *** recipe commences before first target. Stop. Let's be gentle. Signed-off-by: Jan Kiszka Acked-by: Thomas Gleixner Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1944fcd8-e3df-d1f7-c0e4-60aeb1917a24@siemens.com Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 5851411e60fb..bf240b920473 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -159,7 +159,7 @@ ifdef CONFIG_FUNCTION_GRAPH_TRACER # If '-Os' is enabled, disable it and print a warning. ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE undefine CONFIG_CC_OPTIMIZE_FOR_SIZE - $(warning Disabling CONFIG_CC_OPTIMIZE_FOR_SIZE. Your compiler does not have -mfentry so you cannot optimize for size with CONFIG_FUNCTION_GRAPH_TRACER.) + $(warning Disabling CONFIG_CC_OPTIMIZE_FOR_SIZE. Your compiler does not have -mfentry so you cannot optimize for size with CONFIG_FUNCTION_GRAPH_TRACER.) endif endif From c9525a3fab63fbe091007494f8b7a06438eea6a7 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Sat, 20 May 2017 17:20:16 -0700 Subject: [PATCH 5/8] x86/watchdog: Fix Kconfig help text file path reference to lockup watchdog documentation Signed-off-by: Benjamin Peterson Acked-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Fixes: 9919cba7ff71147803c988521cc1ceb80e7f0f6d ("watchdog: Update documentation") Link: http://lkml.kernel.org/r/20170521002016.13258-1-bp@benjamin.pe Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index cd18994a9555..4ccfacc7232a 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -360,7 +360,7 @@ config SMP Management" code will be disabled if you say Y here. See also , - and the SMP-HOWTO available at + and the SMP-HOWTO available at . If you don't know what to do here, say N. From cbed27cdf0e3f7ea3b2259e86b9e34df02be3fe4 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 18 Apr 2017 15:07:11 -0400 Subject: [PATCH 6/8] x86/PAT: Fix Xorg regression on CPUs that don't support PAT In the file arch/x86/mm/pat.c, there's a '__pat_enabled' variable. The variable is set to 1 by default and the function pat_init() sets __pat_enabled to 0 if the CPU doesn't support PAT. However, on AMD K6-3 CPUs, the processor initialization code never calls pat_init() and so __pat_enabled stays 1 and the function pat_enabled() returns true, even though the K6-3 CPU doesn't support PAT. The result of this bug is that a kernel warning is produced when attempting to start the Xserver and the Xserver doesn't start (fork() returns ENOMEM). Another symptom of this bug is that the framebuffer driver doesn't set the K6-3 MTRR registers: x86/PAT: Xorg:3891 map pfn expected mapping type uncached-minus for [mem 0xe4000000-0xe5ffffff], got write-combining ------------[ cut here ]------------ WARNING: CPU: 0 PID: 3891 at arch/x86/mm/pat.c:1020 untrack_pfn+0x5c/0x9f ... x86/PAT: Xorg:3891 map pfn expected mapping type uncached-minus for [mem 0xe4000000-0xe5ffffff], got write-combining To fix the bug change pat_enabled() so that it returns true only if PAT initialization was actually done. Also, I changed boot_cpu_has(X86_FEATURE_PAT) to this_cpu_has(X86_FEATURE_PAT) in pat_ap_init(), so that we check the PAT feature on the processor that is being initialized. Signed-off-by: Mikulas Patocka Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: stable@vger.kernel.org # v4.2+ Link: http://lkml.kernel.org/r/alpine.LRH.2.02.1704181501450.26399@file01.intranet.prod.int.rdu2.redhat.com Signed-off-by: Ingo Molnar --- arch/x86/mm/pat.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 9b78685b66e6..83a59a67757a 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -65,9 +65,11 @@ static int __init nopat(char *str) } early_param("nopat", nopat); +static bool __read_mostly __pat_initialized = false; + bool pat_enabled(void) { - return !!__pat_enabled; + return __pat_initialized; } EXPORT_SYMBOL_GPL(pat_enabled); @@ -225,13 +227,14 @@ static void pat_bsp_init(u64 pat) } wrmsrl(MSR_IA32_CR_PAT, pat); + __pat_initialized = true; __init_cache_modes(pat); } static void pat_ap_init(u64 pat) { - if (!boot_cpu_has(X86_FEATURE_PAT)) { + if (!this_cpu_has(X86_FEATURE_PAT)) { /* * If this happens we are on a secondary CPU, but switched to * PAT on the boot CPU. We have no way to undo PAT. @@ -306,7 +309,7 @@ void pat_init(void) u64 pat; struct cpuinfo_x86 *c = &boot_cpu_data; - if (!pat_enabled()) { + if (!__pat_enabled) { init_cache_modes(); return; } From fc152d22d6e9fac95a9a990e6c29510bdf1b9425 Mon Sep 17 00:00:00 2001 From: Mateusz Jurczyk Date: Wed, 24 May 2017 15:55:00 +0200 Subject: [PATCH 7/8] x86/alternatives: Prevent uninitialized stack byte read in apply_alternatives() In the current form of the code, if a->replacementlen is 0, the reference to *insnbuf for comparison touches potentially garbage memory. While it doesn't affect the execution flow due to the subsequent a->replacementlen comparison, it is (rightly) detected as use of uninitialized memory by a runtime instrumentation currently under my development, and could be detected as such by other tools in the future, too (e.g. KMSAN). Fix the "false-positive" by reordering the conditions to first check the replacement instruction length before referencing specific opcode bytes. Signed-off-by: Mateusz Jurczyk Reviewed-by: Borislav Petkov Cc: Andy Lutomirski Link: http://lkml.kernel.org/r/20170524135500.27223-1-mjurczyk@google.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/alternative.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index c5b8f760473c..32e14d137416 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -409,8 +409,13 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, memcpy(insnbuf, replacement, a->replacementlen); insnbuf_sz = a->replacementlen; - /* 0xe8 is a relative jump; fix the offset. */ - if (*insnbuf == 0xe8 && a->replacementlen == 5) { + /* + * 0xe8 is a relative jump; fix the offset. + * + * Instruction length is checked before the opcode to avoid + * accessing uninitialized bytes for zero-length replacements. + */ + if (a->replacementlen == 5 && *insnbuf == 0xe8) { *(s32 *)(insnbuf + 1) += replacement - instr; DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx", *(s32 *)(insnbuf + 1), From 702644ec1cab10ffefcebb4060d4da46d4ef2c7f Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 24 May 2017 20:04:41 +0200 Subject: [PATCH 8/8] x86/timers: Move simple_udelay_calibration past init_hypervisor_platform This ensures that adjustments to x86_platform done by the hypervisor setup is already respected by this simple calibration. The current user of this, introduced by 1b5aeebf3a92 ("x86/earlyprintk: Add support for earlyprintk via USB3 debug port"), comes much later into play. Fixes: dd759d93f4dd ("x86/timers: Add simple udelay calibration") Signed-off-by: Jan Kiszka Signed-off-by: Thomas Gleixner Acked-by: Lu Baolu Link: http://lkml.kernel.org/r/5e89fe60-aab3-2c1c-aba8-32f8ad376189@siemens.com --- arch/x86/kernel/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 0b4d3c686b1e..f81823695014 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -980,8 +980,6 @@ void __init setup_arch(char **cmdline_p) */ x86_configure_nx(); - simple_udelay_calibration(); - parse_early_param(); #ifdef CONFIG_MEMORY_HOTPLUG @@ -1041,6 +1039,8 @@ void __init setup_arch(char **cmdline_p) */ init_hypervisor_platform(); + simple_udelay_calibration(); + x86_init.resources.probe_roms(); /* after parse_early_param, so could debug it */