x86/entry: Call enter_from_user_mode() with IRQs off

Now that slow-path syscalls always enter C before enabling
interrupts, it's straightforward to call enter_from_user_mode() before
enabling interrupts rather than doing it as part of entry tracing.

With this change, we should finally be able to retire exception_enter().

This will also enable optimizations that rely on the context tracking
state never changing while interrupts are enabled.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: Frédéric Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/bc376ecf87921a495e874ff98139b1ca2f5c5dd7.1457558566.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
Andy Lutomirski 2016-03-09 13:24:33 -08:00 committed by Ingo Molnar
parent a798f09111
commit 9999c8c01f
2 changed files with 15 additions and 23 deletions

View File

@ -45,6 +45,8 @@ __visible void enter_from_user_mode(void)
CT_WARN_ON(ct_state() != CONTEXT_USER); CT_WARN_ON(ct_state() != CONTEXT_USER);
user_exit(); user_exit();
} }
#else
static inline void enter_from_user_mode(void) {}
#endif #endif
static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch) static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
@ -85,17 +87,6 @@ unsigned long syscall_trace_enter_phase1(struct pt_regs *regs, u32 arch)
work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY; work = ACCESS_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY;
#ifdef CONFIG_CONTEXT_TRACKING
/*
* If TIF_NOHZ is set, we are required to call user_exit() before
* doing anything that could touch RCU.
*/
if (work & _TIF_NOHZ) {
enter_from_user_mode();
work &= ~_TIF_NOHZ;
}
#endif
#ifdef CONFIG_SECCOMP #ifdef CONFIG_SECCOMP
/* /*
* Do seccomp first -- it should minimize exposure of other * Do seccomp first -- it should minimize exposure of other
@ -344,6 +335,7 @@ __visible void do_syscall_64(struct pt_regs *regs)
struct thread_info *ti = pt_regs_to_thread_info(regs); struct thread_info *ti = pt_regs_to_thread_info(regs);
unsigned long nr = regs->orig_ax; unsigned long nr = regs->orig_ax;
enter_from_user_mode();
local_irq_enable(); local_irq_enable();
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY) if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
@ -366,9 +358,9 @@ __visible void do_syscall_64(struct pt_regs *regs)
#if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION)
/* /*
* Does a 32-bit syscall. Called with IRQs on and does all entry and * Does a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. Does
* exit work and returns with IRQs off. This function is extremely hot * all entry and exit work and returns with IRQs off. This function is
* in workloads that use it, and it's usually called from * extremely hot in workloads that use it, and it's usually called from
* do_fast_syscall_32, so forcibly inline it to improve performance. * do_fast_syscall_32, so forcibly inline it to improve performance.
*/ */
static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs) static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
@ -409,6 +401,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
/* Handles int $0x80 */ /* Handles int $0x80 */
__visible void do_int80_syscall_32(struct pt_regs *regs) __visible void do_int80_syscall_32(struct pt_regs *regs)
{ {
enter_from_user_mode();
local_irq_enable(); local_irq_enable();
do_syscall_32_irqs_on(regs); do_syscall_32_irqs_on(regs);
} }
@ -431,12 +424,11 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
*/ */
regs->ip = landing_pad; regs->ip = landing_pad;
/* enter_from_user_mode();
* Fetch EBP from where the vDSO stashed it.
*
* WARNING: We are in CONTEXT_USER and RCU isn't paying attention!
*/
local_irq_enable(); local_irq_enable();
/* Fetch EBP from where the vDSO stashed it. */
if ( if (
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
/* /*
@ -454,9 +446,6 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
/* User code screwed up. */ /* User code screwed up. */
local_irq_disable(); local_irq_disable();
regs->ax = -EFAULT; regs->ax = -EFAULT;
#ifdef CONFIG_CONTEXT_TRACKING
enter_from_user_mode();
#endif
prepare_exit_to_usermode(regs); prepare_exit_to_usermode(regs);
return 0; /* Keep it simple: use IRET. */ return 0; /* Keep it simple: use IRET. */
} }

View File

@ -134,7 +134,10 @@ struct thread_info {
#define _TIF_ADDR32 (1 << TIF_ADDR32) #define _TIF_ADDR32 (1 << TIF_ADDR32)
#define _TIF_X32 (1 << TIF_X32) #define _TIF_X32 (1 << TIF_X32)
/* work to do in syscall_trace_enter() */ /*
* work to do in syscall_trace_enter(). Also includes TIF_NOHZ for
* enter_from_user_mode()
*/
#define _TIF_WORK_SYSCALL_ENTRY \ #define _TIF_WORK_SYSCALL_ENTRY \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT | \
_TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \ _TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \