x86/entry/32: Introduce and use X86_BUG_ESPFIX instead of paravirt_enabled
x86_64 has very clean espfix handling on paravirt: espfix64 is set up in native_iret, so paravirt systems that override iret bypass espfix64 automatically. This is robust and straightforward.

x86_32 is messier. espfix is set up before the IRET paravirt patch point, so it can't be directly conditionalized on whether we use native_iret. We also can't easily move it into native_iret without regressing performance due to a bizarre consideration. Specifically, on 64-bit kernels, the logic is:

    if (regs->ss & 0x4)
        setup_espfix;

On 32-bit kernels, the logic is:

    if ((regs->ss & 0x4) && (regs->cs & 0x3) == 3 &&
        (regs->flags & X86_EFLAGS_VM) == 0)
        setup_espfix;

The performance of setup_espfix itself is essentially irrelevant, but the comparison happens on every IRET so its performance matters. On x86_64, there's no need for any registers except flags to implement the comparison, so we fold the whole thing into native_iret. On x86_32, we don't do that because we need a free register to implement the comparison efficiently. We therefore do espfix setup before restoring registers on x86_32.

This patch gets rid of the explicit paravirt_enabled check by introducing X86_BUG_ESPFIX on 32-bit systems and using an ALTERNATIVE to skip espfix on paravirt systems where iret != native_iret. This is also messy, but it's at least in line with other things we do.

This improves espfix performance by removing a branch, but no one cares. More importantly, it removes a paravirt_enabled user, which is good because paravirt_enabled is ill-defined and is going away.

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Reviewed-by: Borislav Petkov <bp@suse.de>
Cc: Andrew Cooper <andrew.cooper3@citrix.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Luis R. Rodriguez <mcgrof@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: boris.ostrovsky@oracle.com
Cc: david.vrabel@citrix.com
Cc: konrad.wilk@oracle.com
Cc: lguest@lists.ozlabs.org
Cc: xen-devel@lists.xensource.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
ec87e1cf7d
commit
58a5aac533
@ -361,6 +361,8 @@ restore_all:
|
||||
TRACE_IRQS_IRET
|
||||
restore_all_notrace:
|
||||
#ifdef CONFIG_X86_ESPFIX32
|
||||
ALTERNATIVE "jmp restore_nocheck", "", X86_BUG_ESPFIX
|
||||
|
||||
movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
|
||||
/*
|
||||
* Warning: PT_OLDSS(%esp) contains the wrong/random values if we
|
||||
@ -387,19 +389,6 @@ ENTRY(iret_exc )
|
||||
|
||||
#ifdef CONFIG_X86_ESPFIX32
|
||||
ldt_ss:
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
/*
|
||||
* The kernel can't run on a non-flat stack if paravirt mode
|
||||
* is active. Rather than try to fixup the high bits of
|
||||
* ESP, bypass this code entirely. This may break DOSemu
|
||||
* and/or Wine support in a paravirt VM, although the option
|
||||
* is still available to implement the setting of the high
|
||||
* 16-bits in the INTERRUPT_RETURN paravirt-op.
|
||||
*/
|
||||
cmpl $0, pv_info+PARAVIRT_enabled
|
||||
jne restore_nocheck
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Setup and switch to ESPFIX stack
|
||||
*
|
||||
|
@ -286,4 +286,12 @@
|
||||
#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
|
||||
#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
|
||||
* to avoid confusion.
|
||||
*/
|
||||
#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
|
||||
#endif
|
||||
|
||||
#endif /* _ASM_X86_CPUFEATURES_H */
|
||||
|
@ -802,6 +802,31 @@ static void detect_nopl(struct cpuinfo_x86 *c)
|
||||
clear_cpu_cap(c, X86_FEATURE_NOPL);
|
||||
#else
|
||||
set_cpu_cap(c, X86_FEATURE_NOPL);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* ESPFIX is a strange bug. All real CPUs have it. Paravirt
|
||||
* systems that run Linux at CPL > 0 may or may not have the
|
||||
* issue, but, even if they have the issue, there's absolutely
|
||||
* nothing we can do about it because we can't use the real IRET
|
||||
* instruction.
|
||||
*
|
||||
* NB: For the time being, only 32-bit kernels support
|
||||
* X86_BUG_ESPFIX as such. 64-bit kernels directly choose
|
||||
* whether to apply espfix using paravirt hooks. If any
|
||||
* non-paravirt system ever shows up that does *not* have the
|
||||
* ESPFIX issue, we can change this.
|
||||
*/
|
||||
#ifdef CONFIG_X86_32
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
do {
|
||||
extern void native_iret(void);
|
||||
if (pv_cpu_ops.iret == native_iret)
|
||||
set_cpu_bug(c, X86_BUG_ESPFIX);
|
||||
} while (0);
|
||||
#else
|
||||
set_cpu_bug(c, X86_BUG_ESPFIX);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user