x86/entry/64: Convert SYSRET validation tests to C

No change in functionality expected.

Signed-off-by: Brian Gerst <brgerst@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Uros Bizjak <ubizjak@gmail.com>
Link: https://lore.kernel.org/r/20231011224351.130935-2-brgerst@gmail.com
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -71,7 +71,8 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr)
 	return false;
 }
 
-__visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
+/* Returns true to return using SYSRET, or false to use IRET */
+__visible noinstr bool do_syscall_64(struct pt_regs *regs, int nr)
 {
 	add_random_kstack_offset();
 	nr = syscall_enter_from_user_mode(regs, nr);
@@ -85,6 +86,46 @@ __visible noinstr void do_syscall_64(struct pt_regs *regs, int nr)
 
 	instrumentation_end();
 	syscall_exit_to_user_mode(regs);
+
+	/*
+	 * Check that the register state is valid for using SYSRET to exit
+	 * to userspace. Otherwise use the slower but fully capable IRET
+	 * exit path.
+	 */
+
+	/* XEN PV guests always use the IRET path */
+	if (cpu_feature_enabled(X86_FEATURE_XENPV))
+		return false;
+
+	/* SYSRET requires RCX == RIP and R11 == EFLAGS */
+	if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
+		return false;
+
+	/* CS and SS must match the values set in MSR_STAR */
+	if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
+		return false;
+
+	/*
+	 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
+	 * in kernel space. This essentially lets the user take over
+	 * the kernel, since userspace controls RSP.
+	 *
+	 * Change top bits to match the most significant bit (47th or 56th bit
+	 * depending on paging mode) in the address.
+	 */
+	if (unlikely(!__is_canonical_address(regs->ip, __VIRTUAL_MASK_SHIFT + 1)))
+		return false;
+
+	/*
+	 * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
+	 * restoring TF results in a trap from userspace immediately after
+	 * SYSRET.
+	 */
+	if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)))
+		return false;
+
+	/* Use SYSRET to exit to userspace */
+	return true;
 }
 #endif
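The subtlest of these checks is the canonicality test: the old assembly did it with a shl/sar pair, while the C version calls __is_canonical_address(), which performs the same sign-extension round trip. The following stand-alone user-space sketch shows the trick itself; is_canonical() is a hypothetical stand-in mirroring the kernel helper's logic, not kernel code:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Shift the address left so only the architecturally meaningful bits
 * remain, then arithmetic-shift right so the most significant address
 * bit (bit 47 with 4-level paging, bit 56 with 5-level) is copied into
 * the top bits. The address is canonical iff this round trip is a no-op.
 */
static bool is_canonical(uint64_t vaddr, unsigned int vaddr_bits)
{
	unsigned int shift = 64 - vaddr_bits;

	return (uint64_t)((int64_t)(vaddr << shift) >> shift) == vaddr;
}

int main(void)
{
	/* 48 address bits: 4-level paging, i.e. __VIRTUAL_MASK_SHIFT + 1 */
	printf("%d\n", is_canonical(0x00007fffffffffffULL, 48)); /* 1: top of user space */
	printf("%d\n", is_canonical(0xffff800000000000ULL, 48)); /* 1: start of the kernel half */
	printf("%d\n", is_canonical(0x0000800000000000ULL, 48)); /* 0: inside the non-canonical hole */
	return 0;
}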
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -126,57 +126,8 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
 	 * In the Xen PV case we must use iret anyway.
 	 */
 
-	ALTERNATIVE "", "jmp swapgs_restore_regs_and_return_to_usermode", \
-		X86_FEATURE_XENPV
-
-	movq	RCX(%rsp), %rcx
-	movq	RIP(%rsp), %r11
-
-	cmpq	%rcx, %r11	/* SYSRET requires RCX == RIP */
-	jne	swapgs_restore_regs_and_return_to_usermode
-
-	/*
-	 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
-	 * in kernel space. This essentially lets the user take over
-	 * the kernel, since userspace controls RSP.
-	 *
-	 * If width of "canonical tail" ever becomes variable, this will need
-	 * to be updated to remain correct on both old and new CPUs.
-	 *
-	 * Change top bits to match most significant bit (47th or 56th bit
-	 * depending on paging mode) in the address.
-	 */
-#ifdef CONFIG_X86_5LEVEL
-	ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \
-		"shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57
-#else
-	shl	$(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
-	sar	$(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
-#endif
-
-	/* If this changed %rcx, it was not canonical */
-	cmpq	%rcx, %r11
-	jne	swapgs_restore_regs_and_return_to_usermode
-
-	cmpq	$__USER_CS, CS(%rsp)	/* CS must match SYSRET */
-	jne	swapgs_restore_regs_and_return_to_usermode
-
-	movq	R11(%rsp), %r11
-	cmpq	%r11, EFLAGS(%rsp)	/* R11 == RFLAGS */
-	jne	swapgs_restore_regs_and_return_to_usermode
-
-	/*
-	 * SYSRET cannot restore RF. It can restore TF, but unlike IRET,
-	 * restoring TF results in a trap from userspace immediately after
-	 * SYSRET.
-	 */
-	testq	$(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
-	jnz	swapgs_restore_regs_and_return_to_usermode
-
-	/* nothing to check for RSP */
-
-	cmpq	$__USER_DS, SS(%rsp)	/* SS must match SYSRET */
-	jne	swapgs_restore_regs_and_return_to_usermode
+	ALTERNATIVE "testb %al, %al; jz swapgs_restore_regs_and_return_to_usermode", \
+		"jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
 
 	/*
 	 * We win! This label is here just for ease of understanding
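The asm side now needs only the bool verdict, which arrives in %al per the SysV x86-64 ABI ("call do_syscall_64" followed by "testb %al, %al"), while the RF/TF condition itself lives on unchanged in C. Here is a small self-contained sketch of that flag predicate; the flag values are the architectural EFLAGS bits, and must_use_iret() is a made-up name for illustration:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define X86_EFLAGS_TF	0x00000100UL	/* Trap Flag: would trap right after SYSRET */
#define X86_EFLAGS_RF	0x00010000UL	/* Resume Flag: SYSRET cannot restore it */

/* Same predicate as the removed "testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11" */
static bool must_use_iret(uint64_t flags)
{
	return flags & (X86_EFLAGS_RF | X86_EFLAGS_TF);
}

int main(void)
{
	printf("%d\n", must_use_iret(0x202));                 /* 0: ordinary frame, IF set */
	printf("%d\n", must_use_iret(0x202 | X86_EFLAGS_TF)); /* 1: single-stepping, use IRET */
	return 0;
}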
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -126,7 +126,7 @@ static inline int syscall_get_arch(struct task_struct *task)
 		? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
 }
 
-void do_syscall_64(struct pt_regs *regs, int nr);
+bool do_syscall_64(struct pt_regs *regs, int nr);
 
 #endif /* CONFIG_X86_32 */
 
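With the declaration returning bool, the dispatch also reads naturally at the C level. A hypothetical mock of the caller's view (the struct, the values, and both helpers are illustrative stand-ins; the real consumer is the entry_SYSCALL_64 stub patched above):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for the kernel's struct pt_regs */
struct pt_regs { unsigned long cx, ip, r11, flags; };

/* Condensed mock of the checks added in common.c */
static bool do_syscall_64_mock(struct pt_regs *regs, int nr)
{
	(void)nr;
	return regs->cx == regs->ip && regs->r11 == regs->flags;
}

int main(void)
{
	struct pt_regs regs = {
		.cx = 0x401000, .ip = 0x401000, /* RCX == RIP */
		.r11 = 0x202,   .flags = 0x202, /* R11 == EFLAGS */
	};

	if (do_syscall_64_mock(&regs, 0))
		puts("exit via SYSRET fast path");
	else
		puts("exit via IRET slow path");
	return 0;
}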