To address the Intel SKL RSB underflow issue in software it's required to
do call depth tracking.

Provide a return thunk for call depth tracking on Intel SKL CPUs.

The tracking does not use a counter. It uses arithmetic shift right on
call entry and logical shift left on return.

The depth tracking variable is initialized to 0x8000.... when the call
depth is zero. The arithmetic shift right sign extends the MSB and
saturates after the 12th call. The shift count is 5 so the tracking covers
12 nested calls. On return the variable is shifted left logically so it
becomes zero again.

 CALL                    RET
 0: 0x8000000000000000   0x0000000000000000
 1: 0xfc00000000000000   0xf000000000000000
...
11: 0xfffffffffffffff8   0xfffffffffffffc00
12: 0xffffffffffffffff   0xffffffffffffffe0

After a return buffer fill the depth is credited 12 calls before the next
stuffing has to take place.

There is an inaccuracy for situations like this:

   10 calls
    5 returns
    3 calls
    4 returns
    3 calls
    ....

The shift count might cause this to be off by one in either direction, but
there is still a cushion vs. the RSB depth. The algorithm does not claim to
be perfect, but it should obfuscate the problem enough to make exploitation
extremely difficult.

The theory behind this is:

RSB is a stack with depth 16 which is filled on every call. On the return
path speculation "pops" entries to speculate down the call chain. Once the
speculative RSB is empty it switches to other predictors, e.g. the Branch
History Buffer, which can be mistrained by user space and misguide the
speculation path to a gadget.

Call depth tracking is designed to break this speculation path by stuffing
speculation trap calls into the RSB which are never getting a corresponding
return executed. This stalls the prediction path until it gets resteered.

The assumption is that stuffing at the 12th return is sufficient to break
the speculation before it hits the underflow and the fallback to the other
predictors. Testing confirms that it works. Johannes, one of the retbleed
researchers, tried to attack this approach but failed.

There is obviously no scientific proof that this will withstand future
research progress, but all we can do right now is to speculate about it.

The SAR/SHL usage was suggested by Andi Kleen.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111147.890071690@infradead.org
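
As an illustration of the accounting described above (this is not the kernel
implementation; the helper names, the loop bounds and the printed trace are
made up for this sketch), the SAR-on-call / SHL-on-return arithmetic can be
reproduced in a few lines of user-space C:

#include <stdio.h>
#include <stdint.h>

#define CD_SHIFT 5				/* shift count from the text above */
#define CD_INIT  0x8000000000000000ULL		/* encoding of "call depth == 0" */

/* One CALL: arithmetic shift right, sign extending the MSB.  Relies on
 * '>>' being an arithmetic shift for signed values, as on gcc/clang. */
static uint64_t track_call(uint64_t depth)
{
	return (uint64_t)((int64_t)depth >> CD_SHIFT);
}

/* One RET: logical shift left.  A zero result marks the point where the
 * tracked credit is used up and the RSB would have to be stuffed again. */
static uint64_t track_ret(uint64_t depth)
{
	return depth << CD_SHIFT;
}

int main(void)
{
	uint64_t depth = CD_INIT;
	int i;

	for (i = 1; i <= 13; i++) {
		depth = track_call(depth);
		printf("call %2d: 0x%016llx\n", i, (unsigned long long)depth);
	}
	for (i = 1; i <= 13; i++) {
		depth = track_ret(depth);
		printf("ret  %2d: 0x%016llx%s\n", i, (unsigned long long)depth,
		       depth == 0 ? "  <- stuff RSB" : "");
	}
	return 0;
}

Running it prints the same saturating call sequence and the return sequence
shrinking back to zero as in the table above.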
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/bitsperlong.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>

#define WORD_SIZE (BITS_PER_LONG / 8)

/* Intentionally omit RAX as it's context switched by hardware */
#define VCPU_RCX __VCPU_REGS_RCX * WORD_SIZE
#define VCPU_RDX __VCPU_REGS_RDX * WORD_SIZE
#define VCPU_RBX __VCPU_REGS_RBX * WORD_SIZE
/* Intentionally omit RSP as it's context switched by hardware */
#define VCPU_RBP __VCPU_REGS_RBP * WORD_SIZE
#define VCPU_RSI __VCPU_REGS_RSI * WORD_SIZE
#define VCPU_RDI __VCPU_REGS_RDI * WORD_SIZE

#ifdef CONFIG_X86_64
#define VCPU_R8  __VCPU_REGS_R8  * WORD_SIZE
#define VCPU_R9  __VCPU_REGS_R9  * WORD_SIZE
#define VCPU_R10 __VCPU_REGS_R10 * WORD_SIZE
#define VCPU_R11 __VCPU_REGS_R11 * WORD_SIZE
#define VCPU_R12 __VCPU_REGS_R12 * WORD_SIZE
#define VCPU_R13 __VCPU_REGS_R13 * WORD_SIZE
#define VCPU_R14 __VCPU_REGS_R14 * WORD_SIZE
#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE
#endif

.section .noinstr.text, "ax"

/**
 * __svm_vcpu_run - Run a vCPU via a transition to SVM guest mode
 * @vmcb_pa: unsigned long
 * @regs:    unsigned long * (to guest registers)
 */
SYM_FUNC_START(__svm_vcpu_run)
	push %_ASM_BP
#ifdef CONFIG_X86_64
	push %r15
	push %r14
	push %r13
	push %r12
#else
	push %edi
	push %esi
#endif
	push %_ASM_BX

	/* Save @regs. */
	push %_ASM_ARG2

	/* Save @vmcb. */
	push %_ASM_ARG1

	/* Move @regs to RAX. */
	mov %_ASM_ARG2, %_ASM_AX

	/* Load guest registers. */
	mov VCPU_RCX(%_ASM_AX), %_ASM_CX
	mov VCPU_RDX(%_ASM_AX), %_ASM_DX
	mov VCPU_RBX(%_ASM_AX), %_ASM_BX
	mov VCPU_RBP(%_ASM_AX), %_ASM_BP
	mov VCPU_RSI(%_ASM_AX), %_ASM_SI
	mov VCPU_RDI(%_ASM_AX), %_ASM_DI
#ifdef CONFIG_X86_64
	mov VCPU_R8 (%_ASM_AX), %r8
	mov VCPU_R9 (%_ASM_AX), %r9
	mov VCPU_R10(%_ASM_AX), %r10
	mov VCPU_R11(%_ASM_AX), %r11
	mov VCPU_R12(%_ASM_AX), %r12
	mov VCPU_R13(%_ASM_AX), %r13
	mov VCPU_R14(%_ASM_AX), %r14
	mov VCPU_R15(%_ASM_AX), %r15
#endif

	/* "POP" @vmcb to RAX. */
	pop %_ASM_AX

	/* Enter guest mode */
	sti

1:	vmrun %_ASM_AX

2:	cli

#ifdef CONFIG_RETPOLINE
	/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
	FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
#endif

	/* "POP" @regs to RAX. */
	pop %_ASM_AX

	/* Save all guest registers. */
	mov %_ASM_CX, VCPU_RCX(%_ASM_AX)
	mov %_ASM_DX, VCPU_RDX(%_ASM_AX)
	mov %_ASM_BX, VCPU_RBX(%_ASM_AX)
	mov %_ASM_BP, VCPU_RBP(%_ASM_AX)
	mov %_ASM_SI, VCPU_RSI(%_ASM_AX)
	mov %_ASM_DI, VCPU_RDI(%_ASM_AX)
#ifdef CONFIG_X86_64
	mov %r8,  VCPU_R8 (%_ASM_AX)
	mov %r9,  VCPU_R9 (%_ASM_AX)
	mov %r10, VCPU_R10(%_ASM_AX)
	mov %r11, VCPU_R11(%_ASM_AX)
	mov %r12, VCPU_R12(%_ASM_AX)
	mov %r13, VCPU_R13(%_ASM_AX)
	mov %r14, VCPU_R14(%_ASM_AX)
	mov %r15, VCPU_R15(%_ASM_AX)
#endif

	/*
	 * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
	 * untrained as soon as we exit the VM and are back to the
	 * kernel. This should be done before re-enabling interrupts
	 * because interrupt handlers won't sanitize 'ret' if the return is
	 * from the kernel.
	 */
	UNTRAIN_RET

	/*
	 * Clear all general purpose registers except RSP and RAX to prevent
	 * speculative use of the guest's values, even those that are reloaded
	 * via the stack. In theory, an L1 cache miss when restoring registers
	 * could lead to speculative execution with the guest's values.
	 * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
	 * free. RSP and RAX are exempt as they are restored by hardware
	 * during VM-Exit.
	 */
	xor %ecx, %ecx
	xor %edx, %edx
	xor %ebx, %ebx
	xor %ebp, %ebp
	xor %esi, %esi
	xor %edi, %edi
#ifdef CONFIG_X86_64
	xor %r8d,  %r8d
	xor %r9d,  %r9d
	xor %r10d, %r10d
	xor %r11d, %r11d
	xor %r12d, %r12d
	xor %r13d, %r13d
	xor %r14d, %r14d
	xor %r15d, %r15d
#endif

	pop %_ASM_BX

#ifdef CONFIG_X86_64
	pop %r12
	pop %r13
	pop %r14
	pop %r15
#else
	pop %esi
	pop %edi
#endif
	pop %_ASM_BP
	RET

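	/*
	 * Fixup path for a faulting VMRUN at 1: (see the _ASM_EXTABLE entry
	 * below).  If kvm_rebooting is set the fault is expected (e.g. SVM
	 * is being disabled for a reboot): resume at 2: and unwind normally.
	 * Otherwise report a fatal error via ud2.
	 */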
3:	cmpb $0, kvm_rebooting
	jne 2b
	ud2

	_ASM_EXTABLE(1b, 3b)

SYM_FUNC_END(__svm_vcpu_run)

/**
 * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode
 * @vmcb_pa: unsigned long
 */
SYM_FUNC_START(__svm_sev_es_vcpu_run)
	push %_ASM_BP
#ifdef CONFIG_X86_64
	push %r15
	push %r14
	push %r13
	push %r12
#else
	push %edi
	push %esi
#endif
	push %_ASM_BX

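	/*
	 * Note that, unlike __svm_vcpu_run above, no guest GPRs are loaded
	 * before VMRUN or saved and cleared afterwards: for an SEV-ES guest
	 * the register state lives in the encrypted VMSA and is handled by
	 * hardware, so there is nothing for the host to restore or sanitize
	 * here.
	 */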
	/* Move @vmcb to RAX. */
	mov %_ASM_ARG1, %_ASM_AX

	/* Enter guest mode */
	sti

1:	vmrun %_ASM_AX

2:	cli

#ifdef CONFIG_RETPOLINE
	/* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
	FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
#endif

	/*
	 * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
	 * untrained as soon as we exit the VM and are back to the
	 * kernel. This should be done before re-enabling interrupts
	 * because interrupt handlers won't sanitize RET if the return is
	 * from the kernel.
	 */
	UNTRAIN_RET

	pop %_ASM_BX

#ifdef CONFIG_X86_64
	pop %r12
	pop %r13
	pop %r14
	pop %r15
#else
	pop %esi
	pop %edi
#endif
	pop %_ASM_BP
	RET

3:	cmpb $0, kvm_rebooting
	jne 2b
	ud2

	_ASM_EXTABLE(1b, 3b)

SYM_FUNC_END(__svm_sev_es_vcpu_run)
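
For reference, a sketch of how the C side might declare and dispatch to these
two entry points. This is only an illustration derived from the parameter
documentation in the function headers above; run_vcpu_sketch and its arguments
are hypothetical, and the real prototypes and call sites live in KVM's SVM
code:

#include <stdbool.h>

/* Prototypes matching the asm doc-comments above. */
void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
void __svm_sev_es_vcpu_run(unsigned long vmcb_pa);

/* Hypothetical dispatcher: SEV-ES guests take the variant that neither loads
 * nor saves guest GPRs, everything else goes through the full path. */
static void run_vcpu_sketch(bool sev_es, unsigned long vmcb_pa,
			    unsigned long *guest_regs)
{
	if (sev_es)
		__svm_sev_es_vcpu_run(vmcb_pa);
	else
		__svm_vcpu_run(vmcb_pa, guest_regs);
}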