- Fix a couple of SWAPGS fencing issues in the x86 entry code
- Use the proper operand types in __{get,put}_user() to prevent truncation in SEV-ES string io - Make sure the kernel mappings are present in trampoline_pgd in order to prevent any potential accesses to unmapped memory after switching to it - Fix a trivial list corruption in objtool's pv_ops validation - Disable the clocksource watchdog for TSC on platforms which claim that the TSC is constant, doesn't stop in sleep states, CPU has TSC adjust and the number of sockets of the platform are max 2, to prevent erroneous markings of the TSC as unstable. - Make sure TSC adjust is always checked not only when going idle - Prevent a stack leak by initializing struct _fpx_sw_bytes properly in the FPU code - Fix INTEL_FAM6_RAPTORLAKE define naming to adhere to the convention -----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAmGsnWcACgkQEsHwGGHe VUoR+g/9FcOP0/XLH+LKHumYc9JHXsp5BvYGihyypMFgU0fXQBORtGqdls8jZtiJ kEdbW6iL0MRlyN8aHJCqr7dJqs7KJlpWes6hky7BY+U+7uewtjL5y3eSyZnA34T3 M/Raecx27Hh0L0kHQlHXTUN73v1cgDvq3dCXWsP7Jqgjf5cEmCcV/tPEateqhq/f 8TkLVIm55rJlbJ0LBO/cT0V3Q8QH9JPKm7nviOZuKCh9gcttFEPaM9MkaJyKUhoy O13jlenDoVkVWRXIQec1EZp2pTLxVAm/3Y0plge1yEVsejzh07gsQnMpoNeF+yFC 8mDgSv8ZAED/vbsnB+BcgoRVj6ajG0+ilpLzcfPwUquiqS9pZrBSTddlvYDPjRMC MEXO548xiYgxmipu3r62H89nqmLEYQPk914rJu6bDnDeJ1gaabh8RXbNtQcRqqj3 RETgVOp78iWn+aT33RLLD1EyodZb2IkMy087a3+TZICIXG81aDj9VgHvrVRnWnfY yKuldyrEKzi60yMQkV6h1oc8KSWQhspUSLtOVS9zrulCinYphFOfYFrzFmcKUWIq GdVb9eaP2oNBGfPybXP+TBLGZ4Zv9iXZmaEUk7ZGCjgv3ZmGMWJ18Hs/ufs2cwWK RNNUo3sz/y3OsreHowkWIk1eSxI16MabB7G/PDMnBSHlioVT390= =d6nS -----END PGP SIGNATURE----- Merge tag 'x86_urgent_for_v5.16_rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 fixes from Borislav Petkov: - Fix a couple of SWAPGS fencing issues in the x86 entry code - Use the proper operand types in __{get,put}_user() to prevent truncation in SEV-ES string io - Make sure the kernel mappings are present in trampoline_pgd in order to prevent any potential accesses to unmapped memory after switching to it - Fix a trivial list corruption in objtool's pv_ops validation - Disable the clocksource watchdog for TSC on platforms which claim that the TSC is constant, doesn't stop in sleep states, CPU has TSC adjust and the number of sockets of the platform are max 2, to prevent erroneous markings of the TSC as unstable. - Make sure TSC adjust is always checked not only when going idle - Prevent a stack leak by initializing struct _fpx_sw_bytes properly in the FPU code - Fix INTEL_FAM6_RAPTORLAKE define naming to adhere to the convention * tag 'x86_urgent_for_v5.16_rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/xen: Add xenpv_restore_regs_and_return_to_usermode() x86/entry: Use the correct fence macro after swapgs in kernel CR3 x86/entry: Add a fence for kernel entry SWAPGS in paranoid_entry() x86/sev: Fix SEV-ES INS/OUTS instructions for word, dword, and qword x86/64/mm: Map all kernel memory into trampoline_pgd objtool: Fix pv_ops noinstr validation x86/tsc: Disable clocksource watchdog for TSC on qualified platorms x86/tsc: Add a timer to make sure TSC_adjust is always checked x86/fpu/signal: Initialize sw_bytes in save_xstate_epilog() x86/cpu: Drop spurious underscore from RAPTOR_LAKE #define
This commit is contained in:
commit
f5d54a42d3
@ -574,6 +574,10 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
|
||||
ud2
|
||||
1:
|
||||
#endif
|
||||
#ifdef CONFIG_XEN_PV
|
||||
ALTERNATIVE "", "jmp xenpv_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV
|
||||
#endif
|
||||
|
||||
POP_REGS pop_rdi=0
|
||||
|
||||
/*
|
||||
@ -890,6 +894,7 @@ SYM_CODE_START_LOCAL(paranoid_entry)
|
||||
.Lparanoid_entry_checkgs:
|
||||
/* EBX = 1 -> kernel GSBASE active, no restore required */
|
||||
movl $1, %ebx
|
||||
|
||||
/*
|
||||
* The kernel-enforced convention is a negative GSBASE indicates
|
||||
* a kernel value. No SWAPGS needed on entry and exit.
|
||||
@ -897,21 +902,14 @@ SYM_CODE_START_LOCAL(paranoid_entry)
|
||||
movl $MSR_GS_BASE, %ecx
|
||||
rdmsr
|
||||
testl %edx, %edx
|
||||
jns .Lparanoid_entry_swapgs
|
||||
ret
|
||||
|
||||
.Lparanoid_entry_swapgs:
|
||||
swapgs
|
||||
|
||||
/*
|
||||
* The above SAVE_AND_SWITCH_TO_KERNEL_CR3 macro doesn't do an
|
||||
* unconditional CR3 write, even in the PTI case. So do an lfence
|
||||
* to prevent GS speculation, regardless of whether PTI is enabled.
|
||||
*/
|
||||
FENCE_SWAPGS_KERNEL_ENTRY
|
||||
js .Lparanoid_kernel_gsbase
|
||||
|
||||
/* EBX = 0 -> SWAPGS required on exit */
|
||||
xorl %ebx, %ebx
|
||||
swapgs
|
||||
.Lparanoid_kernel_gsbase:
|
||||
|
||||
FENCE_SWAPGS_KERNEL_ENTRY
|
||||
ret
|
||||
SYM_CODE_END(paranoid_entry)
|
||||
|
||||
@ -993,11 +991,6 @@ SYM_CODE_START_LOCAL(error_entry)
|
||||
pushq %r12
|
||||
ret
|
||||
|
||||
.Lerror_entry_done_lfence:
|
||||
FENCE_SWAPGS_KERNEL_ENTRY
|
||||
.Lerror_entry_done:
|
||||
ret
|
||||
|
||||
/*
|
||||
* There are two places in the kernel that can potentially fault with
|
||||
* usergs. Handle them here. B stepping K8s sometimes report a
|
||||
@ -1020,8 +1013,14 @@ SYM_CODE_START_LOCAL(error_entry)
|
||||
* .Lgs_change's error handler with kernel gsbase.
|
||||
*/
|
||||
SWAPGS
|
||||
FENCE_SWAPGS_USER_ENTRY
|
||||
jmp .Lerror_entry_done
|
||||
|
||||
/*
|
||||
* Issue an LFENCE to prevent GS speculation, regardless of whether it is a
|
||||
* kernel or user gsbase.
|
||||
*/
|
||||
.Lerror_entry_done_lfence:
|
||||
FENCE_SWAPGS_KERNEL_ENTRY
|
||||
ret
|
||||
|
||||
.Lbstep_iret:
|
||||
/* Fix truncated RIP */
|
||||
|
@ -108,7 +108,7 @@
|
||||
#define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */
|
||||
#define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */
|
||||
|
||||
#define INTEL_FAM6_RAPTOR_LAKE 0xB7
|
||||
#define INTEL_FAM6_RAPTORLAKE 0xB7
|
||||
|
||||
/* "Small Core" Processors (Atom) */
|
||||
|
||||
|
@ -118,7 +118,7 @@ static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
|
||||
struct fpstate *fpstate)
|
||||
{
|
||||
struct xregs_state __user *x = buf;
|
||||
struct _fpx_sw_bytes sw_bytes;
|
||||
struct _fpx_sw_bytes sw_bytes = {};
|
||||
u32 xfeatures;
|
||||
int err;
|
||||
|
||||
|
@ -294,11 +294,6 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
|
||||
char *dst, char *buf, size_t size)
|
||||
{
|
||||
unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
|
||||
char __user *target = (char __user *)dst;
|
||||
u64 d8;
|
||||
u32 d4;
|
||||
u16 d2;
|
||||
u8 d1;
|
||||
|
||||
/*
|
||||
* This function uses __put_user() independent of whether kernel or user
|
||||
@ -320,26 +315,42 @@ static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
|
||||
* instructions here would cause infinite nesting.
|
||||
*/
|
||||
switch (size) {
|
||||
case 1:
|
||||
case 1: {
|
||||
u8 d1;
|
||||
u8 __user *target = (u8 __user *)dst;
|
||||
|
||||
memcpy(&d1, buf, 1);
|
||||
if (__put_user(d1, target))
|
||||
goto fault;
|
||||
break;
|
||||
case 2:
|
||||
}
|
||||
case 2: {
|
||||
u16 d2;
|
||||
u16 __user *target = (u16 __user *)dst;
|
||||
|
||||
memcpy(&d2, buf, 2);
|
||||
if (__put_user(d2, target))
|
||||
goto fault;
|
||||
break;
|
||||
case 4:
|
||||
}
|
||||
case 4: {
|
||||
u32 d4;
|
||||
u32 __user *target = (u32 __user *)dst;
|
||||
|
||||
memcpy(&d4, buf, 4);
|
||||
if (__put_user(d4, target))
|
||||
goto fault;
|
||||
break;
|
||||
case 8:
|
||||
}
|
||||
case 8: {
|
||||
u64 d8;
|
||||
u64 __user *target = (u64 __user *)dst;
|
||||
|
||||
memcpy(&d8, buf, 8);
|
||||
if (__put_user(d8, target))
|
||||
goto fault;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
|
||||
return ES_UNSUPPORTED;
|
||||
@ -362,11 +373,6 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
|
||||
char *src, char *buf, size_t size)
|
||||
{
|
||||
unsigned long error_code = X86_PF_PROT;
|
||||
char __user *s = (char __user *)src;
|
||||
u64 d8;
|
||||
u32 d4;
|
||||
u16 d2;
|
||||
u8 d1;
|
||||
|
||||
/*
|
||||
* This function uses __get_user() independent of whether kernel or user
|
||||
@ -388,26 +394,41 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
|
||||
* instructions here would cause infinite nesting.
|
||||
*/
|
||||
switch (size) {
|
||||
case 1:
|
||||
case 1: {
|
||||
u8 d1;
|
||||
u8 __user *s = (u8 __user *)src;
|
||||
|
||||
if (__get_user(d1, s))
|
||||
goto fault;
|
||||
memcpy(buf, &d1, 1);
|
||||
break;
|
||||
case 2:
|
||||
}
|
||||
case 2: {
|
||||
u16 d2;
|
||||
u16 __user *s = (u16 __user *)src;
|
||||
|
||||
if (__get_user(d2, s))
|
||||
goto fault;
|
||||
memcpy(buf, &d2, 2);
|
||||
break;
|
||||
case 4:
|
||||
}
|
||||
case 4: {
|
||||
u32 d4;
|
||||
u32 __user *s = (u32 __user *)src;
|
||||
|
||||
if (__get_user(d4, s))
|
||||
goto fault;
|
||||
memcpy(buf, &d4, 4);
|
||||
break;
|
||||
case 8:
|
||||
}
|
||||
case 8: {
|
||||
u64 d8;
|
||||
u64 __user *s = (u64 __user *)src;
|
||||
if (__get_user(d8, s))
|
||||
goto fault;
|
||||
memcpy(buf, &d8, 8);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
|
||||
return ES_UNSUPPORTED;
|
||||
|
@ -1180,6 +1180,12 @@ void mark_tsc_unstable(char *reason)
|
||||
|
||||
EXPORT_SYMBOL_GPL(mark_tsc_unstable);
|
||||
|
||||
static void __init tsc_disable_clocksource_watchdog(void)
|
||||
{
|
||||
clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
|
||||
clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
|
||||
}
|
||||
|
||||
static void __init check_system_tsc_reliable(void)
|
||||
{
|
||||
#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC)
|
||||
@ -1196,6 +1202,23 @@ static void __init check_system_tsc_reliable(void)
|
||||
#endif
|
||||
if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE))
|
||||
tsc_clocksource_reliable = 1;
|
||||
|
||||
/*
|
||||
* Disable the clocksource watchdog when the system has:
|
||||
* - TSC running at constant frequency
|
||||
* - TSC which does not stop in C-States
|
||||
* - the TSC_ADJUST register which allows to detect even minimal
|
||||
* modifications
|
||||
* - not more than two sockets. As the number of sockets cannot be
|
||||
* evaluated at the early boot stage where this has to be
|
||||
* invoked, check the number of online memory nodes as a
|
||||
* fallback solution which is an reasonable estimate.
|
||||
*/
|
||||
if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
|
||||
boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
|
||||
boot_cpu_has(X86_FEATURE_TSC_ADJUST) &&
|
||||
nr_online_nodes <= 2)
|
||||
tsc_disable_clocksource_watchdog();
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1387,9 +1410,6 @@ static int __init init_tsc_clocksource(void)
|
||||
if (tsc_unstable)
|
||||
goto unreg;
|
||||
|
||||
if (tsc_clocksource_reliable || no_tsc_watchdog)
|
||||
clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
|
||||
clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
|
||||
|
||||
@ -1527,7 +1547,7 @@ void __init tsc_init(void)
|
||||
}
|
||||
|
||||
if (tsc_clocksource_reliable || no_tsc_watchdog)
|
||||
clocksource_tsc_early.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
|
||||
tsc_disable_clocksource_watchdog();
|
||||
|
||||
clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
|
||||
detect_art();
|
||||
|
@ -30,6 +30,7 @@ struct tsc_adjust {
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU(struct tsc_adjust, tsc_adjust);
|
||||
static struct timer_list tsc_sync_check_timer;
|
||||
|
||||
/*
|
||||
* TSC's on different sockets may be reset asynchronously.
|
||||
@ -77,6 +78,46 @@ void tsc_verify_tsc_adjust(bool resume)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Normally the tsc_sync will be checked every time system enters idle
|
||||
* state, but there is still caveat that a system won't enter idle,
|
||||
* either because it's too busy or configured purposely to not enter
|
||||
* idle.
|
||||
*
|
||||
* So setup a periodic timer (every 10 minutes) to make sure the check
|
||||
* is always on.
|
||||
*/
|
||||
|
||||
#define SYNC_CHECK_INTERVAL (HZ * 600)
|
||||
|
||||
static void tsc_sync_check_timer_fn(struct timer_list *unused)
|
||||
{
|
||||
int next_cpu;
|
||||
|
||||
tsc_verify_tsc_adjust(false);
|
||||
|
||||
/* Run the check for all onlined CPUs in turn */
|
||||
next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
|
||||
if (next_cpu >= nr_cpu_ids)
|
||||
next_cpu = cpumask_first(cpu_online_mask);
|
||||
|
||||
tsc_sync_check_timer.expires += SYNC_CHECK_INTERVAL;
|
||||
add_timer_on(&tsc_sync_check_timer, next_cpu);
|
||||
}
|
||||
|
||||
static int __init start_sync_check_timer(void)
|
||||
{
|
||||
if (!cpu_feature_enabled(X86_FEATURE_TSC_ADJUST) || tsc_clocksource_reliable)
|
||||
return 0;
|
||||
|
||||
timer_setup(&tsc_sync_check_timer, tsc_sync_check_timer_fn, 0);
|
||||
tsc_sync_check_timer.expires = jiffies + SYNC_CHECK_INTERVAL;
|
||||
add_timer(&tsc_sync_check_timer);
|
||||
|
||||
return 0;
|
||||
}
|
||||
late_initcall(start_sync_check_timer);
|
||||
|
||||
static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval,
|
||||
unsigned int cpu, bool bootcpu)
|
||||
{
|
||||
|
@ -72,6 +72,7 @@ static void __init setup_real_mode(void)
|
||||
#ifdef CONFIG_X86_64
|
||||
u64 *trampoline_pgd;
|
||||
u64 efer;
|
||||
int i;
|
||||
#endif
|
||||
|
||||
base = (unsigned char *)real_mode_header;
|
||||
@ -128,8 +129,17 @@ static void __init setup_real_mode(void)
|
||||
trampoline_header->flags = 0;
|
||||
|
||||
trampoline_pgd = (u64 *) __va(real_mode_header->trampoline_pgd);
|
||||
|
||||
/* Map the real mode stub as virtual == physical */
|
||||
trampoline_pgd[0] = trampoline_pgd_entry.pgd;
|
||||
trampoline_pgd[511] = init_top_pgt[511].pgd;
|
||||
|
||||
/*
|
||||
* Include the entirety of the kernel mapping into the trampoline
|
||||
* PGD. This way, all mappings present in the normal kernel page
|
||||
* tables are usable while running on trampoline_pgd.
|
||||
*/
|
||||
for (i = pgd_index(__PAGE_OFFSET); i < PTRS_PER_PGD; i++)
|
||||
trampoline_pgd[i] = init_top_pgt[i].pgd;
|
||||
#endif
|
||||
|
||||
sme_sev_setup_real_mode(trampoline_header);
|
||||
|
@ -20,6 +20,7 @@
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/linkage.h>
|
||||
#include <../entry/calling.h>
|
||||
|
||||
.pushsection .noinstr.text, "ax"
|
||||
/*
|
||||
@ -192,6 +193,25 @@ SYM_CODE_START(xen_iret)
|
||||
jmp hypercall_iret
|
||||
SYM_CODE_END(xen_iret)
|
||||
|
||||
/*
|
||||
* XEN pv doesn't use trampoline stack, PER_CPU_VAR(cpu_tss_rw + TSS_sp0) is
|
||||
* also the kernel stack. Reusing swapgs_restore_regs_and_return_to_usermode()
|
||||
* in XEN pv would cause %rsp to move up to the top of the kernel stack and
|
||||
* leave the IRET frame below %rsp, which is dangerous to be corrupted if #NMI
|
||||
* interrupts. And swapgs_restore_regs_and_return_to_usermode() pushing the IRET
|
||||
* frame at the same address is useless.
|
||||
*/
|
||||
SYM_CODE_START(xenpv_restore_regs_and_return_to_usermode)
|
||||
UNWIND_HINT_REGS
|
||||
POP_REGS
|
||||
|
||||
/* stackleak_erase() can work safely on the kernel stack. */
|
||||
STACKLEAK_ERASE_NOCLOBBER
|
||||
|
||||
addq $8, %rsp /* skip regs->orig_ax */
|
||||
jmp xen_iret
|
||||
SYM_CODE_END(xenpv_restore_regs_and_return_to_usermode)
|
||||
|
||||
/*
|
||||
* Xen handles syscall callbacks much like ordinary exceptions, which
|
||||
* means we have:
|
||||
|
@ -375,6 +375,7 @@ static int read_symbols(struct elf *elf)
|
||||
return -1;
|
||||
}
|
||||
memset(sym, 0, sizeof(*sym));
|
||||
INIT_LIST_HEAD(&sym->pv_target);
|
||||
sym->alias = sym;
|
||||
|
||||
sym->idx = i;
|
||||
|
@ -153,6 +153,10 @@ void objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func)
|
||||
!strcmp(func->name, "_paravirt_ident_64"))
|
||||
return;
|
||||
|
||||
/* already added this function */
|
||||
if (!list_empty(&func->pv_target))
|
||||
return;
|
||||
|
||||
list_add(&func->pv_target, &f->pv_ops[idx].targets);
|
||||
f->pv_ops[idx].clean = false;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user