A single update for the x86 entry code:
The current CR3 handling for kernel page table isolation in the paranoid return paths which are relevant for #NMI, #MCE, #VC, #DB and #DF is unconditionally writing CR3 with the value retrieved on exception entry. In the vast majority of cases when returning to the kernel this is a pointless exercise because CR3 was not modified on exception entry. The only situation where this is necessary is when the exception interrupts a entry from user before switching to kernel CR3 or interrupts an exit to user after switching back to user CR3. As CR3 writes can be expensive on some systems this becomes measurable overhead with high frequency #NMIs such as perf. Avoid this overhead by checking the CR3 value, which was saved on entry, and write it back to CR3 only when it us a user CR3. -----BEGIN PGP SIGNATURE----- iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAmXvTXYTHHRnbHhAbGlu dXRyb25peC5kZQAKCRCmGPVMDXSYoYMED/40YXFa0si5/9LRh/LSYglxVe/RaXCn 3oU19oWFRxdHCCLYHeQdlQGrpugM773X+4EC1dE92QpYjFnuLhl5H10h3t2e+3Uw Q2VoWEo95FuJ2v7nqex7p2pglOvNjT2VBBlcFFdhqxiC1FCupXvU17nCcLeBsPkj wbY2Sq4DxPDoWhWMNK2jhCQNVyYYluJERylS5+j0CK8vhQghq1N1WjcB6tQiAYsa 7nXz2ZJeGF0jnvLanyhAVSHDKU7QOMO3zkQpaaMlGQ9izawupe5/Gbi8ouFieCh+ xoLnGo1sgtMOXInnYaJnCiwuc+WiVN3d83aO/s7NZi8ZF60ib72xhzsRip2Cu4aV kBtJaCVLFItQZ81HRSBABj6s9MLphHVm4AaOCvCIxK0ib5KDFaWy3tZpwTU4dvwX rcwKsQrSLlOOD5zqO5dZn+HX6hK2lsNeTPLfcKVqARGn5S9fITzYbUMlkhO/FGaj ZhIgadH8+rXwFDbgS6CGbVYKtM6Ncf/VBGFfE7tEOUQVUmLws3pdLiWo6I2QTGtw fCAeF9uYmvhtiKk0e2jotZdbAg6HP2XTQSZfBxQpRgY6AnYW+XyDezcN0X1eNMJC lmNC72WYxURHZUoOIxiiVzDS9kz7YTUo3pBHFrpQlNqGTqP8r+tAhUyou16yDK/0 2G9Mms/85u89MQ== =UcMe -----END PGP SIGNATURE----- Merge tag 'x86-entry-2024-03-11' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 entry update from Thomas Gleixner: "A single update for the x86 entry code: The current CR3 handling for kernel page table isolation in the paranoid return paths which are relevant for #NMI, #MCE, #VC, #DB and #DF is unconditionally writing CR3 with the value retrieved on exception entry. In the vast majority of cases when returning to the kernel this is a pointless exercise because CR3 was not modified on exception entry. The only situation where this is necessary is when the exception interrupts a entry from user before switching to kernel CR3 or interrupts an exit to user after switching back to user CR3. As CR3 writes can be expensive on some systems this becomes measurable overhead with high frequency #NMIs such as perf. Avoid this overhead by checking the CR3 value, which was saved on entry, and write it back to CR3 only when it is a user CR3" * tag 'x86-entry-2024-03-11' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/entry: Avoid redundant CR3 write on paranoid returns
This commit is contained in:
commit
86833aec44
@ -244,17 +244,19 @@ For 32-bit we have the following conventions - kernel is built with
|
||||
.Ldone_\@:
|
||||
.endm
|
||||
|
||||
.macro RESTORE_CR3 scratch_reg:req save_reg:req
|
||||
/* Restore CR3 from a kernel context. May restore a user CR3 value. */
|
||||
.macro PARANOID_RESTORE_CR3 scratch_reg:req save_reg:req
|
||||
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
|
||||
|
||||
ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
|
||||
|
||||
/*
|
||||
* KERNEL pages can always resume with NOFLUSH as we do
|
||||
* explicit flushes.
|
||||
* If CR3 contained the kernel page tables at the paranoid exception
|
||||
* entry, then there is nothing to restore as CR3 is not modified while
|
||||
* handling the exception.
|
||||
*/
|
||||
bt $PTI_USER_PGTABLE_BIT, \save_reg
|
||||
jnc .Lnoflush_\@
|
||||
jnc .Lend_\@
|
||||
|
||||
ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
|
||||
|
||||
/*
|
||||
* Check if there's a pending flush for the user ASID we're
|
||||
@ -262,20 +264,12 @@ For 32-bit we have the following conventions - kernel is built with
|
||||
*/
|
||||
movq \save_reg, \scratch_reg
|
||||
andq $(0x7FF), \scratch_reg
|
||||
bt \scratch_reg, THIS_CPU_user_pcid_flush_mask
|
||||
jnc .Lnoflush_\@
|
||||
|
||||
btr \scratch_reg, THIS_CPU_user_pcid_flush_mask
|
||||
jmp .Lwrcr3_\@
|
||||
jc .Lwrcr3_\@
|
||||
|
||||
.Lnoflush_\@:
|
||||
SET_NOFLUSH_BIT \save_reg
|
||||
|
||||
.Lwrcr3_\@:
|
||||
/*
|
||||
* The CR3 write could be avoided when not changing its value,
|
||||
* but would require a CR3 read *and* a scratch register.
|
||||
*/
|
||||
movq \save_reg, %cr3
|
||||
.Lend_\@:
|
||||
.endm
|
||||
@ -290,7 +284,7 @@ For 32-bit we have the following conventions - kernel is built with
|
||||
.endm
|
||||
.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
|
||||
.endm
|
||||
.macro RESTORE_CR3 scratch_reg:req save_reg:req
|
||||
.macro PARANOID_RESTORE_CR3 scratch_reg:req save_reg:req
|
||||
.endm
|
||||
|
||||
#endif
|
||||
|
@ -970,14 +970,14 @@ SYM_CODE_START_LOCAL(paranoid_exit)
|
||||
IBRS_EXIT save_reg=%r15
|
||||
|
||||
/*
|
||||
* The order of operations is important. RESTORE_CR3 requires
|
||||
* The order of operations is important. PARANOID_RESTORE_CR3 requires
|
||||
* kernel GSBASE.
|
||||
*
|
||||
* NB to anyone to try to optimize this code: this code does
|
||||
* not execute at all for exceptions from user mode. Those
|
||||
* exceptions go through error_return instead.
|
||||
*/
|
||||
RESTORE_CR3 scratch_reg=%rax save_reg=%r14
|
||||
PARANOID_RESTORE_CR3 scratch_reg=%rax save_reg=%r14
|
||||
|
||||
/* Handle the three GSBASE cases */
|
||||
ALTERNATIVE "jmp .Lparanoid_exit_checkgs", "", X86_FEATURE_FSGSBASE
|
||||
@ -1406,8 +1406,7 @@ end_repeat_nmi:
|
||||
/* Always restore stashed SPEC_CTRL value (see paranoid_entry) */
|
||||
IBRS_EXIT save_reg=%r15
|
||||
|
||||
/* Always restore stashed CR3 value (see paranoid_entry) */
|
||||
RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
|
||||
PARANOID_RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
|
||||
|
||||
/*
|
||||
* The above invocation of paranoid_entry stored the GSBASE
|
||||
|
Loading…
x
Reference in New Issue
Block a user