Fix kCFI/FineIBT weaknesses
The primary bug Alyssa noticed was that with FineIBT enabled function prologues have a spurious ENDBR instruction: __cfi_foo: endbr64 subl $hash, %r10d jz 1f ud2 nop 1: foo: endbr64 <--- *sadface* This means that any indirect call that fails to target the __cfi symbol and instead targets (the regular old) foo+0, will succeed due to that second ENDBR. Fixing this lead to the discovery of a single indirect call that was still doing this: ret_from_fork(), since that's an assembly stub the compmiler would not generate the proper kCFI indirect call magic and it would not get patched. Brian came up with the most comprehensive fix -- convert the thing to C with only a very thin asm wrapper. This ensures the kernel thread boostrap is a proper kCFI call. While discussing all this, Kees noted that kCFI hashes could/should be poisoned to seal all functions whose address is never taken, further limiting the valid kCFI targets -- much like we already do for IBT. So what was a 'simple' observation and fix cascaded into a bunch of inter-related CFI infrastructure fixes. -----BEGIN PGP SIGNATURE----- iQJJBAABCgAzFiEEv3OU3/byMaA0LqWJdkfhpEvA5LoFAmSxr64VHHBldGVyekBp bmZyYWRlYWQub3JnAAoJEHZH4aRLwOS6L7kQAIjDWbxqVtmiBiz+IBcWcsxt7BXX pRBaSe/eBp3KLhqgzYUY0mXIi0ua7y3CBtW4SdQUSPsAKtCgBUuq2JjQWToRghjN 4ndCky4oxb9z8ADr/R/qfU8ZpSOwoX3kgBHqyjcQ0fQsg/DFKs3sWKqluwT0PtvU vLYAw2QKSv56NG/u3CujWPdcIWgzJ+M3214xuqIWCTwEcqdP+xkXmQstkXkyPQ6d XE0iG/wo9uiX4icfsRVp8JL0TkzNqGJfgr9Mv1rBKT4wbT64zKI6RyMJVlUS0yrk 1jeDgNbVfx4ZpvtHmTsQn1jogWI3pqGkqoPwHqJSFg42Eer5OSodH/uVd3HK/0tD 1nlhCfue6zc4smu480064s3fWAE7kC6ySdmijQXOJo3YWVGdagxVp/CSE4Ek0TFq y+CltNEA6bthKImWg8GFWxS8bMnuZv2joJ8yhgfpnG5sppVOYs2HJ3ipIks9sZjO o65auDeOkGg1+NhgDx+2uay6/fbxTNjbAyjV4HttkN70SO5kTTT4zWyh2PLwXaTy wv0B4i0laxTRU7boIA4nFJAKz5xKfyh9e2idxbmPlrV5FY4mEPA2oLeWsn8cS4VG 0SWJ30ky7C4r7VWd9DWhGcCRcrlCvCM8LdjwzImZHXRQ2KweEuGMmrXYtHCrTRZn IMijS/9q653h9ws7 =RhPI -----END PGP SIGNATURE----- Merge tag 'x86_urgent_for_6.5_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 CFI fixes from Peter Zijlstra: "Fix kCFI/FineIBT weaknesses The primary bug Alyssa noticed was that with FineIBT enabled function prologues have a spurious ENDBR instruction: __cfi_foo: endbr64 subl $hash, %r10d jz 1f ud2 nop 1: foo: endbr64 <--- *sadface* This means that any indirect call that fails to target the __cfi symbol and instead targets (the regular old) foo+0, will succeed due to that second ENDBR. Fixing this led to the discovery of a single indirect call that was still doing this: ret_from_fork(). Since that's an assembly stub the compiler would not generate the proper kCFI indirect call magic and it would not get patched. Brian came up with the most comprehensive fix -- convert the thing to C with only a very thin asm wrapper. This ensures the kernel thread boostrap is a proper kCFI call. While discussing all this, Kees noted that kCFI hashes could/should be poisoned to seal all functions whose address is never taken, further limiting the valid kCFI targets -- much like we already do for IBT. So what was a 'simple' observation and fix cascaded into a bunch of inter-related CFI infrastructure fixes" * tag 'x86_urgent_for_6.5_rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/cfi: Only define poison_cfi() if CONFIG_X86_KERNEL_IBT=y x86/fineibt: Poison ENDBR at +0 x86: Rewrite ret_from_fork() in C x86/32: Remove schedule_tail_wrapper() x86/cfi: Extend ENDBR sealing to kCFI x86/alternative: Rename apply_ibt_endbr() x86/cfi: Extend {JMP,CAKK}_NOSPEC comment
This commit is contained in:
commit
b6e6cc1f78
@ -437,7 +437,7 @@ void __init arch_cpu_finalize_init(void)
|
||||
os_check_bugs();
|
||||
}
|
||||
|
||||
void apply_ibt_endbr(s32 *start, s32 *end)
|
||||
void apply_seal_endbr(s32 *start, s32 *end)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -719,26 +719,6 @@ SYM_CODE_START(__switch_to_asm)
|
||||
SYM_CODE_END(__switch_to_asm)
|
||||
.popsection
|
||||
|
||||
/*
|
||||
* The unwinder expects the last frame on the stack to always be at the same
|
||||
* offset from the end of the page, which allows it to validate the stack.
|
||||
* Calling schedule_tail() directly would break that convention because its an
|
||||
* asmlinkage function so its argument has to be pushed on the stack. This
|
||||
* wrapper creates a proper "end of stack" frame header before the call.
|
||||
*/
|
||||
.pushsection .text, "ax"
|
||||
SYM_FUNC_START(schedule_tail_wrapper)
|
||||
FRAME_BEGIN
|
||||
|
||||
pushl %eax
|
||||
call schedule_tail
|
||||
popl %eax
|
||||
|
||||
FRAME_END
|
||||
RET
|
||||
SYM_FUNC_END(schedule_tail_wrapper)
|
||||
.popsection
|
||||
|
||||
/*
|
||||
* A newly forked process directly context switches into this address.
|
||||
*
|
||||
@ -747,29 +727,22 @@ SYM_FUNC_END(schedule_tail_wrapper)
|
||||
* edi: kernel thread arg
|
||||
*/
|
||||
.pushsection .text, "ax"
|
||||
SYM_CODE_START(ret_from_fork)
|
||||
call schedule_tail_wrapper
|
||||
SYM_CODE_START(ret_from_fork_asm)
|
||||
movl %esp, %edx /* regs */
|
||||
|
||||
testl %ebx, %ebx
|
||||
jnz 1f /* kernel threads are uncommon */
|
||||
/* return address for the stack unwinder */
|
||||
pushl $.Lsyscall_32_done
|
||||
|
||||
2:
|
||||
/* When we fork, we trace the syscall return in the child, too. */
|
||||
movl %esp, %eax
|
||||
call syscall_exit_to_user_mode
|
||||
jmp .Lsyscall_32_done
|
||||
FRAME_BEGIN
|
||||
/* prev already in EAX */
|
||||
movl %ebx, %ecx /* fn */
|
||||
pushl %edi /* fn_arg */
|
||||
call ret_from_fork
|
||||
addl $4, %esp
|
||||
FRAME_END
|
||||
|
||||
/* kernel thread */
|
||||
1: movl %edi, %eax
|
||||
CALL_NOSPEC ebx
|
||||
/*
|
||||
* A kernel thread is allowed to return here after successfully
|
||||
* calling kernel_execve(). Exit to userspace to complete the execve()
|
||||
* syscall.
|
||||
*/
|
||||
movl $0, PT_EAX(%esp)
|
||||
jmp 2b
|
||||
SYM_CODE_END(ret_from_fork)
|
||||
RET
|
||||
SYM_CODE_END(ret_from_fork_asm)
|
||||
.popsection
|
||||
|
||||
SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
|
||||
|
@ -284,36 +284,19 @@ SYM_FUNC_END(__switch_to_asm)
|
||||
* r12: kernel thread arg
|
||||
*/
|
||||
.pushsection .text, "ax"
|
||||
__FUNC_ALIGN
|
||||
SYM_CODE_START_NOALIGN(ret_from_fork)
|
||||
UNWIND_HINT_END_OF_STACK
|
||||
SYM_CODE_START(ret_from_fork_asm)
|
||||
UNWIND_HINT_REGS
|
||||
ANNOTATE_NOENDBR // copy_thread
|
||||
CALL_DEPTH_ACCOUNT
|
||||
movq %rax, %rdi
|
||||
call schedule_tail /* rdi: 'prev' task parameter */
|
||||
|
||||
testq %rbx, %rbx /* from kernel_thread? */
|
||||
jnz 1f /* kernel threads are uncommon */
|
||||
movq %rax, %rdi /* prev */
|
||||
movq %rsp, %rsi /* regs */
|
||||
movq %rbx, %rdx /* fn */
|
||||
movq %r12, %rcx /* fn_arg */
|
||||
call ret_from_fork
|
||||
|
||||
2:
|
||||
UNWIND_HINT_REGS
|
||||
movq %rsp, %rdi
|
||||
call syscall_exit_to_user_mode /* returns with IRQs disabled */
|
||||
jmp swapgs_restore_regs_and_return_to_usermode
|
||||
|
||||
1:
|
||||
/* kernel thread */
|
||||
UNWIND_HINT_END_OF_STACK
|
||||
movq %r12, %rdi
|
||||
CALL_NOSPEC rbx
|
||||
/*
|
||||
* A kernel thread is allowed to return here after successfully
|
||||
* calling kernel_execve(). Exit to userspace to complete the execve()
|
||||
* syscall.
|
||||
*/
|
||||
movq $0, RAX(%rsp)
|
||||
jmp 2b
|
||||
SYM_CODE_END(ret_from_fork)
|
||||
SYM_CODE_END(ret_from_fork_asm)
|
||||
.popsection
|
||||
|
||||
.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
|
||||
|
@ -96,7 +96,7 @@ extern void alternative_instructions(void);
|
||||
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
|
||||
extern void apply_retpolines(s32 *start, s32 *end);
|
||||
extern void apply_returns(s32 *start, s32 *end);
|
||||
extern void apply_ibt_endbr(s32 *start, s32 *end);
|
||||
extern void apply_seal_endbr(s32 *start, s32 *end);
|
||||
extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine,
|
||||
s32 *start_cfi, s32 *end_cfi);
|
||||
|
||||
|
@ -34,7 +34,7 @@
|
||||
/*
|
||||
* Create a dummy function pointer reference to prevent objtool from marking
|
||||
* the function as needing to be "sealed" (i.e. ENDBR converted to NOP by
|
||||
* apply_ibt_endbr()).
|
||||
* apply_seal_endbr()).
|
||||
*/
|
||||
#define IBT_NOSEAL(fname) \
|
||||
".pushsection .discard.ibt_endbr_noseal\n\t" \
|
||||
|
@ -234,6 +234,10 @@
|
||||
* JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
|
||||
* indirect jmp/call which may be susceptible to the Spectre variant 2
|
||||
* attack.
|
||||
*
|
||||
* NOTE: these do not take kCFI into account and are thus not comparable to C
|
||||
* indirect calls, take care when using. The target of these should be an ENDBR
|
||||
* instruction irrespective of kCFI.
|
||||
*/
|
||||
.macro JMP_NOSPEC reg:req
|
||||
#ifdef CONFIG_RETPOLINE
|
||||
|
@ -12,7 +12,9 @@ struct task_struct *__switch_to_asm(struct task_struct *prev,
|
||||
__visible struct task_struct *__switch_to(struct task_struct *prev,
|
||||
struct task_struct *next);
|
||||
|
||||
asmlinkage void ret_from_fork(void);
|
||||
asmlinkage void ret_from_fork_asm(void);
|
||||
__visible void ret_from_fork(struct task_struct *prev, struct pt_regs *regs,
|
||||
int (*fn)(void *), void *fn_arg);
|
||||
|
||||
/*
|
||||
* This is the structure pointed to by thread.sp for an inactive task. The
|
||||
|
@ -778,6 +778,8 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
|
||||
|
||||
#ifdef CONFIG_X86_KERNEL_IBT
|
||||
|
||||
static void poison_cfi(void *addr);
|
||||
|
||||
static void __init_or_module poison_endbr(void *addr, bool warn)
|
||||
{
|
||||
u32 endbr, poison = gen_endbr_poison();
|
||||
@ -802,8 +804,11 @@ static void __init_or_module poison_endbr(void *addr, bool warn)
|
||||
|
||||
/*
|
||||
* Generated by: objtool --ibt
|
||||
*
|
||||
* Seal the functions for indirect calls by clobbering the ENDBR instructions
|
||||
* and the kCFI hash value.
|
||||
*/
|
||||
void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end)
|
||||
void __init_or_module noinline apply_seal_endbr(s32 *start, s32 *end)
|
||||
{
|
||||
s32 *s;
|
||||
|
||||
@ -812,13 +817,13 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end)
|
||||
|
||||
poison_endbr(addr, true);
|
||||
if (IS_ENABLED(CONFIG_FINEIBT))
|
||||
poison_endbr(addr - 16, false);
|
||||
poison_cfi(addr - 16);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void __init_or_module apply_ibt_endbr(s32 *start, s32 *end) { }
|
||||
void __init_or_module apply_seal_endbr(s32 *start, s32 *end) { }
|
||||
|
||||
#endif /* CONFIG_X86_KERNEL_IBT */
|
||||
|
||||
@ -1063,6 +1068,17 @@ static int cfi_rewrite_preamble(s32 *start, s32 *end)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void cfi_rewrite_endbr(s32 *start, s32 *end)
|
||||
{
|
||||
s32 *s;
|
||||
|
||||
for (s = start; s < end; s++) {
|
||||
void *addr = (void *)s + *s;
|
||||
|
||||
poison_endbr(addr+16, false);
|
||||
}
|
||||
}
|
||||
|
||||
/* .retpoline_sites */
|
||||
static int cfi_rand_callers(s32 *start, s32 *end)
|
||||
{
|
||||
@ -1157,14 +1173,19 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
|
||||
return;
|
||||
|
||||
case CFI_FINEIBT:
|
||||
/* place the FineIBT preamble at func()-16 */
|
||||
ret = cfi_rewrite_preamble(start_cfi, end_cfi);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
/* rewrite the callers to target func()-16 */
|
||||
ret = cfi_rewrite_callers(start_retpoline, end_retpoline);
|
||||
if (ret)
|
||||
goto err;
|
||||
|
||||
/* now that nobody targets func()+0, remove ENDBR there */
|
||||
cfi_rewrite_endbr(start_cfi, end_cfi);
|
||||
|
||||
if (builtin)
|
||||
pr_info("Using FineIBT CFI\n");
|
||||
return;
|
||||
@ -1177,6 +1198,41 @@ err:
|
||||
pr_err("Something went horribly wrong trying to rewrite the CFI implementation.\n");
|
||||
}
|
||||
|
||||
static inline void poison_hash(void *addr)
|
||||
{
|
||||
*(u32 *)addr = 0;
|
||||
}
|
||||
|
||||
static void poison_cfi(void *addr)
|
||||
{
|
||||
switch (cfi_mode) {
|
||||
case CFI_FINEIBT:
|
||||
/*
|
||||
* __cfi_\func:
|
||||
* osp nopl (%rax)
|
||||
* subl $0, %r10d
|
||||
* jz 1f
|
||||
* ud2
|
||||
* 1: nop
|
||||
*/
|
||||
poison_endbr(addr, false);
|
||||
poison_hash(addr + fineibt_preamble_hash);
|
||||
break;
|
||||
|
||||
case CFI_KCFI:
|
||||
/*
|
||||
* __cfi_\func:
|
||||
* movl $0, %eax
|
||||
* .skip 11, 0x90
|
||||
*/
|
||||
poison_hash(addr + 1);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
|
||||
@ -1184,6 +1240,10 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
|
||||
{
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_KERNEL_IBT
|
||||
static void poison_cfi(void *addr) { }
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
|
||||
@ -1565,7 +1625,10 @@ void __init alternative_instructions(void)
|
||||
*/
|
||||
callthunks_patch_builtin_calls();
|
||||
|
||||
apply_ibt_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end);
|
||||
/*
|
||||
* Seal all functions that do not have their address taken.
|
||||
*/
|
||||
apply_seal_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end);
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
/* Patch to UP if other cpus not imminent. */
|
||||
|
@ -358,7 +358,7 @@ int module_finalize(const Elf_Ehdr *hdr,
|
||||
}
|
||||
if (ibt_endbr) {
|
||||
void *iseg = (void *)ibt_endbr->sh_addr;
|
||||
apply_ibt_endbr(iseg, iseg + ibt_endbr->sh_size);
|
||||
apply_seal_endbr(iseg, iseg + ibt_endbr->sh_size);
|
||||
}
|
||||
if (locks) {
|
||||
void *lseg = (void *)locks->sh_addr;
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include <linux/static_call.h>
|
||||
#include <trace/events/power.h>
|
||||
#include <linux/hw_breakpoint.h>
|
||||
#include <linux/entry-common.h>
|
||||
#include <asm/cpu.h>
|
||||
#include <asm/apic.h>
|
||||
#include <linux/uaccess.h>
|
||||
@ -134,6 +135,25 @@ static int set_new_tls(struct task_struct *p, unsigned long tls)
|
||||
return do_set_thread_area_64(p, ARCH_SET_FS, tls);
|
||||
}
|
||||
|
||||
__visible void ret_from_fork(struct task_struct *prev, struct pt_regs *regs,
|
||||
int (*fn)(void *), void *fn_arg)
|
||||
{
|
||||
schedule_tail(prev);
|
||||
|
||||
/* Is this a kernel thread? */
|
||||
if (unlikely(fn)) {
|
||||
fn(fn_arg);
|
||||
/*
|
||||
* A kernel thread is allowed to return here after successfully
|
||||
* calling kernel_execve(). Exit to userspace to complete the
|
||||
* execve() syscall.
|
||||
*/
|
||||
regs->ax = 0;
|
||||
}
|
||||
|
||||
syscall_exit_to_user_mode(regs);
|
||||
}
|
||||
|
||||
int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
|
||||
{
|
||||
unsigned long clone_flags = args->flags;
|
||||
@ -149,7 +169,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
|
||||
frame = &fork_frame->frame;
|
||||
|
||||
frame->bp = encode_frame_pointer(childregs);
|
||||
frame->ret_addr = (unsigned long) ret_from_fork;
|
||||
frame->ret_addr = (unsigned long) ret_from_fork_asm;
|
||||
p->thread.sp = (unsigned long) fork_frame;
|
||||
p->thread.io_bitmap = NULL;
|
||||
p->thread.iopl_warn = 0;
|
||||
|
Loading…
x
Reference in New Issue
Block a user