bpf, x86: Fix PROBE_MEM runtime load check

When a load is marked PROBE_MEM - e.g. due to PTR_UNTRUSTED access - the
address being loaded from is not necessarily valid. The BPF JIT sets up
an exception handler for each such load, which catches page faults and
zeroes out the destination register.

If the address for the load is outside kernel address space, the load
will escape the exception handling and crash the kernel. To prevent this
from happening, the JIT emits some instructions to verify that addr is >
the end of userspace addresses.

x86 has a legacy vsyscall ABI where a page at address 0xffffffffff600000
is mapped with user accessible permissions. The addresses in this page
are considered userspace addresses by the fault handler. Therefore, a
BPF program accessing this page will crash the kernel.

This patch fixes the runtime checks to also check that the PROBE_MEM
address is below VSYSCALL_ADDR.

Example BPF program:

 SEC("fentry/tcp_v4_connect")
 int BPF_PROG(fentry_tcp_v4_connect, struct sock *sk)
 {
	*(volatile unsigned long *)&sk->sk_tsq_flags;
	return 0;
 }

BPF Assembly:

 0: (79) r1 = *(u64 *)(r1 +0)
 1: (79) r1 = *(u64 *)(r1 +344)
 2: (b7) r0 = 0
 3: (95) exit

			       x86-64 JIT
			       ==========

            BEFORE                                    AFTER
	    ------                                    -----

 0:   nopl   0x0(%rax,%rax,1)             0:   nopl   0x0(%rax,%rax,1)
 5:   xchg   %ax,%ax                      5:   xchg   %ax,%ax
 7:   push   %rbp                         7:   push   %rbp
 8:   mov    %rsp,%rbp                    8:   mov    %rsp,%rbp
 b:   mov    0x0(%rdi),%rdi               b:   mov    0x0(%rdi),%rdi
-------------------------------------------------------------------------------
 f:   movabs $0x100000000000000,%r11      f:   movabs $0xffffffffff600000,%r10
19:   add    $0x2a0,%rdi                 19:   mov    %rdi,%r11
20:   cmp    %r11,%rdi                   1c:   add    $0x2a0,%r11
23:   jae    0x0000000000000029          23:   sub    %r10,%r11
25:   xor    %edi,%edi                   26:   movabs $0x100000000a00000,%r10
27:   jmp    0x000000000000002d          30:   cmp    %r10,%r11
29:   mov    0x0(%rdi),%rdi              33:   ja     0x0000000000000039
--------------------------------\        35:   xor    %edi,%edi
2d:   xor    %eax,%eax           \       37:   jmp    0x0000000000000040
2f:   leave                       \      39:   mov    0x2a0(%rdi),%rdi
30:   ret                          \--------------------------------------------
                                         40:   xor    %eax,%eax
                                         42:   leave
                                         43:   ret

Signed-off-by: Puranjay Mohan <puranjay@kernel.org>
Link: https://lore.kernel.org/r/20240424100210.11982-3-puranjay@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Puranjay Mohan 2024-04-24 10:02:09 +00:00 committed by Alexei Starovoitov
parent 66e13b615a
commit b599d7d26d

View File

@ -1807,36 +1807,41 @@ populate_extable:
if (BPF_MODE(insn->code) == BPF_PROBE_MEM || if (BPF_MODE(insn->code) == BPF_PROBE_MEM ||
BPF_MODE(insn->code) == BPF_PROBE_MEMSX) { BPF_MODE(insn->code) == BPF_PROBE_MEMSX) {
/* Conservatively check that src_reg + insn->off is a kernel address: /* Conservatively check that src_reg + insn->off is a kernel address:
* src_reg + insn->off >= TASK_SIZE_MAX + PAGE_SIZE * src_reg + insn->off > TASK_SIZE_MAX + PAGE_SIZE
* src_reg is used as scratch for src_reg += insn->off and restored * and
* after emit_ldx if necessary * src_reg + insn->off < VSYSCALL_ADDR
*/ */
u64 limit = TASK_SIZE_MAX + PAGE_SIZE; u64 limit = TASK_SIZE_MAX + PAGE_SIZE - VSYSCALL_ADDR;
u8 *end_of_jmp; u8 *end_of_jmp;
/* At end of these emitted checks, insn->off will have been added /* movabsq r10, VSYSCALL_ADDR */
* to src_reg, so no need to do relative load with insn->off offset emit_mov_imm64(&prog, BPF_REG_AX, (long)VSYSCALL_ADDR >> 32,
*/ (u32)(long)VSYSCALL_ADDR);
insn_off = 0;
/* movabsq r11, limit */ /* mov src_reg, r11 */
EMIT2(add_1mod(0x48, AUX_REG), add_1reg(0xB8, AUX_REG)); EMIT_mov(AUX_REG, src_reg);
EMIT((u32)limit, 4);
EMIT(limit >> 32, 4);
if (insn->off) { if (insn->off) {
/* add src_reg, insn->off */ /* add r11, insn->off */
maybe_emit_1mod(&prog, src_reg, true); maybe_emit_1mod(&prog, AUX_REG, true);
EMIT2_off32(0x81, add_1reg(0xC0, src_reg), insn->off); EMIT2_off32(0x81, add_1reg(0xC0, AUX_REG), insn->off);
} }
/* cmp src_reg, r11 */ /* sub r11, r10 */
maybe_emit_mod(&prog, src_reg, AUX_REG, true); maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true);
EMIT2(0x39, add_2reg(0xC0, src_reg, AUX_REG)); EMIT2(0x29, add_2reg(0xC0, AUX_REG, BPF_REG_AX));
/* if unsigned '>=', goto load */ /* movabsq r10, limit */
EMIT2(X86_JAE, 0); emit_mov_imm64(&prog, BPF_REG_AX, (long)limit >> 32,
(u32)(long)limit);
/* cmp r10, r11 */
maybe_emit_mod(&prog, AUX_REG, BPF_REG_AX, true);
EMIT2(0x39, add_2reg(0xC0, AUX_REG, BPF_REG_AX));
/* if unsigned '>', goto load */
EMIT2(X86_JA, 0);
end_of_jmp = prog; end_of_jmp = prog;
/* xor dst_reg, dst_reg */ /* xor dst_reg, dst_reg */
@ -1862,18 +1867,6 @@ populate_extable:
/* populate jmp_offset for JMP above */ /* populate jmp_offset for JMP above */
start_of_ldx[-1] = prog - start_of_ldx; start_of_ldx[-1] = prog - start_of_ldx;
if (insn->off && src_reg != dst_reg) {
/* sub src_reg, insn->off
* Restore src_reg after "add src_reg, insn->off" in prev
* if statement. But if src_reg == dst_reg, emit_ldx
* above already clobbered src_reg, so no need to restore.
* If add src_reg, insn->off was unnecessary, no need to
* restore either.
*/
maybe_emit_1mod(&prog, src_reg, true);
EMIT2_off32(0x81, add_1reg(0xE8, src_reg), insn->off);
}
if (!bpf_prog->aux->extable) if (!bpf_prog->aux->extable)
break; break;