bpf, x64: remove ld_abs/ld_ind
Since LD_ABS/LD_IND instructions are now removed from the core and reimplemented through a combination of inlined BPF instructions and a slow-path helper, we can get rid of the complexity from x64 JIT. Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Alexei Starovoitov <ast@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
committed by
Alexei Starovoitov
parent
4e1ec56cdc
commit
e782bdcf58
@ -17,15 +17,6 @@
|
||||
#include <asm/set_memory.h>
|
||||
#include <asm/nospec-branch.h>
|
||||
|
||||
/*
|
||||
* Assembly code in arch/x86/net/bpf_jit.S
|
||||
*/
|
||||
extern u8 sk_load_word[], sk_load_half[], sk_load_byte[];
|
||||
extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[];
|
||||
extern u8 sk_load_byte_positive_offset[];
|
||||
extern u8 sk_load_word_negative_offset[], sk_load_half_negative_offset[];
|
||||
extern u8 sk_load_byte_negative_offset[];
|
||||
|
||||
static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
|
||||
{
|
||||
if (len == 1)
|
||||
@ -107,9 +98,6 @@ static int bpf_size_to_x86_bytes(int bpf_size)
|
||||
#define X86_JLE 0x7E
|
||||
#define X86_JG 0x7F
|
||||
|
||||
#define CHOOSE_LOAD_FUNC(K, func) \
|
||||
((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset)
|
||||
|
||||
/* Pick a register outside of BPF range for JIT internal work */
|
||||
#define AUX_REG (MAX_BPF_JIT_REG + 1)
|
||||
|
||||
@ -120,8 +108,8 @@ static int bpf_size_to_x86_bytes(int bpf_size)
|
||||
* register in load/store instructions, it always needs an
|
||||
* extra byte of encoding and is callee saved.
|
||||
*
|
||||
* R9 caches skb->len - skb->data_len
|
||||
* R10 caches skb->data, and used for blinding (if enabled)
|
||||
* Also x86-64 register R9 is unused. x86-64 register R10 is
|
||||
* used for blinding (if enabled).
|
||||
*/
|
||||
static const int reg2hex[] = {
|
||||
[BPF_REG_0] = 0, /* RAX */
|
||||
@ -196,19 +184,15 @@ static void jit_fill_hole(void *area, unsigned int size)
|
||||
|
||||
/*
 * JIT bookkeeping that do_jit() reads and updates through its ctx-> accesses.
 * The seen_* flags are folded into do_jit()'s local copies together with
 * (oldproglen == 0).
 */
struct jit_context {
|
||||
int cleanup_addr; /* Epilogue code offset; BPF_EXIT computes its jump distance from it */
|
||||
bool seen_ld_abs; /* Set when do_jit() reaches a BPF_LD | BPF_ABS/BPF_IND insn (common_load) */
|
||||
bool seen_ax_reg; /* Set when an insn names BPF_REG_AX as dst_reg or src_reg */
|
||||
};
|
||||
|
||||
/* Maximum number of bytes emitted while JITing one eBPF insn */
|
||||
#define BPF_MAX_INSN_SIZE 128
|
||||
#define BPF_INSN_SAFETY 64
|
||||
|
||||
#define AUX_STACK_SPACE \
|
||||
(32 /* Space for RBX, R13, R14, R15 */ + \
|
||||
8 /* Space for skb_copy_bits() buffer */)
|
||||
#define AUX_STACK_SPACE 40 /* Space for RBX, R13, R14, R15, tailcnt */
|
||||
|
||||
#define PROLOGUE_SIZE 37
|
||||
#define PROLOGUE_SIZE 37
|
||||
|
||||
/*
|
||||
* Emit x86-64 prologue code for BPF program and check its size.
|
||||
@ -232,20 +216,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
|
||||
/* sub rbp, AUX_STACK_SPACE */
|
||||
EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE);
|
||||
|
||||
/* All classic BPF filters use R6(rbx) save it */
|
||||
|
||||
/* mov qword ptr [rbp+0],rbx */
|
||||
EMIT4(0x48, 0x89, 0x5D, 0);
|
||||
|
||||
/*
|
||||
* bpf_convert_filter() maps classic BPF register X to R7 and uses R8
|
||||
* as temporary, so all tcpdump filters need to spill/fill R7(R13) and
|
||||
* R8(R14). R9(R15) spill could be made conditional, but there is only
|
||||
* one 'bpf_error' return path out of helper functions inside bpf_jit.S
|
||||
* The overhead of extra spill is negligible for any filter other
|
||||
* than synthetic ones. Therefore not worth adding complexity.
|
||||
*/
|
||||
|
||||
/* mov qword ptr [rbp+8],r13 */
|
||||
EMIT4(0x4C, 0x89, 0x6D, 8);
|
||||
/* mov qword ptr [rbp+16],r14 */
|
||||
@ -353,27 +325,6 @@ static void emit_bpf_tail_call(u8 **pprog)
|
||||
*pprog = prog;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Emit the three instructions that cache skb fields in scratch registers,
 * reading the sk_buff through rdi:
 *   r9d = skb->len - skb->data_len   (linear header length)
 *   r10 = skb->data
 *
 * @pprog: in/out pointer to the current emit position; it is read on entry
 *         and the advanced position is stored back before returning.
 *
 * In the visible code do_jit() calls this right after the prologue when
 * seen_ld_abs is set, and again after a helper call for which
 * bpf_helper_changes_pkt_data() returned true.
 *
 * NOTE(review): EMIT3_off32 is defined outside this view; presumably it
 * writes through 'prog' and counts emitted bytes in 'cnt' -- confirm.
 */
static void emit_load_skb_data_hlen(u8 **pprog)
|
||||
{
|
||||
u8 *prog = *pprog;
|
||||
int cnt = 0;
|
||||
|
||||
/*
|
||||
* r9d = skb->len - skb->data_len (headlen)
|
||||
* r10 = skb->data
|
||||
*/
|
||||
/* mov r9d, dword ptr [rdi + off32]  -- r9d = skb->len (32-bit load) */
|
||||
EMIT3_off32(0x44, 0x8b, 0x8f, offsetof(struct sk_buff, len));
|
||||
|
||||
/* sub r9d, dword ptr [rdi + off32]  -- r9d -= skb->data_len */
|
||||
EMIT3_off32(0x44, 0x2b, 0x8f, offsetof(struct sk_buff, data_len));
|
||||
|
||||
/* mov r10, qword ptr [rdi + off32]  -- r10 = skb->data (64-bit load) */
|
||||
EMIT3_off32(0x4c, 0x8b, 0x97, offsetof(struct sk_buff, data));
|
||||
/* Publish the advanced emit position back to the caller. */
*pprog = prog;
|
||||
}
|
||||
|
||||
static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
|
||||
u32 dst_reg, const u32 imm32)
|
||||
{
|
||||
@ -462,8 +413,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
|
||||
{
|
||||
struct bpf_insn *insn = bpf_prog->insnsi;
|
||||
int insn_cnt = bpf_prog->len;
|
||||
bool seen_ld_abs = ctx->seen_ld_abs | (oldproglen == 0);
|
||||
bool seen_ax_reg = ctx->seen_ax_reg | (oldproglen == 0);
|
||||
bool seen_exit = false;
|
||||
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
|
||||
int i, cnt = 0;
|
||||
@ -473,9 +422,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
|
||||
emit_prologue(&prog, bpf_prog->aux->stack_depth,
|
||||
bpf_prog_was_classic(bpf_prog));
|
||||
|
||||
if (seen_ld_abs)
|
||||
emit_load_skb_data_hlen(&prog);
|
||||
|
||||
for (i = 0; i < insn_cnt; i++, insn++) {
|
||||
const s32 imm32 = insn->imm;
|
||||
u32 dst_reg = insn->dst_reg;
|
||||
@ -483,13 +429,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
|
||||
u8 b2 = 0, b3 = 0;
|
||||
s64 jmp_offset;
|
||||
u8 jmp_cond;
|
||||
bool reload_skb_data;
|
||||
int ilen;
|
||||
u8 *func;
|
||||
|
||||
if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
|
||||
ctx->seen_ax_reg = seen_ax_reg = true;
|
||||
|
||||
switch (insn->code) {
|
||||
/* ALU */
|
||||
case BPF_ALU | BPF_ADD | BPF_X:
|
||||
@ -916,36 +858,12 @@ xadd: if (is_imm8(insn->off))
|
||||
case BPF_JMP | BPF_CALL:
|
||||
func = (u8 *) __bpf_call_base + imm32;
|
||||
jmp_offset = func - (image + addrs[i]);
|
||||
if (seen_ld_abs) {
|
||||
reload_skb_data = bpf_helper_changes_pkt_data(func);
|
||||
if (reload_skb_data) {
|
||||
EMIT1(0x57); /* push %rdi */
|
||||
jmp_offset += 22; /* pop, mov, sub, mov */
|
||||
} else {
|
||||
EMIT2(0x41, 0x52); /* push %r10 */
|
||||
EMIT2(0x41, 0x51); /* push %r9 */
|
||||
/*
|
||||
* We need to adjust jmp offset, since
|
||||
* pop %r9, pop %r10 take 4 bytes after call insn
|
||||
*/
|
||||
jmp_offset += 4;
|
||||
}
|
||||
}
|
||||
if (!imm32 || !is_simm32(jmp_offset)) {
|
||||
pr_err("unsupported BPF func %d addr %p image %p\n",
|
||||
imm32, func, image);
|
||||
return -EINVAL;
|
||||
}
|
||||
EMIT1_off32(0xE8, jmp_offset);
|
||||
if (seen_ld_abs) {
|
||||
if (reload_skb_data) {
|
||||
EMIT1(0x5F); /* pop %rdi */
|
||||
emit_load_skb_data_hlen(&prog);
|
||||
} else {
|
||||
EMIT2(0x41, 0x59); /* pop %r9 */
|
||||
EMIT2(0x41, 0x5A); /* pop %r10 */
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case BPF_JMP | BPF_TAIL_CALL:
|
||||
@ -1080,60 +998,6 @@ emit_jmp:
|
||||
}
|
||||
break;
|
||||
|
||||
case BPF_LD | BPF_IND | BPF_W:
|
||||
func = sk_load_word;
|
||||
goto common_load;
|
||||
case BPF_LD | BPF_ABS | BPF_W:
|
||||
func = CHOOSE_LOAD_FUNC(imm32, sk_load_word);
|
||||
common_load:
|
||||
ctx->seen_ld_abs = seen_ld_abs = true;
|
||||
jmp_offset = func - (image + addrs[i]);
|
||||
if (!func || !is_simm32(jmp_offset)) {
|
||||
pr_err("unsupported BPF func %d addr %p image %p\n",
|
||||
imm32, func, image);
|
||||
return -EINVAL;
|
||||
}
|
||||
if (BPF_MODE(insn->code) == BPF_ABS) {
|
||||
/* mov %esi, imm32 */
|
||||
EMIT1_off32(0xBE, imm32);
|
||||
} else {
|
||||
/* mov %rsi, src_reg */
|
||||
EMIT_mov(BPF_REG_2, src_reg);
|
||||
if (imm32) {
|
||||
if (is_imm8(imm32))
|
||||
/* add %esi, imm8 */
|
||||
EMIT3(0x83, 0xC6, imm32);
|
||||
else
|
||||
/* add %esi, imm32 */
|
||||
EMIT2_off32(0x81, 0xC6, imm32);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* skb pointer is in R6 (%rbx), it will be copied into
|
||||
* %rdi if skb_copy_bits() call is necessary.
|
||||
* sk_load_* helpers also use %r10 and %r9d.
|
||||
* See bpf_jit.S
|
||||
*/
|
||||
if (seen_ax_reg)
|
||||
/* r10 = skb->data, mov %r10, off32(%rbx) */
|
||||
EMIT3_off32(0x4c, 0x8b, 0x93,
|
||||
offsetof(struct sk_buff, data));
|
||||
EMIT1_off32(0xE8, jmp_offset); /* call */
|
||||
break;
|
||||
|
||||
case BPF_LD | BPF_IND | BPF_H:
|
||||
func = sk_load_half;
|
||||
goto common_load;
|
||||
case BPF_LD | BPF_ABS | BPF_H:
|
||||
func = CHOOSE_LOAD_FUNC(imm32, sk_load_half);
|
||||
goto common_load;
|
||||
case BPF_LD | BPF_IND | BPF_B:
|
||||
func = sk_load_byte;
|
||||
goto common_load;
|
||||
case BPF_LD | BPF_ABS | BPF_B:
|
||||
func = CHOOSE_LOAD_FUNC(imm32, sk_load_byte);
|
||||
goto common_load;
|
||||
|
||||
case BPF_JMP | BPF_EXIT:
|
||||
if (seen_exit) {
|
||||
jmp_offset = ctx->cleanup_addr - addrs[i];
|
||||
|
Reference in New Issue
Block a user