Merge branch 'bpf-introduce-bpf-arena'
Alexei Starovoitov says:

====================
bpf: Introduce BPF arena.

From: Alexei Starovoitov <ast@kernel.org>

v2->v3:
- contains bpf bits only, but cc-ing past audience for continuity
- since prerequisite patches landed, this series focuses on the main
  functionality of bpf_arena.
- adopted Andrii's approach to support arena in libbpf.
- simplified LLVM support. Instead of two instructions it's now only one.
- switched to cond_break (instead of open coded iters) in selftests
- implemented several follow-ups that will be sent after this set
  . remember first IP and bpf insn that faulted in arena.
    report to user space via bpftool
  . copy paste and tweak glob_match() aka mini-regex as a selftests/bpf
- see patch 1 for detailed description of bpf_arena

v1->v2:
- Improved commit log with reasons for using vmap_pages_range() in arena.
  Thanks to Johannes
- Added support for __arena global variables in bpf programs
- Fixed race conditions spotted by Barret
- Fixed wrap32 issue spotted by Barret
- Fixed bpf_map_mmap_sz() the way Andrii suggested

The work on bpf_arena was inspired by Barret's work:
https://github.com/google/ghost-userspace/blob/main/lib/queue.bpf.h
that implements queues, lists and AVL trees completely as bpf programs
using a giant bpf array map and integer indices instead of pointers.

bpf_arena is a sparse array that allows the use of normal C pointers to
build such data structures. The last few patches implement a page_frag
allocator, a linked list and a hash table as bpf programs.

v1:
bpf programs have multiple options to communicate with user space:
- Various ring buffers (perf, ftrace, bpf): The data is streamed
  unidirectionally from bpf to user space.
- Hash map: The bpf program populates elements, and user space consumes
  them via bpf syscall.
- mmap()-ed array map: Libbpf creates an array map that is directly
  accessed by the bpf program and mmap-ed to user space. It's the
  fastest way. Its disadvantage is that memory for the whole array is
  reserved at the start.
====================

Link: https://lore.kernel.org/r/20240308010812.89848-1-alexei.starovoitov@gmail.com
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
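To make the model concrete, here is a minimal sketch of the bpf-program side of an arena, built only from pieces introduced in this set (BPF_MAP_TYPE_ARENA, BPF_F_MMAPABLE, the bpf_arena_alloc_pages() kfunc). It is illustrative, not part of the series: it assumes the usual vmlinux.h/bpf_helpers.h includes, and the program type and names are arbitrary.

        /* a minimal sketch, not from this series */
        struct {
                __uint(type, BPF_MAP_TYPE_ARENA);
                __uint(map_flags, BPF_F_MMAPABLE);
                __uint(max_entries, 10);        /* arena size in pages */
        } arena SEC(".maps");

        /* kfunc added by this series; declared here for the sketch */
        void *bpf_arena_alloc_pages(void *map, void *addr, __u32 page_cnt,
                                    int node_id, __u64 flags) __ksym;

        SEC("syscall")
        int alloc_one_page(void *ctx)
        {
                /* NULL addr lets the kernel pick a free page in the arena */
                int *p = bpf_arena_alloc_pages(&arena, NULL, 1,
                                               -1 /* NUMA_NO_NODE */, 0);

                if (!p)
                        return 1;
                p[0] = 42;      /* plain C pointer dereference, no helpers */
                return 0;
        }

User space that mmap()s the same map sees the value at the same page offset, since both sides share the lower 32 bits of every arena pointer.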
commit 08701e306e
@@ -113,6 +113,7 @@ static int bpf_size_to_x86_bytes(int bpf_size)
 /* Pick a register outside of BPF range for JIT internal work */
 #define AUX_REG (MAX_BPF_JIT_REG + 1)
 #define X86_REG_R9 (MAX_BPF_JIT_REG + 2)
 #define X86_REG_R12 (MAX_BPF_JIT_REG + 3)

 /*
  * The following table maps BPF registers to x86-64 registers.
@@ -139,6 +140,7 @@ static const int reg2hex[] = {
        [BPF_REG_AX] = 2, /* R10 temp register */
        [AUX_REG] = 3,    /* R11 temp register */
        [X86_REG_R9] = 1, /* R9 register, 6th function argument */
        [X86_REG_R12] = 4, /* R12 callee saved */
 };

 static const int reg2pt_regs[] = {
@@ -167,6 +169,7 @@ static bool is_ereg(u32 reg)
                             BIT(BPF_REG_8) |
                             BIT(BPF_REG_9) |
                             BIT(X86_REG_R9) |
                             BIT(X86_REG_R12) |
                             BIT(BPF_REG_AX));
 }

@@ -205,6 +208,17 @@ static u8 add_2mod(u8 byte, u32 r1, u32 r2)
        return byte;
 }

 static u8 add_3mod(u8 byte, u32 r1, u32 r2, u32 index)
 {
        if (is_ereg(r1))
                byte |= 1;
        if (is_ereg(index))
                byte |= 2;
        if (is_ereg(r2))
                byte |= 4;
        return byte;
 }

 /* Encode 'dst_reg' register into x86-64 opcode 'byte' */
 static u8 add_1reg(u8 byte, u32 dst_reg)
 {
@@ -645,6 +659,8 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
                pop_r12(&prog);
        } else {
                pop_callee_regs(&prog, callee_regs_used);
                if (bpf_arena_get_kern_vm_start(bpf_prog->aux->arena))
                        pop_r12(&prog);
        }

        EMIT1(0x58);            /* pop rax */
@@ -704,6 +720,8 @@ static void emit_bpf_tail_call_direct(struct bpf_prog *bpf_prog,
                pop_r12(&prog);
        } else {
                pop_callee_regs(&prog, callee_regs_used);
                if (bpf_arena_get_kern_vm_start(bpf_prog->aux->arena))
                        pop_r12(&prog);
        }

        EMIT1(0x58);            /* pop rax */
@@ -887,6 +905,18 @@ static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off)
        *pprog = prog;
 }

 static void emit_insn_suffix_SIB(u8 **pprog, u32 ptr_reg, u32 val_reg, u32 index_reg, int off)
 {
        u8 *prog = *pprog;

        if (is_imm8(off)) {
                EMIT3(add_2reg(0x44, BPF_REG_0, val_reg), add_2reg(0, ptr_reg, index_reg) /* SIB */, off);
        } else {
                EMIT2_off32(add_2reg(0x84, BPF_REG_0, val_reg), add_2reg(0, ptr_reg, index_reg) /* SIB */, off);
        }
        *pprog = prog;
 }

 /*
  * Emit a REX byte if it will be necessary to address these registers
  */
@@ -968,6 +998,37 @@ static void emit_ldsx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
        *pprog = prog;
 }

 static void emit_ldx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off)
 {
        u8 *prog = *pprog;

        switch (size) {
        case BPF_B:
                /* movzx rax, byte ptr [rax + r12 + off] */
                EMIT3(add_3mod(0x40, src_reg, dst_reg, index_reg), 0x0F, 0xB6);
                break;
        case BPF_H:
                /* movzx rax, word ptr [rax + r12 + off] */
                EMIT3(add_3mod(0x40, src_reg, dst_reg, index_reg), 0x0F, 0xB7);
                break;
        case BPF_W:
                /* mov eax, dword ptr [rax + r12 + off] */
                EMIT2(add_3mod(0x40, src_reg, dst_reg, index_reg), 0x8B);
                break;
        case BPF_DW:
                /* mov rax, qword ptr [rax + r12 + off] */
                EMIT2(add_3mod(0x48, src_reg, dst_reg, index_reg), 0x8B);
                break;
        }
        emit_insn_suffix_SIB(&prog, src_reg, dst_reg, index_reg, off);
        *pprog = prog;
 }

 static void emit_ldx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 {
        emit_ldx_index(pprog, size, dst_reg, src_reg, X86_REG_R12, off);
 }

 /* STX: *(u8*)(dst_reg + off) = src_reg */
 static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 {
@@ -1002,6 +1063,71 @@ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
        *pprog = prog;
 }

 /* STX: *(u8*)(dst_reg + index_reg + off) = src_reg */
 static void emit_stx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off)
 {
        u8 *prog = *pprog;

        switch (size) {
        case BPF_B:
                /* mov byte ptr [rax + r12 + off], al */
                EMIT2(add_3mod(0x40, dst_reg, src_reg, index_reg), 0x88);
                break;
        case BPF_H:
                /* mov word ptr [rax + r12 + off], ax */
                EMIT3(0x66, add_3mod(0x40, dst_reg, src_reg, index_reg), 0x89);
                break;
        case BPF_W:
                /* mov dword ptr [rax + r12 + 1], eax */
                EMIT2(add_3mod(0x40, dst_reg, src_reg, index_reg), 0x89);
                break;
        case BPF_DW:
                /* mov qword ptr [rax + r12 + 1], rax */
                EMIT2(add_3mod(0x48, dst_reg, src_reg, index_reg), 0x89);
                break;
        }
        emit_insn_suffix_SIB(&prog, dst_reg, src_reg, index_reg, off);
        *pprog = prog;
 }

 static void emit_stx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
 {
        emit_stx_index(pprog, size, dst_reg, src_reg, X86_REG_R12, off);
 }

 /* ST: *(u8*)(dst_reg + index_reg + off) = imm32 */
 static void emit_st_index(u8 **pprog, u32 size, u32 dst_reg, u32 index_reg, int off, int imm)
 {
        u8 *prog = *pprog;

        switch (size) {
        case BPF_B:
                /* mov byte ptr [rax + r12 + off], imm8 */
                EMIT2(add_3mod(0x40, dst_reg, 0, index_reg), 0xC6);
                break;
        case BPF_H:
                /* mov word ptr [rax + r12 + off], imm16 */
                EMIT3(0x66, add_3mod(0x40, dst_reg, 0, index_reg), 0xC7);
                break;
        case BPF_W:
                /* mov dword ptr [rax + r12 + 1], imm32 */
                EMIT2(add_3mod(0x40, dst_reg, 0, index_reg), 0xC7);
                break;
        case BPF_DW:
                /* mov qword ptr [rax + r12 + 1], imm32 */
                EMIT2(add_3mod(0x48, dst_reg, 0, index_reg), 0xC7);
                break;
        }
        emit_insn_suffix_SIB(&prog, dst_reg, 0, index_reg, off);
        EMIT(imm, bpf_size_to_x86_bytes(size));
        *pprog = prog;
 }

 static void emit_st_r12(u8 **pprog, u32 size, u32 dst_reg, int off, int imm)
 {
        emit_st_index(pprog, size, dst_reg, X86_REG_R12, off, imm);
 }

 static int emit_atomic(u8 **pprog, u8 atomic_op,
                        u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size)
 {
@@ -1043,12 +1169,15 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
        return 0;
 }

 #define DONT_CLEAR 1

 bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
 {
        u32 reg = x->fixup >> 8;

        /* jump over faulting load and clear dest register */
-       *(unsigned long *)((void *)regs + reg) = 0;
+       if (reg != DONT_CLEAR)
+               *(unsigned long *)((void *)regs + reg) = 0;
        regs->ip += x->fixup & 0xff;
        return true;
 }
@@ -1147,11 +1276,15 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
        bool tail_call_seen = false;
        bool seen_exit = false;
        u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
        u64 arena_vm_start, user_vm_start;
        int i, excnt = 0;
        int ilen, proglen = 0;
        u8 *prog = temp;
        int err;

        arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
        user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena);

        detect_reg_usage(insn, insn_cnt, callee_regs_used,
                         &tail_call_seen);

@@ -1172,8 +1305,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
                push_r12(&prog);
                push_callee_regs(&prog, all_callee_regs_used);
        } else {
                if (arena_vm_start)
                        push_r12(&prog);
                push_callee_regs(&prog, callee_regs_used);
        }
        if (arena_vm_start)
                emit_mov_imm64(&prog, X86_REG_R12,
                               arena_vm_start >> 32, (u32) arena_vm_start);

        ilen = prog - temp;
        if (rw_image)
@@ -1213,6 +1351,40 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
                        break;

                case BPF_ALU64 | BPF_MOV | BPF_X:
                        if (insn->off == BPF_ADDR_SPACE_CAST &&
                            insn->imm == 1U << 16) {
                                if (dst_reg != src_reg)
                                        /* 32-bit mov */
                                        emit_mov_reg(&prog, false, dst_reg, src_reg);
                                /* shl dst_reg, 32 */
                                maybe_emit_1mod(&prog, dst_reg, true);
                                EMIT3(0xC1, add_1reg(0xE0, dst_reg), 32);

                                /* or dst_reg, user_vm_start */
                                maybe_emit_1mod(&prog, dst_reg, true);
                                if (is_axreg(dst_reg))
                                        EMIT1_off32(0x0D, user_vm_start >> 32);
                                else
                                        EMIT2_off32(0x81, add_1reg(0xC8, dst_reg), user_vm_start >> 32);

                                /* rol dst_reg, 32 */
                                maybe_emit_1mod(&prog, dst_reg, true);
                                EMIT3(0xC1, add_1reg(0xC0, dst_reg), 32);

                                /* xor r11, r11 */
                                EMIT3(0x4D, 0x31, 0xDB);

                                /* test dst_reg32, dst_reg32; check if lower 32-bit are zero */
                                maybe_emit_mod(&prog, dst_reg, dst_reg, false);
                                EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg));

                                /* cmove r11, dst_reg; if so, set dst_reg to zero */
                                /* WARNING: Intel swapped src/dst register encoding in CMOVcc !!! */
                                maybe_emit_mod(&prog, AUX_REG, dst_reg, true);
                                EMIT3(0x0F, 0x44, add_2reg(0xC0, AUX_REG, dst_reg));
                                break;
                        }
                        fallthrough;
                case BPF_ALU | BPF_MOV | BPF_X:
                        if (insn->off == 0)
                                emit_mov_reg(&prog,
@@ -1564,6 +1736,56 @@ st: if (is_imm8(insn->off))
                                emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
                        break;

                case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
                case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
                case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
                case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
                        start_of_ldx = prog;
                        emit_st_r12(&prog, BPF_SIZE(insn->code), dst_reg, insn->off, insn->imm);
                        goto populate_extable;

                        /* LDX: dst_reg = *(u8*)(src_reg + r12 + off) */
                case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
                case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
                case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
                case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
                case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
                case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
                case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
                case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
                        start_of_ldx = prog;
                        if (BPF_CLASS(insn->code) == BPF_LDX)
                                emit_ldx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
                        else
                                emit_stx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
populate_extable:
                        {
                                struct exception_table_entry *ex;
                                u8 *_insn = image + proglen + (start_of_ldx - temp);
                                s64 delta;

                                if (!bpf_prog->aux->extable)
                                        break;

                                if (excnt >= bpf_prog->aux->num_exentries) {
                                        pr_err("mem32 extable bug\n");
                                        return -EFAULT;
                                }
                                ex = &bpf_prog->aux->extable[excnt++];

                                delta = _insn - (u8 *)&ex->insn;
                                /* switch ex to rw buffer for writes */
                                ex = (void *)rw_image + ((void *)ex - (void *)image);

                                ex->insn = delta;

                                ex->data = EX_TYPE_BPF;

                                ex->fixup = (prog - start_of_ldx) |
                                        ((BPF_CLASS(insn->code) == BPF_LDX ? reg2pt_regs[dst_reg] : DONT_CLEAR) << 8);
                        }
                        break;

                        /* LDX: dst_reg = *(u8*)(src_reg + off) */
                case BPF_LDX | BPF_MEM | BPF_B:
                case BPF_LDX | BPF_PROBE_MEM | BPF_B:
@@ -2036,6 +2258,8 @@ emit_jmp:
                                pop_r12(&prog);
                        } else {
                                pop_callee_regs(&prog, callee_regs_used);
                                if (arena_vm_start)
                                        pop_r12(&prog);
                        }
                        EMIT1(0xC9);    /* leave */
                        emit_return(&prog, image + addrs[i - 1] + (prog - temp));
@@ -3243,6 +3467,11 @@ void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
                }
        }
 }

 bool bpf_jit_supports_arena(void)
 {
        return true;
 }

 bool bpf_jit_supports_ptr_xchg(void)
 {
        return true;
@@ -37,6 +37,7 @@ struct perf_event;
 struct bpf_prog;
 struct bpf_prog_aux;
 struct bpf_map;
 struct bpf_arena;
 struct sock;
 struct seq_file;
 struct btf;
@@ -528,8 +529,8 @@ void bpf_list_head_free(const struct btf_field *field, void *list_head,
                        struct bpf_spin_lock *spin_lock);
 void bpf_rb_root_free(const struct btf_field *field, void *rb_root,
                      struct bpf_spin_lock *spin_lock);
-
-
+u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena);
+u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena);
 int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size);

 struct bpf_offload_dev;
@@ -711,6 +712,7 @@ enum bpf_arg_type {
         * on eBPF program stack
         */
        ARG_PTR_TO_MEM,         /* pointer to valid memory (stack, packet, map value) */
        ARG_PTR_TO_ARENA,

        ARG_CONST_SIZE,         /* number of bytes accessed from memory */
        ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */
@@ -882,6 +884,7 @@ enum bpf_reg_type {
         * an explicit null check is required for this struct.
         */
        PTR_TO_MEM,             /* reg points to valid memory region */
        PTR_TO_ARENA,
        PTR_TO_BUF,             /* reg points to a read/write buffer */
        PTR_TO_FUNC,            /* reg points to a bpf program function */
        CONST_PTR_TO_DYNPTR,    /* reg points to a const struct bpf_dynptr */
@@ -1457,6 +1460,7 @@ struct bpf_prog_aux {
        bool xdp_has_frags;
        bool exception_cb;
        bool exception_boundary;
        struct bpf_arena *arena;
        /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
        const struct btf_type *attach_func_proto;
        /* function name for valid attach_btf_id */
@@ -2215,6 +2219,8 @@ int generic_map_delete_batch(struct bpf_map *map,
 struct bpf_map *bpf_map_get_curr_or_next(u32 *id);
 struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);

 int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid,
                        unsigned long nr_pages, struct page **page_array);
 #ifdef CONFIG_MEMCG_KMEM
 void *bpf_map_kmalloc_node(const struct bpf_map *map, size_t size, gfp_t flags,
                           int node);
@@ -132,6 +132,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_ARENA, arena_map_ops)

 BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
 BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
@@ -548,6 +548,7 @@ struct bpf_insn_aux_data {
        u32 seen; /* this insn was processed by the verifier at env->pass_cnt */
        bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */
        bool zext_dst; /* this insn zero extends dst reg */
        bool needs_zext; /* alu op needs to clear upper bits */
        bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */
        bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */
        bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */
@@ -72,6 +72,9 @@ struct ctl_table_header;
 /* unused opcode to mark special ldsx instruction. Same as BPF_IND */
 #define BPF_PROBE_MEMSX 0x40

 /* unused opcode to mark special load instruction. Same as BPF_MSH */
 #define BPF_PROBE_MEM32 0xa0

 /* unused opcode to mark call to interpreter with arguments */
 #define BPF_CALL_ARGS  0xe0

@@ -959,6 +962,7 @@ bool bpf_jit_supports_kfunc_call(void);
 bool bpf_jit_supports_far_kfunc_call(void);
 bool bpf_jit_supports_exceptions(void);
 bool bpf_jit_supports_ptr_xchg(void);
 bool bpf_jit_supports_arena(void);
 void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp), void *cookie);
 bool bpf_helper_changes_pkt_data(void *func);

@@ -1009,6 +1009,7 @@ enum bpf_map_type {
        BPF_MAP_TYPE_BLOOM_FILTER,
        BPF_MAP_TYPE_USER_RINGBUF,
        BPF_MAP_TYPE_CGRP_STORAGE,
        BPF_MAP_TYPE_ARENA,
        __MAX_BPF_MAP_TYPE
 };

@@ -1338,6 +1339,10 @@ enum {
  */
 #define BPF_PSEUDO_KFUNC_CALL  2

 enum bpf_addr_space_cast {
        BPF_ADDR_SPACE_CAST = 1,
 };

 /* flags for BPF_MAP_UPDATE_ELEM command */
 enum {
        BPF_ANY         = 0, /* create new element or update existing */
@@ -1396,6 +1401,12 @@ enum {

        /* BPF token FD is passed in a corresponding command's token_fd field */
        BPF_F_TOKEN_FD          = (1U << 16),

        /* When user space page faults in bpf_arena send SIGSEGV instead of inserting new page */
        BPF_F_SEGV_ON_FAULT     = (1U << 17),

        /* Do not translate kernel bpf_arena pointers to user pointers */
        BPF_F_NO_USER_CONV      = (1U << 18),
 };

 /* Flags for BPF_PROG_QUERY. */
@@ -1467,6 +1478,9 @@ union bpf_attr {
         * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the
         * number of hash functions (if 0, the bloom filter will default
         * to using 5 hash functions).
         *
         * BPF_MAP_TYPE_ARENA - contains the address where user space
         * is going to mmap() the arena. It has to be page aligned.
         */
        __u64   map_extra;

@@ -15,6 +15,9 @@ obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o mprog.o
 obj-$(CONFIG_BPF_JIT) += trampoline.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o memalloc.o
 ifeq ($(CONFIG_MMU)$(CONFIG_64BIT),yy)
 obj-$(CONFIG_BPF_SYSCALL) += arena.o
 endif
 obj-$(CONFIG_BPF_JIT) += dispatcher.o
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_BPF_SYSCALL) += devmap.o
kernel/bpf/arena.c (new file, 558 lines)
@@ -0,0 +1,558 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <linux/btf_ids.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>

/*
 * bpf_arena is a sparsely populated shared memory region between bpf program and
 * user space process.
 *
 * For example on x86-64 the values could be:
 * user_vm_start 7f7d26200000     // picked by mmap()
 * kern_vm_start ffffc90001e69000 // picked by get_vm_area()
 * For user space all pointers within the arena are normal 8-byte addresses.
 * In this example 7f7d26200000 is the address of the first page (pgoff=0).
 * The bpf program will access it as: kern_vm_start + lower_32bit_of_user_ptr
 * (u32)7f7d26200000 -> 26200000
 * hence
 * ffffc90001e69000 + 26200000 == ffffc90028069000 is "pgoff=0" within 4Gb
 * kernel memory region.
 *
 * BPF JITs generate the following code to access arena:
 *   mov eax, eax  // eax has lower 32-bit of user pointer
 *   mov word ptr [rax + r12 + off], bx
 * where r12 == kern_vm_start and off is s16.
 * Hence allocate 4Gb + GUARD_SZ/2 on each side.
 *
 * Initially kernel vm_area and user vma are not populated.
 * User space can fault-in any address which will insert the page
 * into kernel and user vma.
 * bpf program can allocate a page via bpf_arena_alloc_pages() kfunc
 * which will insert it into kernel vm_area.
 * The later fault-in from user space will populate that page into user vma.
 */

/* number of bytes addressable by LDX/STX insn with 16-bit 'off' field */
#define GUARD_SZ (1ull << sizeof(((struct bpf_insn *)0)->off) * 8)
#define KERN_VM_SZ ((1ull << 32) + GUARD_SZ)

struct bpf_arena {
	struct bpf_map map;
	u64 user_vm_start;
	u64 user_vm_end;
	struct vm_struct *kern_vm;
	struct maple_tree mt;
	struct list_head vma_list;
	struct mutex lock;
};

u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena)
{
	return arena ? (u64) (long) arena->kern_vm->addr + GUARD_SZ / 2 : 0;
}

u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena)
{
	return arena ? arena->user_vm_start : 0;
}

static long arena_map_peek_elem(struct bpf_map *map, void *value)
{
	return -EOPNOTSUPP;
}

static long arena_map_push_elem(struct bpf_map *map, void *value, u64 flags)
{
	return -EOPNOTSUPP;
}

static long arena_map_pop_elem(struct bpf_map *map, void *value)
{
	return -EOPNOTSUPP;
}

static long arena_map_delete_elem(struct bpf_map *map, void *value)
{
	return -EOPNOTSUPP;
}

static int arena_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
{
	return -EOPNOTSUPP;
}

static long compute_pgoff(struct bpf_arena *arena, long uaddr)
{
	return (u32)(uaddr - (u32)arena->user_vm_start) >> PAGE_SHIFT;
}

static struct bpf_map *arena_map_alloc(union bpf_attr *attr)
{
	struct vm_struct *kern_vm;
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_arena *arena;
	u64 vm_range;
	int err = -ENOMEM;

	if (attr->key_size || attr->value_size || attr->max_entries == 0 ||
	    /* BPF_F_MMAPABLE must be set */
	    !(attr->map_flags & BPF_F_MMAPABLE) ||
	    /* No unsupported flags present */
	    (attr->map_flags & ~(BPF_F_SEGV_ON_FAULT | BPF_F_MMAPABLE | BPF_F_NO_USER_CONV)))
		return ERR_PTR(-EINVAL);

	if (attr->map_extra & ~PAGE_MASK)
		/* If non-zero the map_extra is an expected user VMA start address */
		return ERR_PTR(-EINVAL);

	vm_range = (u64)attr->max_entries * PAGE_SIZE;
	if (vm_range > (1ull << 32))
		return ERR_PTR(-E2BIG);

	if ((attr->map_extra >> 32) != ((attr->map_extra + vm_range - 1) >> 32))
		/* user vma must not cross 32-bit boundary */
		return ERR_PTR(-ERANGE);

	kern_vm = get_vm_area(KERN_VM_SZ, VM_SPARSE | VM_USERMAP);
	if (!kern_vm)
		return ERR_PTR(-ENOMEM);

	arena = bpf_map_area_alloc(sizeof(*arena), numa_node);
	if (!arena)
		goto err;

	arena->kern_vm = kern_vm;
	arena->user_vm_start = attr->map_extra;
	if (arena->user_vm_start)
		arena->user_vm_end = arena->user_vm_start + vm_range;

	INIT_LIST_HEAD(&arena->vma_list);
	bpf_map_init_from_attr(&arena->map, attr);
	mt_init_flags(&arena->mt, MT_FLAGS_ALLOC_RANGE);
	mutex_init(&arena->lock);

	return &arena->map;
err:
	free_vm_area(kern_vm);
	return ERR_PTR(err);
}

static int existing_page_cb(pte_t *ptep, unsigned long addr, void *data)
{
	struct page *page;
	pte_t pte;

	pte = ptep_get(ptep);
	if (!pte_present(pte)) /* sanity check */
		return 0;
	page = pte_page(pte);
	/*
	 * We do not update pte here:
	 * 1. Nobody should be accessing bpf_arena's range outside of a kernel bug
	 * 2. TLB flushing is batched or deferred. Even if we clear pte,
	 * the TLB entries can stick around and continue to permit access to
	 * the freed page. So it all relies on 1.
	 */
	__free_page(page);
	return 0;
}

static void arena_map_free(struct bpf_map *map)
{
	struct bpf_arena *arena = container_of(map, struct bpf_arena, map);

	/*
	 * Check that user vma-s are not around when bpf map is freed.
	 * mmap() holds vm_file which holds bpf_map refcnt.
	 * munmap() must have happened on vma followed by arena_vm_close()
	 * which would clear arena->vma_list.
	 */
	if (WARN_ON_ONCE(!list_empty(&arena->vma_list)))
		return;

	/*
	 * free_vm_area() calls remove_vm_area() that calls free_unmap_vmap_area().
	 * It unmaps everything from vmalloc area and clears pgtables.
	 * Call apply_to_existing_page_range() first to find populated ptes and
	 * free those pages.
	 */
	apply_to_existing_page_range(&init_mm, bpf_arena_get_kern_vm_start(arena),
				     KERN_VM_SZ - GUARD_SZ, existing_page_cb, NULL);
	free_vm_area(arena->kern_vm);
	mtree_destroy(&arena->mt);
	bpf_map_area_free(arena);
}

static void *arena_map_lookup_elem(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EINVAL);
}

static long arena_map_update_elem(struct bpf_map *map, void *key,
				  void *value, u64 flags)
{
	return -EOPNOTSUPP;
}

static int arena_map_check_btf(const struct bpf_map *map, const struct btf *btf,
			       const struct btf_type *key_type, const struct btf_type *value_type)
{
	return 0;
}

static u64 arena_map_mem_usage(const struct bpf_map *map)
{
	return 0;
}

struct vma_list {
	struct vm_area_struct *vma;
	struct list_head head;
};

static int remember_vma(struct bpf_arena *arena, struct vm_area_struct *vma)
{
	struct vma_list *vml;

	vml = kmalloc(sizeof(*vml), GFP_KERNEL);
	if (!vml)
		return -ENOMEM;
	vma->vm_private_data = vml;
	vml->vma = vma;
	list_add(&vml->head, &arena->vma_list);
	return 0;
}

static void arena_vm_close(struct vm_area_struct *vma)
{
	struct bpf_map *map = vma->vm_file->private_data;
	struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
	struct vma_list *vml;

	guard(mutex)(&arena->lock);
	vml = vma->vm_private_data;
	list_del(&vml->head);
	vma->vm_private_data = NULL;
	kfree(vml);
}

#define MT_ENTRY ((void *)&arena_map_ops) /* unused. has to be valid pointer */

static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
{
	struct bpf_map *map = vmf->vma->vm_file->private_data;
	struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
	struct page *page;
	long kbase, kaddr;
	int ret;

	kbase = bpf_arena_get_kern_vm_start(arena);
	kaddr = kbase + (u32)(vmf->address & PAGE_MASK);

	guard(mutex)(&arena->lock);
	page = vmalloc_to_page((void *)kaddr);
	if (page)
		/* already have a page vmap-ed */
		goto out;

	if (arena->map.map_flags & BPF_F_SEGV_ON_FAULT)
		/* User space requested to segfault when page is not allocated by bpf prog */
		return VM_FAULT_SIGSEGV;

	ret = mtree_insert(&arena->mt, vmf->pgoff, MT_ENTRY, GFP_KERNEL);
	if (ret)
		return VM_FAULT_SIGSEGV;

	/* Account into memcg of the process that created bpf_arena */
	ret = bpf_map_alloc_pages(map, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE, 1, &page);
	if (ret) {
		mtree_erase(&arena->mt, vmf->pgoff);
		return VM_FAULT_SIGSEGV;
	}

	ret = vm_area_map_pages(arena->kern_vm, kaddr, kaddr + PAGE_SIZE, &page);
	if (ret) {
		mtree_erase(&arena->mt, vmf->pgoff);
		__free_page(page);
		return VM_FAULT_SIGSEGV;
	}
out:
	page_ref_add(page, 1);
	vmf->page = page;
	return 0;
}

static const struct vm_operations_struct arena_vm_ops = {
	.close		= arena_vm_close,
	.fault          = arena_vm_fault,
};

static unsigned long arena_get_unmapped_area(struct file *filp, unsigned long addr,
					     unsigned long len, unsigned long pgoff,
					     unsigned long flags)
{
	struct bpf_map *map = filp->private_data;
	struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
	long ret;

	if (pgoff)
		return -EINVAL;
	if (len > (1ull << 32))
		return -E2BIG;

	/* if user_vm_start was specified at arena creation time */
	if (arena->user_vm_start) {
		if (len > arena->user_vm_end - arena->user_vm_start)
			return -E2BIG;
		if (len != arena->user_vm_end - arena->user_vm_start)
			return -EINVAL;
		if (addr != arena->user_vm_start)
			return -EINVAL;
	}

	ret = current->mm->get_unmapped_area(filp, addr, len * 2, 0, flags);
	if (IS_ERR_VALUE(ret))
		return ret;
	if ((ret >> 32) == ((ret + len - 1) >> 32))
		return ret;
	if (WARN_ON_ONCE(arena->user_vm_start))
		/* checks at map creation time should prevent this */
		return -EFAULT;
	return round_up(ret, 1ull << 32);
}

static int arena_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
{
	struct bpf_arena *arena = container_of(map, struct bpf_arena, map);

	guard(mutex)(&arena->lock);
	if (arena->user_vm_start && arena->user_vm_start != vma->vm_start)
		/*
		 * If map_extra was not specified at arena creation time then
		 * 1st user process can do mmap(NULL, ...) to pick user_vm_start
		 * 2nd user process must pass the same addr to mmap(addr, MAP_FIXED..);
		 *   or
		 * specify addr in map_extra and
		 * use the same addr later with mmap(addr, MAP_FIXED..);
		 */
		return -EBUSY;

	if (arena->user_vm_end && arena->user_vm_end != vma->vm_end)
		/* all user processes must have the same size of mmap-ed region */
		return -EBUSY;

	/* Earlier checks should prevent this */
	if (WARN_ON_ONCE(vma->vm_end - vma->vm_start > (1ull << 32) || vma->vm_pgoff))
		return -EFAULT;

	if (remember_vma(arena, vma))
		return -ENOMEM;

	arena->user_vm_start = vma->vm_start;
	arena->user_vm_end = vma->vm_end;
	/*
	 * bpf_map_mmap() checks that it's being mmaped as VM_SHARED and
	 * clears VM_MAYEXEC. Set VM_DONTEXPAND as well to avoid
	 * potential change of user_vm_start.
	 */
	vm_flags_set(vma, VM_DONTEXPAND);
	vma->vm_ops = &arena_vm_ops;
	return 0;
}

static int arena_map_direct_value_addr(const struct bpf_map *map, u64 *imm, u32 off)
{
	struct bpf_arena *arena = container_of(map, struct bpf_arena, map);

	if ((u64)off > arena->user_vm_end - arena->user_vm_start)
		return -ERANGE;
	*imm = (unsigned long)arena->user_vm_start;
	return 0;
}

BTF_ID_LIST_SINGLE(bpf_arena_map_btf_ids, struct, bpf_arena)
const struct bpf_map_ops arena_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc = arena_map_alloc,
	.map_free = arena_map_free,
	.map_direct_value_addr = arena_map_direct_value_addr,
	.map_mmap = arena_map_mmap,
	.map_get_unmapped_area = arena_get_unmapped_area,
	.map_get_next_key = arena_map_get_next_key,
	.map_push_elem = arena_map_push_elem,
	.map_peek_elem = arena_map_peek_elem,
	.map_pop_elem = arena_map_pop_elem,
	.map_lookup_elem = arena_map_lookup_elem,
	.map_update_elem = arena_map_update_elem,
	.map_delete_elem = arena_map_delete_elem,
	.map_check_btf = arena_map_check_btf,
	.map_mem_usage = arena_map_mem_usage,
	.map_btf_id = &bpf_arena_map_btf_ids[0],
};

static u64 clear_lo32(u64 val)
{
	return val & ~(u64)~0U;
}

/*
 * Allocate pages and vmap them into kernel vmalloc area.
 * Later the pages will be mmaped into user space vma.
 */
static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt, int node_id)
{
	/* user_vm_end/start are fixed before bpf prog runs */
	long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
	u64 kern_vm_start = bpf_arena_get_kern_vm_start(arena);
	struct page **pages;
	long pgoff = 0;
	u32 uaddr32;
	int ret, i;

	if (page_cnt > page_cnt_max)
		return 0;

	if (uaddr) {
		if (uaddr & ~PAGE_MASK)
			return 0;
		pgoff = compute_pgoff(arena, uaddr);
		if (pgoff + page_cnt > page_cnt_max)
			/* requested address will be outside of user VMA */
			return 0;
	}

	/* zeroing is needed, since alloc_pages_bulk_array() only fills in non-zero entries */
	pages = kvcalloc(page_cnt, sizeof(struct page *), GFP_KERNEL);
	if (!pages)
		return 0;

	guard(mutex)(&arena->lock);

	if (uaddr)
		ret = mtree_insert_range(&arena->mt, pgoff, pgoff + page_cnt - 1,
					 MT_ENTRY, GFP_KERNEL);
	else
		ret = mtree_alloc_range(&arena->mt, &pgoff, MT_ENTRY,
					page_cnt, 0, page_cnt_max - 1, GFP_KERNEL);
	if (ret)
		goto out_free_pages;

	ret = bpf_map_alloc_pages(&arena->map, GFP_KERNEL | __GFP_ZERO,
				  node_id, page_cnt, pages);
	if (ret)
		goto out;

	uaddr32 = (u32)(arena->user_vm_start + pgoff * PAGE_SIZE);
	/* Earlier checks make sure that uaddr32 + page_cnt * PAGE_SIZE will not overflow 32-bit */
	ret = vm_area_map_pages(arena->kern_vm, kern_vm_start + uaddr32,
				kern_vm_start + uaddr32 + page_cnt * PAGE_SIZE, pages);
	if (ret) {
		for (i = 0; i < page_cnt; i++)
			__free_page(pages[i]);
		goto out;
	}
	kvfree(pages);
	return clear_lo32(arena->user_vm_start) + uaddr32;
out:
	mtree_erase(&arena->mt, pgoff);
out_free_pages:
	kvfree(pages);
	return 0;
}

/*
 * If page is present in vmalloc area, unmap it from vmalloc area,
 * unmap it from all user space vma-s,
 * and free it.
 */
static void zap_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
{
	struct vma_list *vml;

	list_for_each_entry(vml, &arena->vma_list, head)
		zap_page_range_single(vml->vma, uaddr,
				      PAGE_SIZE * page_cnt, NULL);
}

static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
{
	u64 full_uaddr, uaddr_end;
	long kaddr, pgoff, i;
	struct page *page;

	/* only aligned lower 32-bit are relevant */
	uaddr = (u32)uaddr;
	uaddr &= PAGE_MASK;
	full_uaddr = clear_lo32(arena->user_vm_start) + uaddr;
	uaddr_end = min(arena->user_vm_end, full_uaddr + (page_cnt << PAGE_SHIFT));
	if (full_uaddr >= uaddr_end)
		return;

	page_cnt = (uaddr_end - full_uaddr) >> PAGE_SHIFT;

	guard(mutex)(&arena->lock);

	pgoff = compute_pgoff(arena, uaddr);
	/* clear range */
	mtree_store_range(&arena->mt, pgoff, pgoff + page_cnt - 1, NULL, GFP_KERNEL);

	if (page_cnt > 1)
		/* bulk zap if multiple pages being freed */
		zap_pages(arena, full_uaddr, page_cnt);

	kaddr = bpf_arena_get_kern_vm_start(arena) + uaddr;
	for (i = 0; i < page_cnt; i++, kaddr += PAGE_SIZE, full_uaddr += PAGE_SIZE) {
		page = vmalloc_to_page((void *)kaddr);
		if (!page)
			continue;
		if (page_cnt == 1 && page_mapped(page)) /* mapped by some user process */
			zap_pages(arena, full_uaddr, 1);
		vm_area_unmap_pages(arena->kern_vm, kaddr, kaddr + PAGE_SIZE);
		__free_page(page);
	}
}

__bpf_kfunc_start_defs();

__bpf_kfunc void *bpf_arena_alloc_pages(void *p__map, void *addr__ign, u32 page_cnt,
					int node_id, u64 flags)
{
	struct bpf_map *map = p__map;
	struct bpf_arena *arena = container_of(map, struct bpf_arena, map);

	if (map->map_type != BPF_MAP_TYPE_ARENA || flags || !page_cnt)
		return NULL;

	return (void *)arena_alloc_pages(arena, (long)addr__ign, page_cnt, node_id);
}

__bpf_kfunc void bpf_arena_free_pages(void *p__map, void *ptr__ign, u32 page_cnt)
{
	struct bpf_map *map = p__map;
	struct bpf_arena *arena = container_of(map, struct bpf_arena, map);

	if (map->map_type != BPF_MAP_TYPE_ARENA || !page_cnt || !ptr__ign)
		return;
	arena_free_pages(arena, (long)ptr__ign, page_cnt);
}
__bpf_kfunc_end_defs();

BTF_KFUNCS_START(arena_kfuncs)
BTF_ID_FLAGS(func, bpf_arena_alloc_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_arena_free_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE)
BTF_KFUNCS_END(arena_kfuncs)

static const struct btf_kfunc_id_set common_kfunc_set = {
	.owner = THIS_MODULE,
	.set   = &arena_kfuncs,
};

static int __init kfunc_init(void)
{
	return register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set);
}
late_initcall(kfunc_init);
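For orientation, the user-space counterpart of the file above can be sketched as follows. This is a hedged example, not from this series: it uses libbpf's existing bpf_map_create() and plain mmap(); the page count and names are illustrative, and the constraints mirror the checks in arena_map_alloc() and arena_map_mmap() above.

        #include <bpf/bpf.h>
        #include <sys/mman.h>

        int create_and_map_arena(void)
        {
                /* key_size/value_size must be 0; BPF_F_MMAPABLE is mandatory */
                LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
                int map_fd = bpf_map_create(BPF_MAP_TYPE_ARENA, "arena", 0, 0,
                                            100 /* max_entries = pages */, &opts);
                void *base;

                if (map_fd < 0)
                        return -1;
                /* map the whole arena at once (pgoff must be 0, len <= 4GB);
                 * pages fault in lazily unless BPF_F_SEGV_ON_FAULT was set */
                base = mmap(NULL, 100 * 4096, PROT_READ | PROT_WRITE,
                            MAP_SHARED, map_fd, 0);
                return base == MAP_FAILED ? -1 : 0;
        }

Alternatively, a fixed user address can be passed via map_extra at creation time, in which case every process must mmap() the arena at exactly that address.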
@ -7111,10 +7111,11 @@ cand_cache_unlock:
|
||||
}
|
||||
|
||||
enum btf_arg_tag {
|
||||
ARG_TAG_CTX = 0x1,
|
||||
ARG_TAG_NONNULL = 0x2,
|
||||
ARG_TAG_TRUSTED = 0x4,
|
||||
ARG_TAG_NULLABLE = 0x8,
|
||||
ARG_TAG_CTX = BIT_ULL(0),
|
||||
ARG_TAG_NONNULL = BIT_ULL(1),
|
||||
ARG_TAG_TRUSTED = BIT_ULL(2),
|
||||
ARG_TAG_NULLABLE = BIT_ULL(3),
|
||||
ARG_TAG_ARENA = BIT_ULL(4),
|
||||
};
|
||||
|
||||
/* Process BTF of a function to produce high-level expectation of function
|
||||
@ -7226,6 +7227,8 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog)
|
||||
tags |= ARG_TAG_NONNULL;
|
||||
} else if (strcmp(tag, "nullable") == 0) {
|
||||
tags |= ARG_TAG_NULLABLE;
|
||||
} else if (strcmp(tag, "arena") == 0) {
|
||||
tags |= ARG_TAG_ARENA;
|
||||
} else {
|
||||
bpf_log(log, "arg#%d has unsupported set of tags\n", i);
|
||||
return -EOPNOTSUPP;
|
||||
@ -7280,6 +7283,14 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog)
|
||||
sub->args[i].btf_id = kern_type_id;
|
||||
continue;
|
||||
}
|
||||
if (tags & ARG_TAG_ARENA) {
|
||||
if (tags & ~ARG_TAG_ARENA) {
|
||||
bpf_log(log, "arg#%d arena cannot be combined with any other tags\n", i);
|
||||
return -EINVAL;
|
||||
}
|
||||
sub->args[i].arg_type = ARG_PTR_TO_ARENA;
|
||||
continue;
|
||||
}
|
||||
if (is_global) { /* generic user data pointer */
|
||||
u32 mem_size;
|
||||
|
||||
|
@ -2932,6 +2932,11 @@ bool __weak bpf_jit_supports_far_kfunc_call(void)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool __weak bpf_jit_supports_arena(void)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Return TRUE if the JIT backend satisfies the following two conditions:
|
||||
* 1) JIT backend supports atomic_xchg() on pointer-sized words.
|
||||
* 2) Under the specific arch, the implementation of xchg() is the same
|
||||
@ -2976,6 +2981,17 @@ void __weak arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp,
|
||||
{
|
||||
}
|
||||
|
||||
/* for configs without MMU or 32-bit */
|
||||
__weak const struct bpf_map_ops arena_map_ops;
|
||||
__weak u64 bpf_arena_get_user_vm_start(struct bpf_arena *arena)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
__weak u64 bpf_arena_get_kern_vm_start(struct bpf_arena *arena)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BPF_SYSCALL
|
||||
static int __init bpf_global_ma_init(void)
|
||||
{
|
||||
|
@ -166,6 +166,12 @@ static bool is_movsx(const struct bpf_insn *insn)
|
||||
(insn->off == 8 || insn->off == 16 || insn->off == 32);
|
||||
}
|
||||
|
||||
static bool is_addr_space_cast(const struct bpf_insn *insn)
|
||||
{
|
||||
return insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) &&
|
||||
insn->off == BPF_ADDR_SPACE_CAST;
|
||||
}
|
||||
|
||||
void print_bpf_insn(const struct bpf_insn_cbs *cbs,
|
||||
const struct bpf_insn *insn,
|
||||
bool allow_ptr_leaks)
|
||||
@ -184,6 +190,10 @@ void print_bpf_insn(const struct bpf_insn_cbs *cbs,
|
||||
insn->code, class == BPF_ALU ? 'w' : 'r',
|
||||
insn->dst_reg, class == BPF_ALU ? 'w' : 'r',
|
||||
insn->dst_reg);
|
||||
} else if (is_addr_space_cast(insn)) {
|
||||
verbose(cbs->private_data, "(%02x) r%d = addr_space_cast(r%d, %d, %d)\n",
|
||||
insn->code, insn->dst_reg,
|
||||
insn->src_reg, ((u32)insn->imm) >> 16, (u16)insn->imm);
|
||||
} else if (BPF_SRC(insn->code) == BPF_X) {
|
||||
verbose(cbs->private_data, "(%02x) %c%d %s %s%c%d\n",
|
||||
insn->code, class == BPF_ALU ? 'w' : 'r',
|
||||
|
@ -458,6 +458,7 @@ const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type)
|
||||
[PTR_TO_XDP_SOCK] = "xdp_sock",
|
||||
[PTR_TO_BTF_ID] = "ptr_",
|
||||
[PTR_TO_MEM] = "mem",
|
||||
[PTR_TO_ARENA] = "arena",
|
||||
[PTR_TO_BUF] = "buf",
|
||||
[PTR_TO_FUNC] = "func",
|
||||
[PTR_TO_MAP_KEY] = "map_key",
|
||||
@ -693,6 +694,8 @@ static void print_reg_state(struct bpf_verifier_env *env,
|
||||
}
|
||||
|
||||
verbose(env, "%s", reg_type_str(env, t));
|
||||
if (t == PTR_TO_ARENA)
|
||||
return;
|
||||
if (t == PTR_TO_STACK) {
|
||||
if (state->frameno != reg->frameno)
|
||||
verbose(env, "[%d]", reg->frameno);
|
||||
|
@ -164,6 +164,7 @@ static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
|
||||
if (bpf_map_is_offloaded(map)) {
|
||||
return bpf_map_offload_update_elem(map, key, value, flags);
|
||||
} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
|
||||
map->map_type == BPF_MAP_TYPE_ARENA ||
|
||||
map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
|
||||
return map->ops->map_update_elem(map, key, value, flags);
|
||||
} else if (map->map_type == BPF_MAP_TYPE_SOCKHASH ||
|
||||
@ -479,6 +480,39 @@ static void bpf_map_release_memcg(struct bpf_map *map)
|
||||
}
|
||||
#endif
|
||||
|
||||
int bpf_map_alloc_pages(const struct bpf_map *map, gfp_t gfp, int nid,
|
||||
unsigned long nr_pages, struct page **pages)
|
||||
{
|
||||
unsigned long i, j;
|
||||
struct page *pg;
|
||||
int ret = 0;
|
||||
#ifdef CONFIG_MEMCG_KMEM
|
||||
struct mem_cgroup *memcg, *old_memcg;
|
||||
|
||||
memcg = bpf_map_get_memcg(map);
|
||||
old_memcg = set_active_memcg(memcg);
|
||||
#endif
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
pg = alloc_pages_node(nid, gfp | __GFP_ACCOUNT, 0);
|
||||
|
||||
if (pg) {
|
||||
pages[i] = pg;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < i; j++)
|
||||
__free_page(pages[j]);
|
||||
ret = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMCG_KMEM
|
||||
set_active_memcg(old_memcg);
|
||||
mem_cgroup_put(memcg);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
static int btf_field_cmp(const void *a, const void *b)
|
||||
{
|
||||
const struct btf_field *f1 = a, *f2 = b;
|
||||
@ -1176,6 +1210,7 @@ static int map_create(union bpf_attr *attr)
|
||||
}
|
||||
|
||||
if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER &&
|
||||
attr->map_type != BPF_MAP_TYPE_ARENA &&
|
||||
attr->map_extra != 0)
|
||||
return -EINVAL;
|
||||
|
||||
@ -1265,6 +1300,7 @@ static int map_create(union bpf_attr *attr)
|
||||
case BPF_MAP_TYPE_LRU_PERCPU_HASH:
|
||||
case BPF_MAP_TYPE_STRUCT_OPS:
|
||||
case BPF_MAP_TYPE_CPUMAP:
|
||||
case BPF_MAP_TYPE_ARENA:
|
||||
if (!bpf_token_capable(token, CAP_BPF))
|
||||
goto put_token;
|
||||
break;
|
||||
@ -4417,6 +4453,12 @@ static struct bpf_insn *bpf_insn_prepare_dump(const struct bpf_prog *prog,
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((BPF_CLASS(code) == BPF_LDX || BPF_CLASS(code) == BPF_STX ||
|
||||
BPF_CLASS(code) == BPF_ST) && BPF_MODE(code) == BPF_PROBE_MEM32) {
|
||||
insns[i].code = BPF_CLASS(code) | BPF_SIZE(code) | BPF_MEM;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (code != (BPF_LD | BPF_IMM | BPF_DW))
|
||||
continue;
|
||||
|
||||
|
@ -4386,6 +4386,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
|
||||
case PTR_TO_MEM:
|
||||
case PTR_TO_FUNC:
|
||||
case PTR_TO_MAP_KEY:
|
||||
case PTR_TO_ARENA:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
@ -5828,6 +5829,8 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
|
||||
case PTR_TO_XDP_SOCK:
|
||||
pointer_desc = "xdp_sock ";
|
||||
break;
|
||||
case PTR_TO_ARENA:
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -6937,6 +6940,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
|
||||
|
||||
if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ))
|
||||
mark_reg_unknown(env, regs, value_regno);
|
||||
} else if (reg->type == PTR_TO_ARENA) {
|
||||
if (t == BPF_READ && value_regno >= 0)
|
||||
mark_reg_unknown(env, regs, value_regno);
|
||||
} else {
|
||||
verbose(env, "R%d invalid mem access '%s'\n", regno,
|
||||
reg_type_str(env, reg->type));
|
||||
@ -8408,6 +8414,7 @@ static int check_func_arg_reg_off(struct bpf_verifier_env *env,
|
||||
case PTR_TO_MEM | MEM_RINGBUF:
|
||||
case PTR_TO_BUF:
|
||||
case PTR_TO_BUF | MEM_RDONLY:
|
||||
case PTR_TO_ARENA:
|
||||
case SCALAR_VALUE:
|
||||
return 0;
|
||||
/* All the rest must be rejected, except PTR_TO_BTF_ID which allows
|
||||
@ -9372,6 +9379,18 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog,
|
||||
bpf_log(log, "arg#%d is expected to be non-NULL\n", i);
|
||||
return -EINVAL;
|
||||
}
|
||||
} else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
|
||||
/*
|
||||
* Can pass any value and the kernel won't crash, but
|
||||
* only PTR_TO_ARENA or SCALAR make sense. Everything
|
||||
* else is a bug in the bpf program. Point it out to
|
||||
* the user at the verification time instead of
|
||||
* run-time debug nightmare.
|
||||
*/
|
||||
if (reg->type != PTR_TO_ARENA && reg->type != SCALAR_VALUE) {
|
||||
bpf_log(log, "R%d is not a pointer to arena or scalar.\n", regno);
|
||||
return -EINVAL;
|
||||
}
|
||||
} else if (arg->arg_type == (ARG_PTR_TO_DYNPTR | MEM_RDONLY)) {
|
||||
ret = process_dynptr_func(env, regno, -1, arg->arg_type, 0);
|
||||
if (ret)
|
||||
@ -13852,6 +13871,21 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
|
||||
|
||||
dst_reg = ®s[insn->dst_reg];
|
||||
src_reg = NULL;
|
||||
|
||||
if (dst_reg->type == PTR_TO_ARENA) {
|
||||
struct bpf_insn_aux_data *aux = cur_aux(env);
|
||||
|
||||
if (BPF_CLASS(insn->code) == BPF_ALU64)
|
||||
/*
|
||||
* 32-bit operations zero upper bits automatically.
|
||||
* 64-bit operations need to be converted to 32.
|
||||
*/
|
||||
aux->needs_zext = true;
|
||||
|
||||
/* Any arithmetic operations are allowed on arena pointers */
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (dst_reg->type != SCALAR_VALUE)
|
||||
ptr_reg = dst_reg;
|
||||
else
|
||||
@ -13969,19 +14003,20 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
|
||||
} else if (opcode == BPF_MOV) {
|
||||
|
||||
if (BPF_SRC(insn->code) == BPF_X) {
|
||||
if (insn->imm != 0) {
|
||||
verbose(env, "BPF_MOV uses reserved fields\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (BPF_CLASS(insn->code) == BPF_ALU) {
|
||||
if (insn->off != 0 && insn->off != 8 && insn->off != 16) {
|
||||
if ((insn->off != 0 && insn->off != 8 && insn->off != 16) ||
|
||||
insn->imm) {
|
||||
verbose(env, "BPF_MOV uses reserved fields\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
} else if (insn->off == BPF_ADDR_SPACE_CAST) {
|
||||
if (insn->imm != 1 && insn->imm != 1u << 16) {
|
||||
verbose(env, "addr_space_cast insn can only convert between address space 1 and 0\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
} else {
|
||||
if (insn->off != 0 && insn->off != 8 && insn->off != 16 &&
|
||||
insn->off != 32) {
|
||||
if ((insn->off != 0 && insn->off != 8 && insn->off != 16 &&
|
||||
insn->off != 32) || insn->imm) {
|
||||
verbose(env, "BPF_MOV uses reserved fields\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -14008,7 +14043,12 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
|
||||
struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
|
||||
|
||||
if (BPF_CLASS(insn->code) == BPF_ALU64) {
|
||||
if (insn->off == 0) {
|
||||
if (insn->imm) {
|
||||
/* off == BPF_ADDR_SPACE_CAST */
|
||||
mark_reg_unknown(env, regs, insn->dst_reg);
|
||||
if (insn->imm == 1) /* cast from as(1) to as(0) */
|
||||
dst_reg->type = PTR_TO_ARENA;
|
||||
} else if (insn->off == 0) {
|
||||
/* case: R1 = R2
|
||||
* copy register state to dest reg
|
||||
*/
|
||||
@ -15182,6 +15222,10 @@ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
|
||||
|
||||
if (insn->src_reg == BPF_PSEUDO_MAP_VALUE ||
|
||||
insn->src_reg == BPF_PSEUDO_MAP_IDX_VALUE) {
|
||||
if (map->map_type == BPF_MAP_TYPE_ARENA) {
|
||||
__mark_reg_unknown(env, dst_reg);
|
||||
return 0;
|
||||
}
|
||||
dst_reg->type = PTR_TO_MAP_VALUE;
|
||||
dst_reg->off = aux->map_off;
|
||||
WARN_ON_ONCE(map->max_entries != 1);
|
||||
@ -16568,6 +16612,8 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
|
||||
* the same stack frame, since fp-8 in foo != fp-8 in bar
|
||||
*/
|
||||
return regs_exact(rold, rcur, idmap) && rold->frameno == rcur->frameno;
|
||||
case PTR_TO_ARENA:
|
||||
return true;
|
||||
default:
|
||||
return regs_exact(rold, rcur, idmap);
|
||||
}
|
||||
@ -17443,6 +17489,7 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
|
||||
case PTR_TO_TCP_SOCK:
|
||||
case PTR_TO_XDP_SOCK:
|
||||
case PTR_TO_BTF_ID:
|
||||
case PTR_TO_ARENA:
|
||||
return false;
|
||||
default:
|
||||
return true;
|
||||
@ -18108,6 +18155,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
|
||||
case BPF_MAP_TYPE_CGRP_STORAGE:
|
||||
case BPF_MAP_TYPE_QUEUE:
|
||||
case BPF_MAP_TYPE_STACK:
|
||||
case BPF_MAP_TYPE_ARENA:
|
||||
break;
|
||||
default:
|
||||
verbose(env,
|
||||
@ -18295,6 +18343,31 @@ static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
|
||||
fdput(f);
|
||||
return -EBUSY;
|
||||
}
|
||||
if (map->map_type == BPF_MAP_TYPE_ARENA) {
|
||||
if (env->prog->aux->arena) {
|
||||
verbose(env, "Only one arena per program\n");
|
||||
fdput(f);
|
||||
return -EBUSY;
|
||||
}
|
||||
if (!env->allow_ptr_leaks || !env->bpf_capable) {
|
||||
verbose(env, "CAP_BPF and CAP_PERFMON are required to use arena\n");
|
||||
fdput(f);
|
||||
return -EPERM;
|
||||
}
|
||||
if (!env->prog->jit_requested) {
|
||||
verbose(env, "JIT is required to use arena\n");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
if (!bpf_jit_supports_arena()) {
|
||||
verbose(env, "JIT doesn't support arena\n");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
env->prog->aux->arena = (void *)map;
|
||||
if (!bpf_arena_get_user_vm_start(env->prog->aux->arena)) {
|
||||
verbose(env, "arena's user address must be set via map_extra or mmap()\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
fdput(f);
|
||||
next_insn:
|
||||
@ -18916,6 +18989,14 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
|
||||
env->prog->aux->num_exentries++;
|
||||
}
|
||||
continue;
|
||||
case PTR_TO_ARENA:
|
||||
if (BPF_MODE(insn->code) == BPF_MEMSX) {
|
||||
verbose(env, "sign extending loads from arena are not supported yet\n");
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
insn->code = BPF_CLASS(insn->code) | BPF_PROBE_MEM32 | BPF_SIZE(insn->code);
|
||||
env->prog->aux->num_exentries++;
|
||||
continue;
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
@ -19101,13 +19182,19 @@ static int jit_subprogs(struct bpf_verifier_env *env)
|
||||
func[i]->aux->nr_linfo = prog->aux->nr_linfo;
|
||||
func[i]->aux->jited_linfo = prog->aux->jited_linfo;
|
||||
func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
|
||||
func[i]->aux->arena = prog->aux->arena;
|
||||
num_exentries = 0;
|
||||
insn = func[i]->insnsi;
|
||||
for (j = 0; j < func[i]->len; j++, insn++) {
|
||||
if (BPF_CLASS(insn->code) == BPF_LDX &&
|
||||
(BPF_MODE(insn->code) == BPF_PROBE_MEM ||
|
||||
BPF_MODE(insn->code) == BPF_PROBE_MEM32 ||
|
||||
BPF_MODE(insn->code) == BPF_PROBE_MEMSX))
|
||||
num_exentries++;
|
||||
if ((BPF_CLASS(insn->code) == BPF_STX ||
|
||||
BPF_CLASS(insn->code) == BPF_ST) &&
|
||||
BPF_MODE(insn->code) == BPF_PROBE_MEM32)
|
||||
num_exentries++;
|
||||
}
|
||||
func[i]->aux->num_exentries = num_exentries;
|
||||
func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
|
||||
@ -19506,6 +19593,21 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
|
||||
}
|
||||
|
||||
for (i = 0; i < insn_cnt;) {
|
||||
if (insn->code == (BPF_ALU64 | BPF_MOV | BPF_X) && insn->imm) {
|
||||
if ((insn->off == BPF_ADDR_SPACE_CAST && insn->imm == 1) ||
|
||||
(((struct bpf_map *)env->prog->aux->arena)->map_flags & BPF_F_NO_USER_CONV)) {
|
||||
/* convert to 32-bit mov that clears upper 32-bit */
|
||||
insn->code = BPF_ALU | BPF_MOV | BPF_X;
|
||||
/* clear off, so it's a normal 'wX = wY' from JIT pov */
|
||||
insn->off = 0;
|
||||
} /* cast from as(0) to as(1) should be handled by JIT */
|
||||
goto next_insn;
|
||||
}
|
||||
|
||||
if (env->insn_aux_data[i + delta].needs_zext)
|
||||
/* Convert BPF_CLASS(insn->code) == BPF_ALU64 to 32-bit ALU */
|
||||
insn->code = BPF_ALU | BPF_OP(insn->code) | BPF_SRC(insn->code);
|
||||
|
||||
/* Make divide-by-zero exceptions impossible. */
|
||||
if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
|
||||
insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
|
||||
@ -20358,6 +20460,9 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
|
||||
reg->btf = bpf_get_btf_vmlinux(); /* can't fail at this point */
|
||||
reg->btf_id = arg->btf_id;
|
||||
reg->id = ++env->id_gen;
|
||||
} else if (base_type(arg->arg_type) == ARG_PTR_TO_ARENA) {
|
||||
/* caller can pass either PTR_TO_ARENA or SCALAR */
|
||||
mark_reg_unknown(env, regs, i);
|
||||
} else {
|
||||
WARN_ONCE(1, "BUG: unhandled arg#%d type %d\n",
|
||||
i - BPF_REG_1, arg->arg_type);
|
||||
@ -55,7 +55,7 @@ MAP COMMANDS
| | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage**
| | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** }
| | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** | **arena** }

DESCRIPTION
===========
@ -120,6 +120,12 @@ static bool get_datasec_ident(const char *sec_name, char *buf, size_t buf_sz)
static const char *pfxs[] = { ".data", ".rodata", ".bss", ".kconfig" };
int i, n;

/* recognize hard coded LLVM section name */
if (strcmp(sec_name, ".arena.1") == 0) {
/* this is the name to use in skeleton */
snprintf(buf, buf_sz, "arena");
return true;
}
for (i = 0, n = ARRAY_SIZE(pfxs); i < n; i++) {
const char *pfx = pfxs[i];

@ -250,6 +256,13 @@ static const struct btf_type *find_type_for_map(struct btf *btf, const char *map

static bool is_mmapable_map(const struct bpf_map *map, char *buf, size_t sz)
{
size_t tmp_sz;

if (bpf_map__type(map) == BPF_MAP_TYPE_ARENA && bpf_map__initial_value(map, &tmp_sz)) {
snprintf(buf, sz, "arena");
return true;
}

if (!bpf_map__is_internal(map) || !(bpf_map__map_flags(map) & BPF_F_MMAPABLE))
return false;
@ -1463,7 +1463,7 @@ static int do_help(int argc, char **argv)
" devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
" cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
" queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n"
" task_storage | bloom_filter | user_ringbuf | cgrp_storage }\n"
" task_storage | bloom_filter | user_ringbuf | cgrp_storage | arena }\n"
" " HELP_SPEC_OPTIONS " |\n"
" {-f|--bpffs} | {-n|--nomount} }\n"
"",
@ -1009,6 +1009,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_BLOOM_FILTER,
BPF_MAP_TYPE_USER_RINGBUF,
BPF_MAP_TYPE_CGRP_STORAGE,
BPF_MAP_TYPE_ARENA,
__MAX_BPF_MAP_TYPE
};

@ -1338,6 +1339,10 @@ enum {
*/
#define BPF_PSEUDO_KFUNC_CALL 2

enum bpf_addr_space_cast {
BPF_ADDR_SPACE_CAST = 1,
};

/* flags for BPF_MAP_UPDATE_ELEM command */
enum {
BPF_ANY = 0, /* create new element or update existing */
@ -1396,6 +1401,12 @@ enum {

/* BPF token FD is passed in a corresponding command's token_fd field */
BPF_F_TOKEN_FD = (1U << 16),

/* When user space page faults in bpf_arena send SIGSEGV instead of inserting new page */
BPF_F_SEGV_ON_FAULT = (1U << 17),

/* Do not translate kernel bpf_arena pointers to user pointers */
BPF_F_NO_USER_CONV = (1U << 18),
};

/* Flags for BPF_PROG_QUERY. */
@ -1467,6 +1478,9 @@ union bpf_attr {
* BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the
* number of hash functions (if 0, the bloom filter will default
* to using 5 hash functions).
*
* BPF_MAP_TYPE_ARENA - contains the address where user space
* is going to mmap() the arena. It has to be page aligned.
*/
__u64 map_extra;
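To make that contract concrete, a hypothetical user-space sketch (illustrative names, error handling trimmed) that creates an arena with map_extra == 0 and lets mmap() pick the address, mirroring what libbpf does for ARENA maps later in this diff:

#include <unistd.h>
#include <sys/mman.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

static void *map_arena(int *fdp, unsigned int nr_pages)
{
        LIBBPF_OPTS(bpf_map_create_opts, opts,
                .map_flags = BPF_F_MMAPABLE,
                .map_extra = 0, /* 0 == any page-aligned address is fine */
        );
        long page_sz = sysconf(_SC_PAGE_SIZE);
        int fd = bpf_map_create(BPF_MAP_TYPE_ARENA, "arena",
                                0 /* key_size */, 0 /* value_size */,
                                nr_pages /* max_entries == pages */, &opts);

        if (fd < 0)
                return NULL;
        *fdp = fd;
        /* same flags libbpf uses when map_extra is unset */
        return mmap(NULL, nr_pages * page_sz, PROT_READ | PROT_WRITE,
                    MAP_SHARED, fd, 0);
}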
@ -193,6 +193,7 @@ enum libbpf_tristate {
#define __arg_nonnull __attribute((btf_decl_tag("arg:nonnull")))
#define __arg_nullable __attribute((btf_decl_tag("arg:nullable")))
#define __arg_trusted __attribute((btf_decl_tag("arg:trusted")))
#define __arg_arena __attribute((btf_decl_tag("arg:arena")))

#ifndef ___bpf_concat
#define ___bpf_concat(a, b) a ## b
@ -185,6 +185,7 @@ static const char * const map_type_name[] = {
[BPF_MAP_TYPE_BLOOM_FILTER] = "bloom_filter",
[BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf",
[BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage",
[BPF_MAP_TYPE_ARENA] = "arena",
};

static const char * const prog_type_name[] = {
@ -497,6 +498,7 @@ struct bpf_struct_ops {
#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"
#define STRUCT_OPS_LINK_SEC ".struct_ops.link"
#define ARENA_SEC ".arena.1"

enum libbpf_map_type {
LIBBPF_MAP_UNSPEC,
@ -628,6 +630,7 @@ struct elf_state {
Elf *elf;
Elf64_Ehdr *ehdr;
Elf_Data *symbols;
Elf_Data *arena_data;
size_t shstrndx; /* section index for section name strings */
size_t strtabidx;
struct elf_sec_desc *secs;
@ -637,6 +640,7 @@ struct elf_state {
int text_shndx;
int symbols_shndx;
bool has_st_ops;
int arena_data_shndx;
};

struct usdt_manager;
@ -696,6 +700,10 @@ struct bpf_object {

struct usdt_manager *usdt_man;

struct bpf_map *arena_map;
void *arena_data;
size_t arena_data_sz;

struct kern_feature_cache *feat_cache;
char *token_path;
int token_fd;
@ -1442,6 +1450,7 @@ static void bpf_object__elf_finish(struct bpf_object *obj)
elf_end(obj->efile.elf);
obj->efile.elf = NULL;
obj->efile.symbols = NULL;
obj->efile.arena_data = NULL;

zfree(&obj->efile.secs);
obj->efile.sec_cnt = 0;
@ -1684,7 +1693,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)
return map;
}

static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
static size_t array_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
{
const long page_sz = sysconf(_SC_PAGE_SIZE);
size_t map_sz;
@ -1694,6 +1703,20 @@ static size_t bpf_map_mmap_sz(unsigned int value_sz, unsigned int max_entries)
return map_sz;
}

static size_t bpf_map_mmap_sz(const struct bpf_map *map)
{
const long page_sz = sysconf(_SC_PAGE_SIZE);

switch (map->def.type) {
case BPF_MAP_TYPE_ARRAY:
return array_map_mmap_sz(map->def.value_size, map->def.max_entries);
case BPF_MAP_TYPE_ARENA:
return page_sz * map->def.max_entries;
default:
return 0; /* not supported */
}
}

static int bpf_map_mmap_resize(struct bpf_map *map, size_t old_sz, size_t new_sz)
{
void *mmaped;
@ -1836,7 +1859,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
def->value_size = data_sz;
def->max_entries = 1;
def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG
? BPF_F_RDONLY_PROG : 0;
? BPF_F_RDONLY_PROG : 0;

/* failures are fine because of maps like .rodata.str1.1 */
(void) map_fill_btf_type_info(obj, map);
@ -1847,7 +1870,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,
pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n",
map->name, map->sec_idx, map->sec_offset, def->map_flags);

mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
mmap_sz = bpf_map_mmap_sz(map);
map->mmaped = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
if (map->mmaped == MAP_FAILED) {
@ -2828,6 +2851,32 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
return 0;
}

static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map,
const char *sec_name, int sec_idx,
void *data, size_t data_sz)
{
const long page_sz = sysconf(_SC_PAGE_SIZE);
size_t mmap_sz;

mmap_sz = bpf_map_mmap_sz(obj->arena_map);
if (roundup(data_sz, page_sz) > mmap_sz) {
pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n",
sec_name, mmap_sz, data_sz);
return -E2BIG;
}

obj->arena_data = malloc(data_sz);
if (!obj->arena_data)
return -ENOMEM;
memcpy(obj->arena_data, data, data_sz);
obj->arena_data_sz = data_sz;

/* make bpf_map__init_value() work for ARENA maps */
map->mmaped = obj->arena_data;

return 0;
}

static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
const char *pin_root_path)
{
@ -2877,6 +2926,33 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
return err;
}

for (i = 0; i < obj->nr_maps; i++) {
struct bpf_map *map = &obj->maps[i];

if (map->def.type != BPF_MAP_TYPE_ARENA)
continue;

if (obj->arena_map) {
pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n",
map->name, obj->arena_map->name);
return -EINVAL;
}
obj->arena_map = map;

if (obj->efile.arena_data) {
err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx,
obj->efile.arena_data->d_buf,
obj->efile.arena_data->d_size);
if (err)
return err;
}
}
if (obj->efile.arena_data && !obj->arena_map) {
pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n",
ARENA_SEC);
return -ENOENT;
}

return 0;
}

@ -3756,6 +3832,9 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
sec_desc->shdr = sh;
sec_desc->data = data;
obj->efile.has_st_ops = true;
} else if (strcmp(name, ARENA_SEC) == 0) {
obj->efile.arena_data = data;
obj->efile.arena_data_shndx = idx;
} else {
pr_info("elf: skipping unrecognized data section(%d) %s\n",
idx, name);
@ -4385,6 +4464,15 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));

/* arena data relocation */
if (shdr_idx == obj->efile.arena_data_shndx) {
reloc_desc->type = RELO_DATA;
reloc_desc->insn_idx = insn_idx;
reloc_desc->map_idx = obj->arena_map - obj->maps;
reloc_desc->sym_off = sym->st_value;
return 0;
}

/* generic map reference relocation */
if (type == LIBBPF_MAP_UNSPEC) {
if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
@ -4925,6 +5013,7 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
bpf_gen__map_freeze(obj->gen_loader, map - obj->maps);
return 0;
}

err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0);
if (err) {
err = -errno;
@ -5017,6 +5106,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
case BPF_MAP_TYPE_SOCKHASH:
case BPF_MAP_TYPE_QUEUE:
case BPF_MAP_TYPE_STACK:
case BPF_MAP_TYPE_ARENA:
create_attr.btf_fd = 0;
create_attr.btf_key_type_id = 0;
create_attr.btf_value_type_id = 0;
@ -5261,7 +5351,23 @@ retry:
if (err < 0)
goto err_out;
}

if (map->def.type == BPF_MAP_TYPE_ARENA) {
map->mmaped = mmap((void *)map->map_extra, bpf_map_mmap_sz(map),
PROT_READ | PROT_WRITE,
map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED,
map->fd, 0);
if (map->mmaped == MAP_FAILED) {
err = -errno;
map->mmaped = NULL;
pr_warn("map '%s': failed to mmap arena: %d\n",
map->name, err);
return err;
}
if (obj->arena_data) {
memcpy(map->mmaped, obj->arena_data, obj->arena_data_sz);
zfree(&obj->arena_data);
}
}
if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) {
err = init_map_in_map_slots(obj, map);
if (err < 0)
@ -8758,13 +8864,9 @@ static void bpf_map__destroy(struct bpf_map *map)
zfree(&map->init_slots);
map->init_slots_sz = 0;

if (map->mmaped) {
size_t mmap_sz;

mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
munmap(map->mmaped, mmap_sz);
map->mmaped = NULL;
}
if (map->mmaped && map->mmaped != map->obj->arena_data)
munmap(map->mmaped, bpf_map_mmap_sz(map));
map->mmaped = NULL;

if (map->st_ops) {
zfree(&map->st_ops->data);
@ -8824,6 +8926,8 @@ void bpf_object__close(struct bpf_object *obj)
if (obj->token_fd > 0)
close(obj->token_fd);

zfree(&obj->arena_data);

free(obj);
}

@ -9995,11 +10099,14 @@ int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
return libbpf_err(-EBUSY);

if (map->mmaped) {
int err;
size_t mmap_old_sz, mmap_new_sz;
int err;

mmap_old_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
mmap_new_sz = bpf_map_mmap_sz(size, map->def.max_entries);
if (map->def.type != BPF_MAP_TYPE_ARRAY)
return -EOPNOTSUPP;

mmap_old_sz = bpf_map_mmap_sz(map);
mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries);
err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz);
if (err) {
pr_warn("map '%s': failed to resize memory-mapped region: %d\n",
@ -10032,18 +10139,26 @@ __u32 bpf_map__btf_value_type_id(const struct bpf_map *map)
int bpf_map__set_initial_value(struct bpf_map *map,
const void *data, size_t size)
{
size_t actual_sz;

if (map->obj->loaded || map->reused)
return libbpf_err(-EBUSY);

if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
size != map->def.value_size)
if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG)
return libbpf_err(-EINVAL);

if (map->def.type == BPF_MAP_TYPE_ARENA)
actual_sz = map->obj->arena_data_sz;
else
actual_sz = map->def.value_size;
if (size != actual_sz)
return libbpf_err(-EINVAL);

memcpy(map->mmaped, data, size);
return 0;
}
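A hypothetical round-trip sketch of the relaxed contract (illustrative helper name; for an ARENA map the size passed in must now match the arena's data-section size reported by bpf_map__initial_value(), not value_size):

#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <bpf/libbpf.h>

/* 'map' is an ARENA map of a not-yet-loaded bpf_object */
static int reseed_arena(struct bpf_map *map)
{
        size_t sz;
        void *init = bpf_map__initial_value(map, &sz);
        char *buf;
        int err;

        if (!init)
                return -EINVAL;
        buf = malloc(sz);
        if (!buf)
                return -ENOMEM;
        memcpy(buf, init, sz);
        /* ... tweak the __arena globals inside buf here ... */
        err = bpf_map__set_initial_value(map, buf, sz); /* size must match */
        free(buf);
        return err;
}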

void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)
void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize)
{
if (bpf_map__is_struct_ops(map)) {
if (psize)
@ -10053,7 +10168,12 @@ void *bpf_map__initial_value(struct bpf_map *map, size_t *psize)

if (!map->mmaped)
return NULL;
*psize = map->def.value_size;

if (map->def.type == BPF_MAP_TYPE_ARENA)
*psize = map->obj->arena_data_sz;
else
*psize = map->def.value_size;

return map->mmaped;
}

@ -13530,7 +13650,7 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)

for (i = 0; i < s->map_cnt; i++) {
struct bpf_map *map = *s->maps[i].map;
size_t mmap_sz = bpf_map_mmap_sz(map->def.value_size, map->def.max_entries);
size_t mmap_sz = bpf_map_mmap_sz(map);
int prot, map_fd = map->fd;
void **mmaped = s->maps[i].mmaped;

@ -13542,6 +13662,11 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s)
continue;
}

if (map->def.type == BPF_MAP_TYPE_ARENA) {
*mmaped = map->mmaped;
continue;
}

if (map->def.map_flags & BPF_F_RDONLY_PROG)
prot = PROT_READ;
else
@ -1014,7 +1014,7 @@ LIBBPF_API int bpf_map__set_map_extra(struct bpf_map *map, __u64 map_extra);

LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
const void *data, size_t size);
LIBBPF_API void *bpf_map__initial_value(struct bpf_map *map, size_t *psize);
LIBBPF_API void *bpf_map__initial_value(const struct bpf_map *map, size_t *psize);

/**
* @brief **bpf_map__is_internal()** tells the caller whether or not the
@ -338,6 +338,13 @@ static int probe_map_create(enum bpf_map_type map_type)
key_size = 0;
max_entries = 1;
break;
case BPF_MAP_TYPE_ARENA:
key_size = 0;
value_size = 0;
max_entries = 1; /* one page */
opts.map_extra = 0; /* can mmap() at any address */
opts.map_flags = BPF_F_MMAPABLE;
break;
case BPF_MAP_TYPE_HASH:
case BPF_MAP_TYPE_ARRAY:
case BPF_MAP_TYPE_PROG_ARRAY:
@ -10,3 +10,5 @@ fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_mu
fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95
fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95
missed/kprobe_recursion # missed_kprobe_recursion__attach unexpected error: -95 (errno 95)
verifier_arena # JIT does not support arena
arena_htab # JIT does not support arena
@ -4,3 +4,5 @@ exceptions # JIT does not support calling kfunc bpf_throw (excepti
get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?)
verifier_iterating_callbacks
verifier_arena # JIT does not support arena
arena_htab # JIT does not support arena
67 tools/testing/selftests/bpf/bpf_arena_alloc.h Normal file
@ -0,0 +1,67 @@
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#pragma once
#include "bpf_arena_common.h"

#ifndef __round_mask
#define __round_mask(x, y) ((__typeof__(x))((y)-1))
#endif
#ifndef round_up
#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
#endif

#ifdef __BPF__
#define NR_CPUS (sizeof(struct cpumask) * 8)

static void __arena * __arena page_frag_cur_page[NR_CPUS];
static int __arena page_frag_cur_offset[NR_CPUS];

/* Simple page_frag allocator */
static inline void __arena* bpf_alloc(unsigned int size)
{
__u64 __arena *obj_cnt;
__u32 cpu = bpf_get_smp_processor_id();
void __arena *page = page_frag_cur_page[cpu];
int __arena *cur_offset = &page_frag_cur_offset[cpu];
int offset;

size = round_up(size, 8);
if (size >= PAGE_SIZE - 8)
return NULL;
if (!page) {
refill:
page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
if (!page)
return NULL;
cast_kern(page);
page_frag_cur_page[cpu] = page;
*cur_offset = PAGE_SIZE - 8;
obj_cnt = page + PAGE_SIZE - 8;
*obj_cnt = 0;
} else {
cast_kern(page);
obj_cnt = page + PAGE_SIZE - 8;
}

offset = *cur_offset - size;
if (offset < 0)
goto refill;

(*obj_cnt)++;
*cur_offset = offset;
return page + offset;
}

static inline void bpf_free(void __arena *addr)
{
__u64 __arena *obj_cnt;

addr = (void __arena *)(((long)addr) & ~(PAGE_SIZE - 1));
obj_cnt = addr + PAGE_SIZE - 8;
if (--(*obj_cnt) == 0)
bpf_arena_free_pages(&arena, addr, 1);
}
#else
static inline void __arena* bpf_alloc(unsigned int size) { return NULL; }
static inline void bpf_free(void __arena *addr) {}
#endif
70 tools/testing/selftests/bpf/bpf_arena_common.h Normal file
@ -0,0 +1,70 @@
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#pragma once

#ifndef WRITE_ONCE
#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val))
#endif

#ifndef NUMA_NO_NODE
#define NUMA_NO_NODE (-1)
#endif

#ifndef arena_container_of
#define arena_container_of(ptr, type, member) \
({ \
void __arena *__mptr = (void __arena *)(ptr); \
((type *)(__mptr - offsetof(type, member))); \
})
#endif

#ifdef __BPF__ /* when compiled as bpf program */

#ifndef PAGE_SIZE
#define PAGE_SIZE __PAGE_SIZE
/*
* for older kernels try sizeof(struct genradix_node)
* or flexible:
* static inline long __bpf_page_size(void) {
* return bpf_core_enum_value(enum page_size_enum___l, __PAGE_SIZE___l) ?: sizeof(struct genradix_node);
* }
* but generated code is not great.
*/
#endif

#if defined(__BPF_FEATURE_ARENA_CAST) && !defined(BPF_ARENA_FORCE_ASM)
#define __arena __attribute__((address_space(1)))
#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */
#define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */
#else
#define __arena
#define cast_kern(ptr) bpf_addr_space_cast(ptr, 0, 1)
#define cast_user(ptr) bpf_addr_space_cast(ptr, 1, 0)
#endif

void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt,
int node_id, __u64 flags) __ksym __weak;
void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak;

#else /* when compiled as user space code */

#define __arena
#define __arg_arena
#define cast_kern(ptr) /* nop for user space */
#define cast_user(ptr) /* nop for user space */
__weak char arena[1];

#ifndef offsetof
#define offsetof(type, member) ((unsigned long)&((type *)0)->member)
#endif

static inline void __arena* bpf_arena_alloc_pages(void *map, void *addr, __u32 page_cnt,
int node_id, __u64 flags)
{
return NULL;
}
static inline void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt)
{
}

#endif
100 tools/testing/selftests/bpf/bpf_arena_htab.h Normal file
@ -0,0 +1,100 @@
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#pragma once
#include <errno.h>
#include "bpf_arena_alloc.h"
#include "bpf_arena_list.h"

struct htab_bucket {
struct arena_list_head head;
};
typedef struct htab_bucket __arena htab_bucket_t;

struct htab {
htab_bucket_t *buckets;
int n_buckets;
};
typedef struct htab __arena htab_t;

static inline htab_bucket_t *__select_bucket(htab_t *htab, __u32 hash)
{
htab_bucket_t *b = htab->buckets;

cast_kern(b);
return &b[hash & (htab->n_buckets - 1)];
}

static inline arena_list_head_t *select_bucket(htab_t *htab, __u32 hash)
{
return &__select_bucket(htab, hash)->head;
}

struct hashtab_elem {
int hash;
int key;
int value;
struct arena_list_node hash_node;
};
typedef struct hashtab_elem __arena hashtab_elem_t;

static hashtab_elem_t *lookup_elem_raw(arena_list_head_t *head, __u32 hash, int key)
{
hashtab_elem_t *l;

list_for_each_entry(l, head, hash_node)
if (l->hash == hash && l->key == key)
return l;

return NULL;
}

static int htab_hash(int key)
{
return key;
}

__weak int htab_lookup_elem(htab_t *htab __arg_arena, int key)
{
hashtab_elem_t *l_old;
arena_list_head_t *head;

cast_kern(htab);
head = select_bucket(htab, key);
l_old = lookup_elem_raw(head, htab_hash(key), key);
if (l_old)
return l_old->value;
return 0;
}

__weak int htab_update_elem(htab_t *htab __arg_arena, int key, int value)
{
hashtab_elem_t *l_new = NULL, *l_old;
arena_list_head_t *head;

cast_kern(htab);
head = select_bucket(htab, key);
l_old = lookup_elem_raw(head, htab_hash(key), key);

l_new = bpf_alloc(sizeof(*l_new));
if (!l_new)
return -ENOMEM;
l_new->key = key;
l_new->hash = htab_hash(key);
l_new->value = value;

list_add_head(&l_new->hash_node, head);
if (l_old) {
list_del(&l_old->hash_node);
bpf_free(l_old);
}
return 0;
}

void htab_init(htab_t *htab)
{
void __arena *buckets = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0);

cast_user(buckets);
htab->buckets = buckets;
htab->n_buckets = 2 * PAGE_SIZE / sizeof(struct htab_bucket);
}
92 tools/testing/selftests/bpf/bpf_arena_list.h Normal file
@ -0,0 +1,92 @@
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#pragma once
#include "bpf_arena_common.h"

struct arena_list_node;

typedef struct arena_list_node __arena arena_list_node_t;

struct arena_list_node {
arena_list_node_t *next;
arena_list_node_t * __arena *pprev;
};

struct arena_list_head {
struct arena_list_node __arena *first;
};
typedef struct arena_list_head __arena arena_list_head_t;

#define list_entry(ptr, type, member) arena_container_of(ptr, type, member)

#define list_entry_safe(ptr, type, member) \
({ typeof(*ptr) * ___ptr = (ptr); \
___ptr ? ({ cast_kern(___ptr); list_entry(___ptr, type, member); }) : NULL; \
})

#ifndef __BPF__
static inline void *bpf_iter_num_new(struct bpf_iter_num *it, int i, int j) { return NULL; }
static inline void bpf_iter_num_destroy(struct bpf_iter_num *it) {}
static inline bool bpf_iter_num_next(struct bpf_iter_num *it) { return true; }
#define cond_break ({})
#endif

/* Safely walk link list elements. Deletion of elements is allowed. */
#define list_for_each_entry(pos, head, member) \
for (void * ___tmp = (pos = list_entry_safe((head)->first, \
typeof(*(pos)), member), \
(void *)0); \
pos && ({ ___tmp = (void *)pos->member.next; 1; }); \
cond_break, \
pos = list_entry_safe((void __arena *)___tmp, typeof(*(pos)), member))

static inline void list_add_head(arena_list_node_t *n, arena_list_head_t *h)
{
arena_list_node_t *first = h->first, * __arena *tmp;

cast_user(first);
cast_kern(n);
WRITE_ONCE(n->next, first);
cast_kern(first);
if (first) {
tmp = &n->next;
cast_user(tmp);
WRITE_ONCE(first->pprev, tmp);
}
cast_user(n);
WRITE_ONCE(h->first, n);

tmp = &h->first;
cast_user(tmp);
cast_kern(n);
WRITE_ONCE(n->pprev, tmp);
}

static inline void __list_del(arena_list_node_t *n)
{
arena_list_node_t *next = n->next, *tmp;
arena_list_node_t * __arena *pprev = n->pprev;

cast_user(next);
cast_kern(pprev);
tmp = *pprev;
cast_kern(tmp);
WRITE_ONCE(tmp, next);
if (next) {
cast_user(pprev);
cast_kern(next);
WRITE_ONCE(next->pprev, pprev);
}
}

#define POISON_POINTER_DELTA 0

#define LIST_POISON1 ((void __arena *) 0x100 + POISON_POINTER_DELTA)
#define LIST_POISON2 ((void __arena *) 0x122 + POISON_POINTER_DELTA)

static inline void list_del(arena_list_node_t *n)
{
__list_del(n);
n->next = LIST_POISON1;
n->pprev = LIST_POISON2;
}
@ -343,6 +343,49 @@ l_true: \
asm volatile("%[reg]=%[reg]"::[reg]"r"((short)var))
#endif

/* emit instruction:
* rX = rX .off = BPF_ADDR_SPACE_CAST .imm32 = (dst_as << 16) | src_as
*/
#ifndef bpf_addr_space_cast
#define bpf_addr_space_cast(var, dst_as, src_as)\
asm volatile(".byte 0xBF; \
.ifc %[reg], r0; \
.byte 0x00; \
.endif; \
.ifc %[reg], r1; \
.byte 0x11; \
.endif; \
.ifc %[reg], r2; \
.byte 0x22; \
.endif; \
.ifc %[reg], r3; \
.byte 0x33; \
.endif; \
.ifc %[reg], r4; \
.byte 0x44; \
.endif; \
.ifc %[reg], r5; \
.byte 0x55; \
.endif; \
.ifc %[reg], r6; \
.byte 0x66; \
.endif; \
.ifc %[reg], r7; \
.byte 0x77; \
.endif; \
.ifc %[reg], r8; \
.byte 0x88; \
.endif; \
.ifc %[reg], r9; \
.byte 0x99; \
.endif; \
.short %[off]; \
.long %[as]" \
: [reg]"+r"(var) \
: [off]"i"(BPF_ADDR_SPACE_CAST) \
, [as]"i"((dst_as << 16) | src_as));
#endif
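A minimal usage sketch (assumes an 'arena' map like the ones declared in the selftests below; when built with BPF_ARENA_FORCE_ASM, cast_kern()/cast_user() from bpf_arena_common.h expand to exactly this macro):

static inline int __arena *alloc_and_publish(void)
{
        int __arena *p = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);

        if (!p)
                return NULL;
        cast_kern(p);   /* rN = addr_space_cast(rN, 0, 1) */
        *p = 42;        /* dereference via the kernel mapping */
        cast_user(p);   /* rN = addr_space_cast(rN, 1, 0) */
        return p;       /* directly readable by user space */
}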

/* Description
* Assert that a conditional expression is true.
* Returns
88 tools/testing/selftests/bpf/prog_tests/arena_htab.c Normal file
@ -0,0 +1,88 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#include <test_progs.h>
#include <sys/mman.h>
#include <network_helpers.h>

#include "arena_htab_asm.skel.h"
#include "arena_htab.skel.h"

#define PAGE_SIZE 4096

#include "bpf_arena_htab.h"

static void test_arena_htab_common(struct htab *htab)
{
int i;

printf("htab %p buckets %p n_buckets %d\n", htab, htab->buckets, htab->n_buckets);
ASSERT_OK_PTR(htab->buckets, "htab->buckets shouldn't be NULL");
for (i = 0; htab->buckets && i < 16; i += 4) {
/*
* Walk htab buckets and link lists since all pointers are correct,
* though they were written by bpf program.
*/
int val = htab_lookup_elem(htab, i);

ASSERT_EQ(i, val, "key == value");
}
}

static void test_arena_htab_llvm(void)
{
LIBBPF_OPTS(bpf_test_run_opts, opts);
struct arena_htab *skel;
struct htab *htab;
size_t arena_sz;
void *area;
int ret;

skel = arena_htab__open_and_load();
if (!ASSERT_OK_PTR(skel, "arena_htab__open_and_load"))
return;

area = bpf_map__initial_value(skel->maps.arena, &arena_sz);
/* fault-in a page with pgoff == 0 as sanity check */
*(volatile int *)area = 0x55aa;

/* bpf prog will allocate more pages */
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_htab_llvm), &opts);
ASSERT_OK(ret, "ret");
ASSERT_OK(opts.retval, "retval");
if (skel->bss->skip) {
printf("%s:SKIP:compiler doesn't support arena_cast\n", __func__);
test__skip();
goto out;
}
htab = skel->bss->htab_for_user;
test_arena_htab_common(htab);
out:
arena_htab__destroy(skel);
}

static void test_arena_htab_asm(void)
{
LIBBPF_OPTS(bpf_test_run_opts, opts);
struct arena_htab_asm *skel;
struct htab *htab;
int ret;

skel = arena_htab_asm__open_and_load();
if (!ASSERT_OK_PTR(skel, "arena_htab_asm__open_and_load"))
return;

ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_htab_asm), &opts);
ASSERT_OK(ret, "ret");
ASSERT_OK(opts.retval, "retval");
htab = skel->bss->htab_for_user;
test_arena_htab_common(htab);
arena_htab_asm__destroy(skel);
}

void test_arena_htab(void)
{
if (test__start_subtest("arena_htab_llvm"))
test_arena_htab_llvm();
if (test__start_subtest("arena_htab_asm"))
test_arena_htab_asm();
}
68 tools/testing/selftests/bpf/prog_tests/arena_list.c Normal file
@ -0,0 +1,68 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#include <test_progs.h>
#include <sys/mman.h>
#include <network_helpers.h>

#define PAGE_SIZE 4096

#include "bpf_arena_list.h"
#include "arena_list.skel.h"

struct elem {
struct arena_list_node node;
__u64 value;
};

static int list_sum(struct arena_list_head *head)
{
struct elem __arena *n;
int sum = 0;

list_for_each_entry(n, head, node)
sum += n->value;
return sum;
}

static void test_arena_list_add_del(int cnt)
{
LIBBPF_OPTS(bpf_test_run_opts, opts);
struct arena_list *skel;
int expected_sum = (u64)cnt * (cnt - 1) / 2;
int ret, sum;

skel = arena_list__open_and_load();
if (!ASSERT_OK_PTR(skel, "arena_list__open_and_load"))
return;

skel->bss->cnt = cnt;
ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_list_add), &opts);
ASSERT_OK(ret, "ret_add");
ASSERT_OK(opts.retval, "retval");
if (skel->bss->skip) {
printf("%s:SKIP:compiler doesn't support arena_cast\n", __func__);
test__skip();
goto out;
}
sum = list_sum(skel->bss->list_head);
ASSERT_EQ(sum, expected_sum, "sum of elems");
ASSERT_EQ(skel->arena->arena_sum, expected_sum, "__arena sum of elems");
ASSERT_EQ(skel->arena->test_val, cnt + 1, "num of elems");

ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_list_del), &opts);
ASSERT_OK(ret, "ret_del");
sum = list_sum(skel->bss->list_head);
ASSERT_EQ(sum, 0, "sum of list elems after del");
ASSERT_EQ(skel->bss->list_sum, expected_sum, "sum of list elems computed by prog");
ASSERT_EQ(skel->arena->arena_sum, expected_sum, "__arena sum of elems");
out:
arena_list__destroy(skel);
}

void test_arena_list(void)
{
if (test__start_subtest("arena_list_1"))
test_arena_list_add_del(1);
if (test__start_subtest("arena_list_1000"))
test_arena_list_add_del(1000);
}
@ -4,6 +4,7 @@

#include "cap_helpers.h"
#include "verifier_and.skel.h"
#include "verifier_arena.skel.h"
#include "verifier_array_access.skel.h"
#include "verifier_basic_stack.skel.h"
#include "verifier_bitfield_write.skel.h"
@ -118,6 +119,7 @@ static void run_tests_aux(const char *skel_name,
#define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes, NULL)

void test_verifier_and(void) { RUN(verifier_and); }
void test_verifier_arena(void) { RUN(verifier_arena); }
void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); }
void test_verifier_bitfield_write(void) { RUN(verifier_bitfield_write); }
void test_verifier_bounds(void) { RUN(verifier_bounds); }
48 tools/testing/selftests/bpf/progs/arena_htab.c Normal file
@ -0,0 +1,48 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
#include "bpf_experimental.h"

struct {
__uint(type, BPF_MAP_TYPE_ARENA);
__uint(map_flags, BPF_F_MMAPABLE);
__uint(max_entries, 100); /* number of pages */
} arena SEC(".maps");

#include "bpf_arena_htab.h"

void __arena *htab_for_user;
bool skip = false;

int zero = 0;

SEC("syscall")
int arena_htab_llvm(void *ctx)
{
#if defined(__BPF_FEATURE_ARENA_CAST) || defined(BPF_ARENA_FORCE_ASM)
struct htab __arena *htab;
__u64 i;

htab = bpf_alloc(sizeof(*htab));
cast_kern(htab);
htab_init(htab);

/* first run. No old elems in the table */
for (i = zero; i < 1000; i++)
htab_update_elem(htab, i, i);

/* should replace all elems with new ones */
for (i = zero; i < 1000; i++)
htab_update_elem(htab, i, i);
cast_user(htab);
htab_for_user = htab;
#else
skip = true;
#endif
return 0;
}

char _license[] SEC("license") = "GPL";
5 tools/testing/selftests/bpf/progs/arena_htab_asm.c Normal file
@ -0,0 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#define BPF_ARENA_FORCE_ASM
#define arena_htab_llvm arena_htab_asm
#include "arena_htab.c"
87 tools/testing/selftests/bpf/progs/arena_list.c Normal file
@ -0,0 +1,87 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
#include "bpf_experimental.h"

struct {
__uint(type, BPF_MAP_TYPE_ARENA);
__uint(map_flags, BPF_F_MMAPABLE);
__uint(max_entries, 100); /* number of pages */
#ifdef __TARGET_ARCH_arm64
__ulong(map_extra, 0x1ull << 32); /* start of mmap() region */
#else
__ulong(map_extra, 0x1ull << 44); /* start of mmap() region */
#endif
} arena SEC(".maps");

#include "bpf_arena_alloc.h"
#include "bpf_arena_list.h"

struct elem {
struct arena_list_node node;
__u64 value;
};

struct arena_list_head __arena *list_head;
int list_sum;
int cnt;
bool skip = false;

#ifdef __BPF_FEATURE_ARENA_CAST
long __arena arena_sum;
int __arena test_val = 1;
struct arena_list_head __arena global_head;
#else
long arena_sum SEC(".arena.1");
int test_val SEC(".arena.1");
#endif

int zero;

SEC("syscall")
int arena_list_add(void *ctx)
{
#ifdef __BPF_FEATURE_ARENA_CAST
__u64 i;

list_head = &global_head;

for (i = zero; i < cnt; cond_break, i++) {
struct elem __arena *n = bpf_alloc(sizeof(*n));

test_val++;
n->value = i;
arena_sum += i;
list_add_head(&n->node, list_head);
}
#else
skip = true;
#endif
return 0;
}

SEC("syscall")
int arena_list_del(void *ctx)
{
#ifdef __BPF_FEATURE_ARENA_CAST
struct elem __arena *n;
int sum = 0;

arena_sum = 0;
list_for_each_entry(n, list_head, node) {
sum += n->value;
arena_sum += n->value;
list_del(&n->node);
bpf_free(n);
}
list_sum = sum;
#else
skip = true;
#endif
return 0;
}

char _license[] SEC("license") = "GPL";
146 tools/testing/selftests/bpf/progs/verifier_arena.c Normal file
@ -0,0 +1,146 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
#include "bpf_experimental.h"
#include "bpf_arena_common.h"

struct {
__uint(type, BPF_MAP_TYPE_ARENA);
__uint(map_flags, BPF_F_MMAPABLE);
__uint(max_entries, 2); /* arena of two pages close to 32-bit boundary*/
__ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */
} arena SEC(".maps");

SEC("syscall")
__success __retval(0)
int basic_alloc1(void *ctx)
{
#if defined(__BPF_FEATURE_ARENA_CAST)
volatile int __arena *page1, *page2, *no_page, *page3;

page1 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
if (!page1)
return 1;
*page1 = 1;
page2 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
if (!page2)
return 2;
*page2 = 2;
no_page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
if (no_page)
return 3;
if (*page1 != 1)
return 4;
if (*page2 != 2)
return 5;
bpf_arena_free_pages(&arena, (void __arena *)page2, 1);
if (*page1 != 1)
return 6;
if (*page2 != 0) /* use-after-free should return 0 */
return 7;
page3 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
if (!page3)
return 8;
*page3 = 3;
if (page2 != page3)
return 9;
if (*page1 != 1)
return 10;
#endif
return 0;
}

SEC("syscall")
__success __retval(0)
int basic_alloc2(void *ctx)
{
#if defined(__BPF_FEATURE_ARENA_CAST)
volatile char __arena *page1, *page2, *page3, *page4;

page1 = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0);
if (!page1)
return 1;
page2 = page1 + __PAGE_SIZE;
page3 = page1 + __PAGE_SIZE * 2;
page4 = page1 - __PAGE_SIZE;
*page1 = 1;
*page2 = 2;
*page3 = 3;
*page4 = 4;
if (*page1 != 1)
return 1;
if (*page2 != 2)
return 2;
if (*page3 != 0)
return 3;
if (*page4 != 0)
return 4;
bpf_arena_free_pages(&arena, (void __arena *)page1, 2);
if (*page1 != 0)
return 5;
if (*page2 != 0)
return 6;
if (*page3 != 0)
return 7;
if (*page4 != 0)
return 8;
#endif
return 0;
}

struct bpf_arena___l {
struct bpf_map map;
} __attribute__((preserve_access_index));

SEC("syscall")
__success __retval(0) __log_level(2)
int basic_alloc3(void *ctx)
{
struct bpf_arena___l *ar = (struct bpf_arena___l *)&arena;
volatile char __arena *pages;

pages = bpf_arena_alloc_pages(&ar->map, NULL, ar->map.max_entries, NUMA_NO_NODE, 0);
if (!pages)
return 1;
return 0;
}

SEC("iter.s/bpf_map")
__success __log_level(2)
int iter_maps1(struct bpf_iter__bpf_map *ctx)
{
struct bpf_map *map = ctx->map;

if (!map)
return 0;
bpf_arena_alloc_pages(map, NULL, map->max_entries, 0, 0);
return 0;
}

SEC("iter.s/bpf_map")
__failure __msg("expected pointer to STRUCT bpf_map")
int iter_maps2(struct bpf_iter__bpf_map *ctx)
{
struct seq_file *seq = ctx->meta->seq;

bpf_arena_alloc_pages((void *)seq, NULL, 1, 0, 0);
return 0;
}

SEC("iter.s/bpf_map")
__failure __msg("untrusted_ptr_bpf_map")
int iter_maps3(struct bpf_iter__bpf_map *ctx)
{
struct bpf_map *map = ctx->map;

if (!map)
return 0;
bpf_arena_alloc_pages(map->inner_map_meta, NULL, map->max_entries, 0, 0);
return 0;
}

char _license[] SEC("license") = "GPL";
@ -501,7 +501,7 @@ static bool is_unpriv_capable_map(struct bpf_map *map)
}
}

static int do_prog_test_run(int fd_prog, int *retval)
static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts)
{
__u8 tmp_out[TEST_DATA_LEN << 2] = {};
__u8 tmp_in[TEST_DATA_LEN] = {};
@ -514,6 +514,10 @@ static int do_prog_test_run(int fd_prog, int *retval)
.repeat = 1,
);

if (empty_opts) {
memset(&topts, 0, sizeof(struct bpf_test_run_opts));
topts.sz = sizeof(struct bpf_test_run_opts);
}
err = bpf_prog_test_run_opts(fd_prog, &topts);
saved_errno = errno;

@ -649,7 +653,8 @@ void run_subtest(struct test_loader *tester,
}
}

do_prog_test_run(bpf_program__fd(tprog), &retval);
do_prog_test_run(bpf_program__fd(tprog), &retval,
bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false);
if (retval != subspec->retval && subspec->retval != POINTER_VALUE) {
PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval);
goto tobj_cleanup;