From 511bb0085c6fe48353c35cd3d25f4f8720579a6d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 15 Oct 2019 11:28:45 -0700 Subject: [PATCH 01/52] libbpf: Update BTF reloc support to latest Clang format BTF offset reloc was generalized in recent Clang into field relocation, capturing extra u32 field, specifying what aspect of captured field needs to be relocated. This changes .BTF.ext's record size for this relocation from 12 bytes to 16 bytes. Given these format changes happened in Clang before official released version, it's ok to not support outdated 12-byte record size w/o breaking ABI. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191015182849.3922287-2-andriin@fb.com --- tools/lib/bpf/btf.c | 16 ++++++++-------- tools/lib/bpf/btf.h | 4 ++-- tools/lib/bpf/libbpf.c | 24 ++++++++++++------------ tools/lib/bpf/libbpf_internal.h | 25 ++++++++++++++++++------- 4 files changed, 40 insertions(+), 29 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 1aa189a9112a..3eae8d1addfa 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -888,14 +888,14 @@ static int btf_ext_setup_line_info(struct btf_ext *btf_ext) return btf_ext_setup_info(btf_ext, ¶m); } -static int btf_ext_setup_offset_reloc(struct btf_ext *btf_ext) +static int btf_ext_setup_field_reloc(struct btf_ext *btf_ext) { struct btf_ext_sec_setup_param param = { - .off = btf_ext->hdr->offset_reloc_off, - .len = btf_ext->hdr->offset_reloc_len, - .min_rec_size = sizeof(struct bpf_offset_reloc), - .ext_info = &btf_ext->offset_reloc_info, - .desc = "offset_reloc", + .off = btf_ext->hdr->field_reloc_off, + .len = btf_ext->hdr->field_reloc_len, + .min_rec_size = sizeof(struct bpf_field_reloc), + .ext_info = &btf_ext->field_reloc_info, + .desc = "field_reloc", }; return btf_ext_setup_info(btf_ext, ¶m); @@ -975,9 +975,9 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size) goto done; if (btf_ext->hdr->hdr_len < - offsetofend(struct btf_ext_header, offset_reloc_len)) + offsetofend(struct btf_ext_header, field_reloc_len)) goto done; - err = btf_ext_setup_offset_reloc(btf_ext); + err = btf_ext_setup_field_reloc(btf_ext); if (err) goto done; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 9cb44b4fbf60..b18994116a44 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -60,8 +60,8 @@ struct btf_ext_header { __u32 line_info_len; /* optional part of .BTF.ext header */ - __u32 offset_reloc_off; - __u32 offset_reloc_len; + __u32 field_reloc_off; + __u32 field_reloc_len; }; LIBBPF_API void btf__free(struct btf *btf); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a02cdedc4e3f..d6c7699de405 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2326,7 +2326,7 @@ static bool str_is_empty(const char *s) } /* - * Turn bpf_offset_reloc into a low- and high-level spec representation, + * Turn bpf_field_reloc into a low- and high-level spec representation, * validating correctness along the way, as well as calculating resulting * field offset (in bytes), specified by accessor string. Low-level spec * captures every single level of nestedness, including traversing anonymous @@ -2977,7 +2977,7 @@ static void *u32_as_hash_key(__u32 x) * types should be compatible (see bpf_core_fields_are_compat for details). * 3. It is supported and expected that there might be multiple flavors * matching the spec. As long as all the specs resolve to the same set of - * offsets across all candidates, there is not error. If there is any + * offsets across all candidates, there is no error. If there is any * ambiguity, CO-RE relocation will fail. This is necessary to accomodate * imprefection of BTF deduplication, which can cause slight duplication of * the same BTF type, if some directly or indirectly referenced (by @@ -2992,12 +2992,12 @@ static void *u32_as_hash_key(__u32 x) * CPU-wise compared to prebuilding a map from all local type names to * a list of candidate type names. It's also sped up by caching resolved * list of matching candidates per each local "root" type ID, that has at - * least one bpf_offset_reloc associated with it. This list is shared + * least one bpf_field_reloc associated with it. This list is shared * between multiple relocations for the same type ID and is updated as some * of the candidates are pruned due to structural incompatibility. */ -static int bpf_core_reloc_offset(struct bpf_program *prog, - const struct bpf_offset_reloc *relo, +static int bpf_core_reloc_field(struct bpf_program *prog, + const struct bpf_field_reloc *relo, int relo_idx, const struct btf *local_btf, const struct btf *targ_btf, @@ -3106,10 +3106,10 @@ static int bpf_core_reloc_offset(struct bpf_program *prog, } static int -bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path) +bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path) { const struct btf_ext_info_sec *sec; - const struct bpf_offset_reloc *rec; + const struct bpf_field_reloc *rec; const struct btf_ext_info *seg; struct hashmap_entry *entry; struct hashmap *cand_cache = NULL; @@ -3134,7 +3134,7 @@ bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path) goto out; } - seg = &obj->btf_ext->offset_reloc_info; + seg = &obj->btf_ext->field_reloc_info; for_each_btf_ext_sec(seg, sec) { sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off); if (str_is_empty(sec_name)) { @@ -3153,8 +3153,8 @@ bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path) sec_name, sec->num_info); for_each_btf_ext_rec(seg, sec, i, rec) { - err = bpf_core_reloc_offset(prog, rec, i, obj->btf, - targ_btf, cand_cache); + err = bpf_core_reloc_field(prog, rec, i, obj->btf, + targ_btf, cand_cache); if (err) { pr_warning("prog '%s': relo #%d: failed to relocate: %d\n", sec_name, i, err); @@ -3179,8 +3179,8 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) { int err = 0; - if (obj->btf_ext->offset_reloc_info.len) - err = bpf_core_reloc_offsets(obj, targ_btf_path); + if (obj->btf_ext->field_reloc_info.len) + err = bpf_core_reloc_fields(obj, targ_btf_path); return err; } diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index f51444fc7eb7..5bfe85d4f8aa 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -110,7 +110,7 @@ struct btf_ext { }; struct btf_ext_info func_info; struct btf_ext_info line_info; - struct btf_ext_info offset_reloc_info; + struct btf_ext_info field_reloc_info; __u32 data_size; }; @@ -135,13 +135,23 @@ struct bpf_line_info_min { __u32 line_col; }; -/* The minimum bpf_offset_reloc checked by the loader +/* bpf_field_info_kind encodes which aspect of captured field has to be + * adjusted by relocations. Currently supported values are: + * - BPF_FIELD_BYTE_OFFSET: field offset (in bytes); + * - BPF_FIELD_EXISTS: field existence (1, if field exists; 0, otherwise); + */ +enum bpf_field_info_kind { + BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ + BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ +}; + +/* The minimum bpf_field_reloc checked by the loader * - * Offset relocation captures the following data: + * Field relocation captures the following data: * - insn_off - instruction offset (in bytes) within a BPF program that needs - * its insn->imm field to be relocated with actual offset; + * its insn->imm field to be relocated with actual field info; * - type_id - BTF type ID of the "root" (containing) entity of a relocatable - * offset; + * field; * - access_str_off - offset into corresponding .BTF string section. String * itself encodes an accessed field using a sequence of field and array * indicies, separated by colon (:). It's conceptually very close to LLVM's @@ -172,15 +182,16 @@ struct bpf_line_info_min { * bpf_probe_read(&dst, sizeof(dst), * __builtin_preserve_access_index(&src->a.b.c)); * - * In this case Clang will emit offset relocation recording necessary data to + * In this case Clang will emit field relocation recording necessary data to * be able to find offset of embedded `a.b.c` field within `src` struct. * * [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction */ -struct bpf_offset_reloc { +struct bpf_field_reloc { __u32 insn_off; __u32 type_id; __u32 access_str_off; + enum bpf_field_info_kind kind; }; #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ From 291ee02b5e407eb1eb99c9eeaa968ef8a0c16949 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 15 Oct 2019 11:28:46 -0700 Subject: [PATCH 02/52] libbpf: Refactor bpf_object__open APIs to use common opts Refactor all the various bpf_object__open variations to ultimately specify common bpf_object_open_opts struct. This makes it easy to keep extending this common struct w/ extra parameters without having to update all the legacy APIs. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191015182849.3922287-3-andriin@fb.com --- tools/lib/bpf/libbpf.c | 71 +++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 36 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index d6c7699de405..db3308b91806 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1322,9 +1322,9 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict) return 0; } -static int bpf_object__init_maps(struct bpf_object *obj, int flags) +static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps) { - bool strict = !(flags & MAPS_RELAX_COMPAT); + bool strict = !relaxed_maps; int err; err = bpf_object__init_user_maps(obj, strict); @@ -1521,7 +1521,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) return 0; } -static int bpf_object__elf_collect(struct bpf_object *obj, int flags) +static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps) { Elf *elf = obj->efile.elf; GElf_Ehdr *ep = &obj->efile.ehdr; @@ -1652,7 +1652,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) } err = bpf_object__init_btf(obj, btf_data, btf_ext_data); if (!err) - err = bpf_object__init_maps(obj, flags); + err = bpf_object__init_maps(obj, relaxed_maps); if (!err) err = bpf_object__sanitize_and_load_btf(obj); if (!err) @@ -3554,24 +3554,45 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) static struct bpf_object * __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, - const char *obj_name, int flags) + struct bpf_object_open_opts *opts) { struct bpf_object *obj; + const char *obj_name; + char tmp_name[64]; + bool relaxed_maps; int err; if (elf_version(EV_CURRENT) == EV_NONE) { - pr_warning("failed to init libelf for %s\n", path); + pr_warning("failed to init libelf for %s\n", + path ? : "(mem buf)"); return ERR_PTR(-LIBBPF_ERRNO__LIBELF); } + if (!OPTS_VALID(opts, bpf_object_open_opts)) + return ERR_PTR(-EINVAL); + + obj_name = OPTS_GET(opts, object_name, path); + if (obj_buf) { + if (!obj_name) { + snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", + (unsigned long)obj_buf, + (unsigned long)obj_buf_sz); + obj_name = tmp_name; + } + path = obj_name; + pr_debug("loading object '%s' from buffer\n", obj_name); + } + obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); if (IS_ERR(obj)) return obj; + relaxed_maps = OPTS_GET(opts, relaxed_maps, false); + CHECK_ERR(bpf_object__elf_init(obj), err, out); CHECK_ERR(bpf_object__check_endianness(obj), err, out); CHECK_ERR(bpf_object__probe_caps(obj), err, out); - CHECK_ERR(bpf_object__elf_collect(obj, flags), err, out); + CHECK_ERR(bpf_object__elf_collect(obj, relaxed_maps), err, out); CHECK_ERR(bpf_object__collect_reloc(obj), err, out); bpf_object__elf_finish(obj); @@ -3584,13 +3605,16 @@ out: static struct bpf_object * __bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags) { + LIBBPF_OPTS(bpf_object_open_opts, opts, + .relaxed_maps = flags & MAPS_RELAX_COMPAT, + ); + /* param validation */ if (!attr->file) return NULL; pr_debug("loading %s\n", attr->file); - - return __bpf_object__open(attr->file, NULL, 0, NULL, flags); + return __bpf_object__open(attr->file, NULL, 0, &opts); } struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr) @@ -3611,47 +3635,22 @@ struct bpf_object *bpf_object__open(const char *path) struct bpf_object * bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts) { - const char *obj_name; - bool relaxed_maps; - - if (!OPTS_VALID(opts, bpf_object_open_opts)) - return ERR_PTR(-EINVAL); if (!path) return ERR_PTR(-EINVAL); pr_debug("loading %s\n", path); - obj_name = OPTS_GET(opts, object_name, path); - relaxed_maps = OPTS_GET(opts, relaxed_maps, false); - return __bpf_object__open(path, NULL, 0, obj_name, - relaxed_maps ? MAPS_RELAX_COMPAT : 0); + return __bpf_object__open(path, NULL, 0, opts); } struct bpf_object * bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, struct bpf_object_open_opts *opts) { - char tmp_name[64]; - const char *obj_name; - bool relaxed_maps; - - if (!OPTS_VALID(opts, bpf_object_open_opts)) - return ERR_PTR(-EINVAL); if (!obj_buf || obj_buf_sz == 0) return ERR_PTR(-EINVAL); - obj_name = OPTS_GET(opts, object_name, NULL); - if (!obj_name) { - snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", - (unsigned long)obj_buf, - (unsigned long)obj_buf_sz); - obj_name = tmp_name; - } - pr_debug("loading object '%s' from buffer\n", obj_name); - - relaxed_maps = OPTS_GET(opts, relaxed_maps, false); - return __bpf_object__open(obj_name, obj_buf, obj_buf_sz, obj_name, - relaxed_maps ? MAPS_RELAX_COMPAT : 0); + return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts); } struct bpf_object * From 62561eb442bd095f06534ce637b116b278e5e912 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 15 Oct 2019 11:28:47 -0700 Subject: [PATCH 03/52] libbpf: Add support for field existance CO-RE relocation Add support for BPF_FRK_EXISTS relocation kind to detect existence of captured field in a destination BTF, allowing conditional logic to handle incompatible differences between kernels. Also introduce opt-in relaxed CO-RE relocation handling option, which makes libbpf emit warning for failed relocations, but proceed with other relocations. Instruction, for which relocation failed, is patched with (u32)-1 value. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191015182849.3922287-4-andriin@fb.com --- tools/lib/bpf/libbpf.c | 74 +++++++++++++++++++++++++++++++++--------- tools/lib/bpf/libbpf.h | 4 ++- 2 files changed, 61 insertions(+), 17 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index db3308b91806..8d565590ce05 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -249,6 +249,7 @@ struct bpf_object { bool loaded; bool has_pseudo_calls; + bool relaxed_core_relocs; /* * Information when doing elf related work. Only valid if fd @@ -2771,26 +2772,54 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, /* * Patch relocatable BPF instruction. - * Expected insn->imm value is provided for validation, as well as the new - * relocated value. + * + * Patched value is determined by relocation kind and target specification. + * For field existence relocation target spec will be NULL if field is not + * found. + * Expected insn->imm value is determined using relocation kind and local + * spec, and is checked before patching instruction. If actual insn->imm value + * is wrong, bail out with error. * * Currently three kinds of BPF instructions are supported: * 1. rX = (assignment with immediate operand); * 2. rX += (arithmetic operations with immediate operand); - * 3. *(rX) = (indirect memory assignment with immediate operand). - * - * If actual insn->imm value is wrong, bail out. */ -static int bpf_core_reloc_insn(struct bpf_program *prog, int insn_off, - __u32 orig_off, __u32 new_off) +static int bpf_core_reloc_insn(struct bpf_program *prog, + const struct bpf_field_reloc *relo, + const struct bpf_core_spec *local_spec, + const struct bpf_core_spec *targ_spec) { + __u32 orig_val, new_val; struct bpf_insn *insn; int insn_idx; __u8 class; - if (insn_off % sizeof(struct bpf_insn)) + if (relo->insn_off % sizeof(struct bpf_insn)) return -EINVAL; - insn_idx = insn_off / sizeof(struct bpf_insn); + insn_idx = relo->insn_off / sizeof(struct bpf_insn); + + switch (relo->kind) { + case BPF_FIELD_BYTE_OFFSET: + orig_val = local_spec->offset; + if (targ_spec) { + new_val = targ_spec->offset; + } else { + pr_warning("prog '%s': patching insn #%d w/ failed reloc, imm %d -> %d\n", + bpf_program__title(prog, false), insn_idx, + orig_val, -1); + new_val = (__u32)-1; + } + break; + case BPF_FIELD_EXISTS: + orig_val = 1; /* can't generate EXISTS relo w/o local field */ + new_val = targ_spec ? 1 : 0; + break; + default: + pr_warning("prog '%s': unknown relo %d at insn #%d'\n", + bpf_program__title(prog, false), + relo->kind, insn_idx); + return -EINVAL; + } insn = &prog->insns[insn_idx]; class = BPF_CLASS(insn->code); @@ -2798,12 +2827,12 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, int insn_off, if (class == BPF_ALU || class == BPF_ALU64) { if (BPF_SRC(insn->code) != BPF_K) return -EINVAL; - if (insn->imm != orig_off) + if (insn->imm != orig_val) return -EINVAL; - insn->imm = new_off; + insn->imm = new_val; pr_debug("prog '%s': patched insn #%d (ALU/ALU64) imm %d -> %d\n", bpf_program__title(prog, false), - insn_idx, orig_off, new_off); + insn_idx, orig_val, new_val); } else { pr_warning("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", bpf_program__title(prog, false), @@ -2811,6 +2840,7 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, int insn_off, insn->off, insn->imm); return -EINVAL; } + return 0; } @@ -3087,15 +3117,26 @@ static int bpf_core_reloc_field(struct bpf_program *prog, cand_ids->data[j++] = cand_spec.spec[0].type_id; } - cand_ids->len = j; - if (cand_ids->len == 0) { + /* + * For BPF_FIELD_EXISTS relo or when relaxed CO-RE reloc mode is + * requested, it's expected that we might not find any candidates. + * In this case, if field wasn't found in any candidate, the list of + * candidates shouldn't change at all, we'll just handle relocating + * appropriately, depending on relo's kind. + */ + if (j > 0) + cand_ids->len = j; + + if (j == 0 && !prog->obj->relaxed_core_relocs && + relo->kind != BPF_FIELD_EXISTS) { pr_warning("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", prog_name, relo_idx, local_id, local_name, spec_str); return -ESRCH; } - err = bpf_core_reloc_insn(prog, relo->insn_off, - local_spec.offset, targ_spec.offset); + /* bpf_core_reloc_insn should know how to handle missing targ_spec */ + err = bpf_core_reloc_insn(prog, relo, &local_spec, + j ? &targ_spec : NULL); if (err) { pr_warning("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n", prog_name, relo_idx, relo->insn_off, err); @@ -3587,6 +3628,7 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, if (IS_ERR(obj)) return obj; + obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false); relaxed_maps = OPTS_GET(opts, relaxed_maps, false); CHECK_ERR(bpf_object__elf_init(obj), err, out); diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 667e6853e51f..53ce212764e0 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -96,8 +96,10 @@ struct bpf_object_open_opts { const char *object_name; /* parse map definitions non-strictly, allowing extra attributes/data */ bool relaxed_maps; + /* process CO-RE relocations non-strictly, allowing them to fail */ + bool relaxed_core_relocs; }; -#define bpf_object_open_opts__last_field relaxed_maps +#define bpf_object_open_opts__last_field relaxed_core_relocs LIBBPF_API struct bpf_object *bpf_object__open(const char *path); LIBBPF_API struct bpf_object * From 01340e31915bc73bf33a8f912ff1b74d514b8d79 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 15 Oct 2019 11:28:48 -0700 Subject: [PATCH 04/52] libbpf: Add BPF-side definitions of supported field relocation kinds Add enum definition for Clang's __builtin_preserve_field_info() second argument (info_kind). Currently only byte offset and existence are supported. Corresponding Clang changes introducing this built-in can be found at [0] [0] https://reviews.llvm.org/D67980 Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191015182849.3922287-5-andriin@fb.com --- tools/lib/bpf/bpf_core_read.h | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 4daf04c25493..a273df3784f4 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -2,6 +2,28 @@ #ifndef __BPF_CORE_READ_H__ #define __BPF_CORE_READ_H__ +/* + * enum bpf_field_info_kind is passed as a second argument into + * __builtin_preserve_field_info() built-in to get a specific aspect of + * a field, captured as a first argument. __builtin_preserve_field_info(field, + * info_kind) returns __u32 integer and produces BTF field relocation, which + * is understood and processed by libbpf during BPF object loading. See + * selftests/bpf for examples. + */ +enum bpf_field_info_kind { + BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ + BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ +}; + +/* + * Convenience macro to check that field actually exists in target kernel's. + * Returns: + * 1, if matching field is present in target kernel; + * 0, if no matching field found. + */ +#define bpf_core_field_exists(field) \ + __builtin_preserve_field_info(field, BPF_FIELD_EXISTS) + /* * bpf_core_read() abstracts away bpf_probe_read() call and captures offset * relocation for source address using __builtin_preserve_access_index() @@ -12,7 +34,7 @@ * a relocation, which records BTF type ID describing root struct/union and an * accessor string which describes exact embedded field that was used to take * an address. See detailed description of this relocation format and - * semantics in comments to struct bpf_offset_reloc in libbpf_internal.h. + * semantics in comments to struct bpf_field_reloc in libbpf_internal.h. * * This relocation allows libbpf to adjust BPF instruction to use correct * actual field offset, based on target kernel BTF type that matches original From c7566a69695cd3d8fe876c0da38a03a7472d3f56 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 15 Oct 2019 11:28:49 -0700 Subject: [PATCH 05/52] selftests/bpf: Add field existence CO-RE relocs tests Add a bunch of tests validating CO-RE is handling field existence relocation. Relaxed CO-RE relocation mode is activated for these new tests to prevent libbpf from rejecting BPF object for no-match relocation, even though test BPF program is not going to use that relocation, if field is missing. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191015182849.3922287-6-andriin@fb.com --- .../selftests/bpf/prog_tests/core_reloc.c | 76 +++++++++++++++++- .../bpf/progs/btf__core_reloc_existence.c | 3 + ...ore_reloc_existence___err_wrong_arr_kind.c | 3 + ...loc_existence___err_wrong_arr_value_type.c | 3 + ...ore_reloc_existence___err_wrong_int_kind.c | 3 + ..._core_reloc_existence___err_wrong_int_sz.c | 3 + ...ore_reloc_existence___err_wrong_int_type.c | 3 + ..._reloc_existence___err_wrong_struct_type.c | 3 + .../btf__core_reloc_existence___minimal.c | 3 + .../selftests/bpf/progs/core_reloc_types.h | 56 +++++++++++++ .../bpf/progs/test_core_reloc_existence.c | 79 +++++++++++++++++++ 11 files changed, 233 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_existence.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c create mode 100644 tools/testing/selftests/bpf/progs/btf__core_reloc_existence___minimal.c create mode 100644 tools/testing/selftests/bpf/progs/test_core_reloc_existence.c diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 21a0dff66241..7e2f5b4bf7f3 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -174,6 +174,21 @@ .fails = true, \ } +#define EXISTENCE_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ + .a = 42, \ +} + +#define EXISTENCE_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_existence.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o", \ + .relaxed_core_relocs = true \ + +#define EXISTENCE_ERR_CASE(name) { \ + EXISTENCE_CASE_COMMON(name), \ + .fails = true, \ +} + struct core_reloc_test_case { const char *case_name; const char *bpf_obj_file; @@ -183,6 +198,7 @@ struct core_reloc_test_case { const char *output; int output_len; bool fails; + bool relaxed_core_relocs; }; static struct core_reloc_test_case test_cases[] = { @@ -283,6 +299,59 @@ static struct core_reloc_test_case test_cases[] = { }, .output_len = sizeof(struct core_reloc_misc_output), }, + + /* validate field existence checks */ + { + EXISTENCE_CASE_COMMON(existence), + .input = STRUCT_TO_CHAR_PTR(core_reloc_existence) { + .a = 1, + .b = 2, + .c = 3, + .arr = { 4 }, + .s = { .x = 5 }, + }, + .input_len = sizeof(struct core_reloc_existence), + .output = STRUCT_TO_CHAR_PTR(core_reloc_existence_output) { + .a_exists = 1, + .b_exists = 1, + .c_exists = 1, + .arr_exists = 1, + .s_exists = 1, + .a_value = 1, + .b_value = 2, + .c_value = 3, + .arr_value = 4, + .s_value = 5, + }, + .output_len = sizeof(struct core_reloc_existence_output), + }, + { + EXISTENCE_CASE_COMMON(existence___minimal), + .input = STRUCT_TO_CHAR_PTR(core_reloc_existence___minimal) { + .a = 42, + }, + .input_len = sizeof(struct core_reloc_existence), + .output = STRUCT_TO_CHAR_PTR(core_reloc_existence_output) { + .a_exists = 1, + .b_exists = 0, + .c_exists = 0, + .arr_exists = 0, + .s_exists = 0, + .a_value = 42, + .b_value = 0xff000002u, + .c_value = 0xff000003u, + .arr_value = 0xff000004u, + .s_value = 0xff000005u, + }, + .output_len = sizeof(struct core_reloc_existence_output), + }, + + EXISTENCE_ERR_CASE(existence__err_int_sz), + EXISTENCE_ERR_CASE(existence__err_int_type), + EXISTENCE_ERR_CASE(existence__err_int_kind), + EXISTENCE_ERR_CASE(existence__err_arr_kind), + EXISTENCE_ERR_CASE(existence__err_arr_value_type), + EXISTENCE_ERR_CASE(existence__err_struct_type), }; struct data { @@ -305,11 +374,14 @@ void test_core_reloc(void) for (i = 0; i < ARRAY_SIZE(test_cases); i++) { test_case = &test_cases[i]; - if (!test__start_subtest(test_case->case_name)) continue; - obj = bpf_object__open(test_case->bpf_obj_file); + LIBBPF_OPTS(bpf_object_open_opts, opts, + .relaxed_core_relocs = test_case->relaxed_core_relocs, + ); + + obj = bpf_object__open_file(test_case->bpf_obj_file, &opts); if (CHECK(IS_ERR_OR_NULL(obj), "obj_open", "failed to open '%s': %ld\n", test_case->bpf_obj_file, PTR_ERR(obj))) diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence.c new file mode 100644 index 000000000000..0b62315ad46c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_existence x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c new file mode 100644 index 000000000000..dd0ffa518f36 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_existence___err_wrong_arr_kind x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c new file mode 100644 index 000000000000..bc83372088ad --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_existence___err_wrong_arr_value_type x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c new file mode 100644 index 000000000000..917bec41be08 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_existence___err_wrong_int_kind x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c new file mode 100644 index 000000000000..6ec7e6ec1c91 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_existence___err_wrong_int_sz x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c new file mode 100644 index 000000000000..7bbcacf2b0d1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_existence___err_wrong_int_type x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c new file mode 100644 index 000000000000..f384dd38ec70 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_existence___err_wrong_struct_type x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___minimal.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___minimal.c new file mode 100644 index 000000000000..aec2dec20e90 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___minimal.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_existence___minimal x) {} diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index 9a6bdeb4894c..ad763ec0ba8f 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -674,3 +674,59 @@ struct core_reloc_misc_extensible { int c; int d; }; + +/* + * EXISTENCE + */ +struct core_reloc_existence_output { + int a_exists; + int a_value; + int b_exists; + int b_value; + int c_exists; + int c_value; + int arr_exists; + int arr_value; + int s_exists; + int s_value; +}; + +struct core_reloc_existence { + int a; + struct { + int b; + }; + int c; + int arr[1]; + struct { + int x; + } s; +}; + +struct core_reloc_existence___minimal { + int a; +}; + +struct core_reloc_existence___err_wrong_int_sz { + short a; +}; + +struct core_reloc_existence___err_wrong_int_type { + int b[1]; +}; + +struct core_reloc_existence___err_wrong_int_kind { + struct{ int x; } c; +}; + +struct core_reloc_existence___err_wrong_arr_kind { + int arr; +}; + +struct core_reloc_existence___err_wrong_arr_value_type { + short arr[1]; +}; + +struct core_reloc_existence___err_wrong_struct_type { + int s; +}; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c b/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c new file mode 100644 index 000000000000..c3cac95a19f1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook + +#include +#include +#include "bpf_helpers.h" +#include "bpf_core_read.h" + +char _license[] SEC("license") = "GPL"; + +static volatile struct data { + char in[256]; + char out[256]; +} data; + +struct core_reloc_existence_output { + int a_exists; + int a_value; + int b_exists; + int b_value; + int c_exists; + int c_value; + int arr_exists; + int arr_value; + int s_exists; + int s_value; +}; + +struct core_reloc_existence { + struct { + int x; + } s; + int arr[1]; + int a; + struct { + int b; + }; + int c; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_existence(void *ctx) +{ + struct core_reloc_existence *in = (void *)&data.in; + struct core_reloc_existence_output *out = (void *)&data.out; + + out->a_exists = bpf_core_field_exists(in->a); + if (bpf_core_field_exists(in->a)) + out->a_value = BPF_CORE_READ(in, a); + else + out->a_value = 0xff000001u; + + out->b_exists = bpf_core_field_exists(in->b); + if (bpf_core_field_exists(in->b)) + out->b_value = BPF_CORE_READ(in, b); + else + out->b_value = 0xff000002u; + + out->c_exists = bpf_core_field_exists(in->c); + if (bpf_core_field_exists(in->c)) + out->c_value = BPF_CORE_READ(in, c); + else + out->c_value = 0xff000003u; + + out->arr_exists = bpf_core_field_exists(in->arr); + if (bpf_core_field_exists(in->arr)) + out->arr_value = BPF_CORE_READ(in, arr[0]); + else + out->arr_value = 0xff000004u; + + out->s_exists = bpf_core_field_exists(in->s); + if (bpf_core_field_exists(in->s)) + out->s_value = BPF_CORE_READ(in, s.x); + else + out->s_value = 0xff000005u; + + return 0; +} + From ba94094818a811758570990648160a6ba2ca05cb Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 15 Oct 2019 11:31:24 -0700 Subject: [PATCH 06/52] bpf: Allow __sk_buff tstamp in BPF_PROG_TEST_RUN It's useful for implementing EDT related tests (set tstamp, run the test, see how the tstamp is changed or observe some other parameter). Note that bpf_ktime_get_ns() helper is using monotonic clock, so for the BPF programs that compare tstamp against it, tstamp should be derived from clock_gettime(CLOCK_MONOTONIC, ...). Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191015183125.124413-1-sdf@google.com --- net/bpf/test_run.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 1153bbcdff72..0be4497cb832 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -218,10 +218,18 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) if (!range_is_zero(__skb, offsetof(struct __sk_buff, cb) + FIELD_SIZEOF(struct __sk_buff, cb), + offsetof(struct __sk_buff, tstamp))) + return -EINVAL; + + /* tstamp is allowed */ + + if (!range_is_zero(__skb, offsetof(struct __sk_buff, tstamp) + + FIELD_SIZEOF(struct __sk_buff, tstamp), sizeof(struct __sk_buff))) return -EINVAL; skb->priority = __skb->priority; + skb->tstamp = __skb->tstamp; memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN); return 0; @@ -235,6 +243,7 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb) return; __skb->priority = skb->priority; + __skb->tstamp = skb->tstamp; memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN); } From 95fbda1e37384b9c270218b52718c83ddf4bb34e Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Tue, 15 Oct 2019 11:31:25 -0700 Subject: [PATCH 07/52] selftests: bpf: Add selftest for __sk_buff tstamp Make sure BPF_PROG_TEST_RUN accepts tstamp and exports any modifications that BPF program does. Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191015183125.124413-2-sdf@google.com --- tools/testing/selftests/bpf/prog_tests/skb_ctx.c | 5 +++++ tools/testing/selftests/bpf/progs/test_skb_ctx.c | 1 + 2 files changed, 6 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c index e95baa32e277..a2eb8db8dafb 100644 --- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c +++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c @@ -10,6 +10,7 @@ void test_skb_ctx(void) .cb[3] = 4, .cb[4] = 5, .priority = 6, + .tstamp = 7, }; struct bpf_prog_test_run_attr tattr = { .data_in = &pkt_v4, @@ -86,4 +87,8 @@ void test_skb_ctx(void) "ctx_out_priority", "skb->priority == %d, expected %d\n", skb.priority, 7); + CHECK_ATTR(skb.tstamp != 8, + "ctx_out_tstamp", + "skb->tstamp == %lld, expected %d\n", + skb.tstamp, 8); } diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c index 7a80960d7df1..2a9f4c736ebc 100644 --- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c +++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c @@ -16,6 +16,7 @@ int process(struct __sk_buff *skb) skb->cb[i]++; } skb->priority++; + skb->tstamp++; return 0; } From 5bc60de50dfea235634fdf38cbc992fb968d113b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 15 Oct 2019 12:00:56 +0200 Subject: [PATCH 08/52] selftests: bpf: Don't try to read files without read permission Recently couple of files that are write only were added to netdevsim debugfs. Don't read these files and avoid error. Reported-by: kernel test robot Signed-off-by: Jiri Pirko Signed-off-by: Alexei Starovoitov Acked-by: Jakub Kicinski --- tools/testing/selftests/bpf/test_offload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 15a666329a34..c44c650bde3a 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -312,7 +312,7 @@ class DebugfsDir: if f == "ports": continue p = os.path.join(path, f) - if os.path.isfile(p): + if os.path.isfile(p) and os.access(p, os.R_OK): _, out = cmd('cat %s/%s' % (path, f)) dfs[f] = out.strip() elif os.path.isdir(p): From 456a513bb5d494f169271b16334049d50c7e630b Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 16 Oct 2019 10:58:11 +0200 Subject: [PATCH 09/52] scripts/bpf: Emit an #error directive known types list needs updating Make the compiler report a clear error when bpf_helpers_doc.py needs updating rather than rely on the fact that Clang fails to compile English: ../../../lib/bpf/bpf_helper_defs.h:2707:1: error: unknown type name 'Unrecognized' Unrecognized type 'struct bpf_inet_lookup', please add it to known types! Signed-off-by: Jakub Sitnicki Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191016085811.11700-1-jakub@cloudflare.com --- scripts/bpf_helpers_doc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 7df9ce598ff9..08300bc024da 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -489,7 +489,7 @@ class PrinterHelpers(Printer): if t in self.mapped_types: return self.mapped_types[t] print("") - print("Unrecognized type '%s', please add it to known types!" % t) + print("#error \"Unrecognized type '%s', please add it to known types!\"" % t) sys.exit(1) seen_helpers = set() From eac9153f2b584c702cea02c1f1a57d85aa9aea42 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 14 Oct 2019 10:12:23 -0700 Subject: [PATCH 10/52] bpf/stackmap: Fix deadlock with rq_lock in bpf_get_stack() bpf stackmap with build-id lookup (BPF_F_STACK_BUILD_ID) can trigger A-A deadlock on rq_lock(): rcu: INFO: rcu_sched detected stalls on CPUs/tasks: [...] Call Trace: try_to_wake_up+0x1ad/0x590 wake_up_q+0x54/0x80 rwsem_wake+0x8a/0xb0 bpf_get_stack+0x13c/0x150 bpf_prog_fbdaf42eded9fe46_on_event+0x5e3/0x1000 bpf_overflow_handler+0x60/0x100 __perf_event_overflow+0x4f/0xf0 perf_swevent_overflow+0x99/0xc0 ___perf_sw_event+0xe7/0x120 __schedule+0x47d/0x620 schedule+0x29/0x90 futex_wait_queue_me+0xb9/0x110 futex_wait+0x139/0x230 do_futex+0x2ac/0xa50 __x64_sys_futex+0x13c/0x180 do_syscall_64+0x42/0x100 entry_SYSCALL_64_after_hwframe+0x44/0xa9 This can be reproduced by: 1. Start a multi-thread program that does parallel mmap() and malloc(); 2. taskset the program to 2 CPUs; 3. Attach bpf program to trace_sched_switch and gather stackmap with build-id, e.g. with trace.py from bcc tools: trace.py -U -p -s t:sched:sched_switch A sample reproducer is attached at the end. This could also trigger deadlock with other locks that are nested with rq_lock. Fix this by checking whether irqs are disabled. Since rq_lock and all other nested locks are irq safe, it is safe to do up_read() when irqs are not disable. If the irqs are disabled, postpone up_read() in irq_work. Fixes: 615755a77b24 ("bpf: extend stackmap to save binary_build_id+offset instead of address") Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191014171223.357174-1-songliubraving@fb.com Reproducer: ============================ 8< ============================ char *filename; void *worker(void *p) { void *ptr; int fd; char *pptr; fd = open(filename, O_RDONLY); if (fd < 0) return NULL; while (1) { struct timespec ts = {0, 1000 + rand() % 2000}; ptr = mmap(NULL, 4096 * 64, PROT_READ, MAP_PRIVATE, fd, 0); usleep(1); if (ptr == MAP_FAILED) { printf("failed to mmap\n"); break; } munmap(ptr, 4096 * 64); usleep(1); pptr = malloc(1); usleep(1); pptr[0] = 1; usleep(1); free(pptr); usleep(1); nanosleep(&ts, NULL); } close(fd); return NULL; } int main(int argc, char *argv[]) { void *ptr; int i; pthread_t threads[THREAD_COUNT]; if (argc < 2) return 0; filename = argv[1]; for (i = 0; i < THREAD_COUNT; i++) { if (pthread_create(threads + i, NULL, worker, NULL)) { fprintf(stderr, "Error creating thread\n"); return 0; } } for (i = 0; i < THREAD_COUNT; i++) pthread_join(threads[i], NULL); return 0; } ============================ 8< ============================ --- kernel/bpf/stackmap.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index 052580c33d26..173e983619d7 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -287,7 +287,7 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, bool irq_work_busy = false; struct stack_map_irq_work *work = NULL; - if (in_nmi()) { + if (irqs_disabled()) { work = this_cpu_ptr(&up_read_work); if (work->irq_work.flags & IRQ_WORK_BUSY) /* cannot queue more up_read, fallback */ @@ -295,8 +295,9 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs, } /* - * We cannot do up_read() in nmi context. To do build_id lookup - * in nmi context, we need to run up_read() in irq_work. We use + * We cannot do up_read() when the irq is disabled, because of + * risk to deadlock with rq_lock. To do build_id lookup when the + * irqs are disabled, we need to run up_read() in irq_work. We use * a percpu variable to do the irq_work. If the irq_work is * already used by another lookup, we fall back to report ips. * From e8c423fb31fa8b1ef6d7cd14a168de33e7c0d702 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 Oct 2019 20:24:55 -0700 Subject: [PATCH 11/52] bpf: Add typecast to raw_tracepoints to help BTF generation When pahole converts dwarf to btf it emits only used types. Wrap existing __bpf_trace_##template() function into btf_trace_##template typedef and use it in type cast to make gcc emits this type into dwarf. Then pahole will convert it to btf. The "btf_trace_" prefix will be used to identify BTF enabled raw tracepoints. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: John Fastabend Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191016032505.2089704-2-ast@kernel.org --- include/trace/bpf_probe.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index d6e556c0a085..b04c29270973 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -74,11 +74,12 @@ static inline void bpf_test_probe_##call(void) \ { \ check_trace_callback_type_##call(__bpf_trace_##template); \ } \ +typedef void (*btf_trace_##call)(void *__data, proto); \ static struct bpf_raw_event_map __used \ __attribute__((section("__bpf_raw_tp_map"))) \ __bpf_trace_tp_map_##call = { \ .tp = &__tracepoint_##call, \ - .bpf_func = (void *)__bpf_trace_##template, \ + .bpf_func = (void *)(btf_trace_##call)__bpf_trace_##template, \ .num_args = COUNT_ARGS(args), \ .writable_size = size, \ }; From 7c6a469e3416fa23568c2395a3faa7dd6e376dcb Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 Oct 2019 20:24:56 -0700 Subject: [PATCH 12/52] bpf: Add typecast to bpf helpers to help BTF generation When pahole converts dwarf to btf it emits only used types. Wrap existing bpf helper functions into typedef and use it in typecast to make gcc emits this type into dwarf. Then pahole will convert it to btf. The "btf_#name_of_helper" types will be used to figure out types of arguments of bpf helpers. The generated code before and after is the same. Only dwarf and btf sections are different. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: John Fastabend Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191016032505.2089704-3-ast@kernel.org --- include/linux/filter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 2ce57645f3cd..d3d51d7aff2c 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -464,10 +464,11 @@ static inline bool insn_is_zext(const struct bpf_insn *insn) #define BPF_CALL_x(x, name, ...) \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ + typedef u64 (*btf_##name)(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ { \ - return ____##name(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ + return ((btf_##name)____##name)(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ } \ static __always_inline \ u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) From 8580ac9404f6240668a026785d7d8856f0530409 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 Oct 2019 20:24:57 -0700 Subject: [PATCH 13/52] bpf: Process in-kernel BTF If in-kernel BTF exists parse it and prepare 'struct btf *btf_vmlinux' for further use by the verifier. In-kernel BTF is trusted just like kallsyms and other build artifacts embedded into vmlinux. Yet run this BTF image through BTF verifier to make sure that it is valid and it wasn't mangled during the build. If in-kernel BTF is incorrect it means either gcc or pahole or kernel are buggy. In such case disallow loading BPF programs. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191016032505.2089704-4-ast@kernel.org --- include/linux/bpf_verifier.h | 4 +- include/linux/btf.h | 1 + kernel/bpf/btf.c | 71 +++++++++++++++++++++++++++++++++++- kernel/bpf/verifier.c | 20 ++++++++++ 4 files changed, 94 insertions(+), 2 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 26a6d58ca78c..713efae62e96 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -330,10 +330,12 @@ static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log) #define BPF_LOG_STATS 4 #define BPF_LOG_LEVEL (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2) #define BPF_LOG_MASK (BPF_LOG_LEVEL | BPF_LOG_STATS) +#define BPF_LOG_KERNEL (BPF_LOG_MASK + 1) /* kernel internal flag */ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log) { - return log->level && log->ubuf && !bpf_verifier_log_full(log); + return (log->level && log->ubuf && !bpf_verifier_log_full(log)) || + log->level == BPF_LOG_KERNEL; } #define BPF_MAX_SUBPROGS 256 diff --git a/include/linux/btf.h b/include/linux/btf.h index 64cdf2a23d42..55d43bc856be 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -56,6 +56,7 @@ bool btf_type_is_void(const struct btf_type *t); #ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); +struct btf *btf_parse_vmlinux(void); #else static inline const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 29c7c06c6bd6..ddeab1e8d21e 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -698,6 +698,13 @@ __printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env, if (!bpf_verifier_log_needed(log)) return; + /* btf verifier prints all types it is processing via + * btf_verifier_log_type(..., fmt = NULL). + * Skip those prints for in-kernel BTF verification. + */ + if (log->level == BPF_LOG_KERNEL && !fmt) + return; + __btf_verifier_log(log, "[%u] %s %s%s", env->log_type_id, btf_kind_str[kind], @@ -735,6 +742,8 @@ static void btf_verifier_log_member(struct btf_verifier_env *env, if (!bpf_verifier_log_needed(log)) return; + if (log->level == BPF_LOG_KERNEL && !fmt) + return; /* The CHECK_META phase already did a btf dump. * * If member is logged again, it must hit an error in @@ -777,6 +786,8 @@ static void btf_verifier_log_vsi(struct btf_verifier_env *env, if (!bpf_verifier_log_needed(log)) return; + if (log->level == BPF_LOG_KERNEL && !fmt) + return; if (env->phase != CHECK_META) btf_verifier_log_type(env, datasec_type, NULL); @@ -802,6 +813,8 @@ static void btf_verifier_log_hdr(struct btf_verifier_env *env, if (!bpf_verifier_log_needed(log)) return; + if (log->level == BPF_LOG_KERNEL) + return; hdr = &btf->hdr; __btf_verifier_log(log, "magic: 0x%x\n", hdr->magic); __btf_verifier_log(log, "version: %u\n", hdr->version); @@ -2405,7 +2418,8 @@ static s32 btf_enum_check_meta(struct btf_verifier_env *env, return -EINVAL; } - + if (env->log.level == BPF_LOG_KERNEL) + continue; btf_verifier_log(env, "\t%s val=%d\n", __btf_name_by_offset(btf, enums[i].name_off), enums[i].val); @@ -3367,6 +3381,61 @@ errout: return ERR_PTR(err); } +extern char __weak _binary__btf_vmlinux_bin_start[]; +extern char __weak _binary__btf_vmlinux_bin_end[]; + +struct btf *btf_parse_vmlinux(void) +{ + struct btf_verifier_env *env = NULL; + struct bpf_verifier_log *log; + struct btf *btf = NULL; + int err; + + env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); + if (!env) + return ERR_PTR(-ENOMEM); + + log = &env->log; + log->level = BPF_LOG_KERNEL; + + btf = kzalloc(sizeof(*btf), GFP_KERNEL | __GFP_NOWARN); + if (!btf) { + err = -ENOMEM; + goto errout; + } + env->btf = btf; + + btf->data = _binary__btf_vmlinux_bin_start; + btf->data_size = _binary__btf_vmlinux_bin_end - + _binary__btf_vmlinux_bin_start; + + err = btf_parse_hdr(env); + if (err) + goto errout; + + btf->nohdr_data = btf->data + btf->hdr.hdr_len; + + err = btf_parse_str_sec(env); + if (err) + goto errout; + + err = btf_check_all_metas(env); + if (err) + goto errout; + + btf_verifier_env_free(env); + refcount_set(&btf->refcnt, 1); + return btf; + +errout: + btf_verifier_env_free(env); + if (btf) { + kvfree(btf->types); + kfree(btf); + } + return ERR_PTR(err); +} + void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, struct seq_file *m) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d3446f018b9a..466b3b19de4d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -207,6 +207,8 @@ struct bpf_call_arg_meta { int func_id; }; +struct btf *btf_vmlinux; + static DEFINE_MUTEX(bpf_verifier_lock); static const struct bpf_line_info * @@ -243,6 +245,10 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, n = min(log->len_total - log->len_used - 1, n); log->kbuf[n] = '\0'; + if (log->level == BPF_LOG_KERNEL) { + pr_err("BPF:%s\n", log->kbuf); + return; + } if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1)) log->len_used += n; else @@ -9294,6 +9300,13 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, env->ops = bpf_verifier_ops[env->prog->type]; is_priv = capable(CAP_SYS_ADMIN); + if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) { + mutex_lock(&bpf_verifier_lock); + if (!btf_vmlinux) + btf_vmlinux = btf_parse_vmlinux(); + mutex_unlock(&bpf_verifier_lock); + } + /* grab the mutex to protect few globals used by verifier */ if (!is_priv) mutex_lock(&bpf_verifier_lock); @@ -9313,6 +9326,13 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, goto err_unlock; } + if (IS_ERR(btf_vmlinux)) { + /* Either gcc or pahole or kernel are broken. */ + verbose(env, "in-kernel BTF is malformed\n"); + ret = PTR_ERR(btf_vmlinux); + goto err_unlock; + } + env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT); if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) env->strict_alignment = true; From ccfe29eb29c2edcea6552072ef00ff4117f53e83 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 Oct 2019 20:24:58 -0700 Subject: [PATCH 14/52] bpf: Add attach_btf_id attribute to program load Add attach_btf_id attribute to prog_load command. It's similar to existing expected_attach_type attribute which is used in several cgroup based program types. Unfortunately expected_attach_type is ignored for tracing programs and cannot be reused for new purpose. Hence introduce attach_btf_id to verify bpf programs against given in-kernel BTF type id at load time. It is strictly checked to be valid for raw_tp programs only. In a later patches it will become: btf_id == 0 semantics of existing raw_tp progs. btd_id > 0 raw_tp with BTF and additional type safety. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191016032505.2089704-5-ast@kernel.org --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/syscall.c | 18 ++++++++++++++---- tools/include/uapi/linux/bpf.h | 1 + 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 282e28bf41ec..f916380675dd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -375,6 +375,7 @@ struct bpf_prog_aux { u32 id; u32 func_cnt; /* used by non-func prog as the number of func progs */ u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */ + u32 attach_btf_id; /* in-kernel BTF type id to attach to */ bool verifier_zext; /* Zero extensions has been inserted by verifier. */ bool offload_requested; struct bpf_prog **func; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a65c3b0c6935..3bb2cd1de341 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -420,6 +420,7 @@ union bpf_attr { __u32 line_info_rec_size; /* userspace bpf_line_info size */ __aligned_u64 line_info; /* line info */ __u32 line_info_cnt; /* number of bpf_line_info records */ + __u32 attach_btf_id; /* in-kernel BTF type id to attach to */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 82eabd4e38ad..b56c482c9760 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -23,6 +23,7 @@ #include #include #include +#include #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \ (map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ @@ -1565,8 +1566,9 @@ static void bpf_prog_load_fixup_attach_type(union bpf_attr *attr) } static int -bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type, - enum bpf_attach_type expected_attach_type) +bpf_prog_load_check_attach(enum bpf_prog_type prog_type, + enum bpf_attach_type expected_attach_type, + u32 btf_id) { switch (prog_type) { case BPF_PROG_TYPE_CGROUP_SOCK: @@ -1608,13 +1610,19 @@ bpf_prog_load_check_attach_type(enum bpf_prog_type prog_type, default: return -EINVAL; } + case BPF_PROG_TYPE_RAW_TRACEPOINT: + if (btf_id > BTF_MAX_TYPE) + return -EINVAL; + return 0; default: + if (btf_id) + return -EINVAL; return 0; } } /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD line_info_cnt +#define BPF_PROG_LOAD_LAST_FIELD attach_btf_id static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) { @@ -1656,7 +1664,8 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) return -EPERM; bpf_prog_load_fixup_attach_type(attr); - if (bpf_prog_load_check_attach_type(type, attr->expected_attach_type)) + if (bpf_prog_load_check_attach(type, attr->expected_attach_type, + attr->attach_btf_id)) return -EINVAL; /* plain bpf_prog allocation */ @@ -1665,6 +1674,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) return -ENOMEM; prog->expected_attach_type = attr->expected_attach_type; + prog->aux->attach_btf_id = attr->attach_btf_id; prog->aux->offload_requested = !!attr->prog_ifindex; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index a65c3b0c6935..3bb2cd1de341 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -420,6 +420,7 @@ union bpf_attr { __u32 line_info_rec_size; /* userspace bpf_line_info size */ __aligned_u64 line_info; /* line info */ __u32 line_info_cnt; /* number of bpf_line_info records */ + __u32 attach_btf_id; /* in-kernel BTF type id to attach to */ }; struct { /* anonymous struct used by BPF_OBJ_* commands */ From f75a697e091375c96c35f733a664e3557171f28b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 Oct 2019 20:24:59 -0700 Subject: [PATCH 15/52] libbpf: Auto-detect btf_id of BTF-based raw_tracepoints It's a responsiblity of bpf program author to annotate the program with SEC("tp_btf/name") where "name" is a valid raw tracepoint. The libbpf will try to find "name" in vmlinux BTF and error out in case vmlinux BTF is not available or "name" is not found. If "name" is indeed a valid raw tracepoint then in-kernel BTF will have "btf_trace_##name" typedef that points to function prototype of that raw tracepoint. BTF description captures exact argument the kernel C code is passing into raw tracepoint. The kernel verifier will check the types while loading bpf program. libbpf keeps BTF type id in expected_attach_type, but since kernel ignores this attribute for tracing programs copy it into attach_btf_id attribute before loading. Later the kernel will use prog->attach_btf_id to select raw tracepoint during bpf_raw_tracepoint_open syscall command. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191016032505.2089704-6-ast@kernel.org --- tools/lib/bpf/bpf.c | 3 +++ tools/lib/bpf/libbpf.c | 38 ++++++++++++++++++++++++++++++++------ 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index cbb933532981..79046067720f 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -228,6 +228,9 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, memset(&attr, 0, sizeof(attr)); attr.prog_type = load_attr->prog_type; attr.expected_attach_type = load_attr->expected_attach_type; + if (attr.prog_type == BPF_PROG_TYPE_RAW_TRACEPOINT) + /* expected_attach_type is ignored for tracing progs */ + attr.attach_btf_id = attr.expected_attach_type; attr.insn_cnt = (__u32)load_attr->insns_cnt; attr.insns = ptr_to_u64(load_attr->insns); attr.license = ptr_to_u64(load_attr->license); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 8d565590ce05..22bf3b189947 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4489,19 +4489,22 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, prog->expected_attach_type = type; } -#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, atype) \ - { string, sizeof(string) - 1, ptype, eatype, is_attachable, atype } +#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, btf, atype) \ + { string, sizeof(string) - 1, ptype, eatype, is_attachable, btf, atype } /* Programs that can NOT be attached. */ -#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0) +#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0) /* Programs that can be attached. */ #define BPF_APROG_SEC(string, ptype, atype) \ - BPF_PROG_SEC_IMPL(string, ptype, 0, 1, atype) + BPF_PROG_SEC_IMPL(string, ptype, 0, 1, 0, atype) /* Programs that must specify expected attach type at load time. */ #define BPF_EAPROG_SEC(string, ptype, eatype) \ - BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, eatype) + BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, 0, eatype) + +/* Programs that use BTF to identify attach point */ +#define BPF_PROG_BTF(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 1, 0) /* Programs that can be attached but attach type can't be identified by section * name. Kept for backward compatibility. @@ -4513,7 +4516,8 @@ static const struct { size_t len; enum bpf_prog_type prog_type; enum bpf_attach_type expected_attach_type; - int is_attachable; + bool is_attachable; + bool is_attach_btf; enum bpf_attach_type attach_type; } section_names[] = { BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), @@ -4523,6 +4527,7 @@ static const struct { BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT), BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT), + BPF_PROG_BTF("tp_btf/", BPF_PROG_TYPE_RAW_TRACEPOINT), BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), @@ -4627,6 +4632,27 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, continue; *prog_type = section_names[i].prog_type; *expected_attach_type = section_names[i].expected_attach_type; + if (section_names[i].is_attach_btf) { + struct btf *btf = bpf_core_find_kernel_btf(); + char raw_tp_btf_name[128] = "btf_trace_"; + char *dst = raw_tp_btf_name + sizeof("btf_trace_") - 1; + int ret; + + if (IS_ERR(btf)) { + pr_warning("vmlinux BTF is not found\n"); + return -EINVAL; + } + /* prepend "btf_trace_" prefix per kernel convention */ + strncat(dst, name + section_names[i].len, + sizeof(raw_tp_btf_name) - (dst - raw_tp_btf_name)); + ret = btf__find_by_name(btf, raw_tp_btf_name); + btf__free(btf); + if (ret <= 0) { + pr_warning("%s is not found in vmlinux BTF\n", dst); + return -EINVAL; + } + *expected_attach_type = ret; + } return 0; } pr_warning("failed to guess program type based on ELF section name '%s'\n", name); From 9e15db66136a14cde3f35691f1d839d950118826 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 Oct 2019 20:25:00 -0700 Subject: [PATCH 16/52] bpf: Implement accurate raw_tp context access via BTF libbpf analyzes bpf C program, searches in-kernel BTF for given type name and stores it into expected_attach_type. The kernel verifier expects this btf_id to point to something like: typedef void (*btf_trace_kfree_skb)(void *, struct sk_buff *skb, void *loc); which represents signature of raw_tracepoint "kfree_skb". Then btf_ctx_access() matches ctx+0 access in bpf program with 'skb' and 'ctx+8' access with 'loc' arguments of "kfree_skb" tracepoint. In first case it passes btf_id of 'struct sk_buff *' back to the verifier core and 'void *' in second case. Then the verifier tracks PTR_TO_BTF_ID as any other pointer type. Like PTR_TO_SOCKET points to 'struct bpf_sock', PTR_TO_TCP_SOCK points to 'struct bpf_tcp_sock', and so on. PTR_TO_BTF_ID points to in-kernel structs. If 1234 is btf_id of 'struct sk_buff' in vmlinux's BTF then PTR_TO_BTF_ID#1234 points to one of in kernel skbs. When PTR_TO_BTF_ID#1234 is dereferenced (like r2 = *(u64 *)r1 + 32) the btf_struct_access() checks which field of 'struct sk_buff' is at offset 32. Checks that size of access matches type definition of the field and continues to track the dereferenced type. If that field was a pointer to 'struct net_device' the r2's type will be PTR_TO_BTF_ID#456. Where 456 is btf_id of 'struct net_device' in vmlinux's BTF. Such verifier analysis prevents "cheating" in BPF C program. The program cannot cast arbitrary pointer to 'struct sk_buff *' and access it. C compiler would allow type cast, of course, but the verifier will notice type mismatch based on BPF assembly and in-kernel BTF. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191016032505.2089704-7-ast@kernel.org --- include/linux/bpf.h | 17 +++- include/linux/bpf_verifier.h | 4 + kernel/bpf/btf.c | 190 +++++++++++++++++++++++++++++++++++ kernel/bpf/verifier.c | 88 +++++++++++++++- kernel/trace/bpf_trace.c | 2 +- 5 files changed, 296 insertions(+), 5 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f916380675dd..028555fcd10d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -16,6 +16,7 @@ #include struct bpf_verifier_env; +struct bpf_verifier_log; struct perf_event; struct bpf_prog; struct bpf_map; @@ -281,6 +282,7 @@ enum bpf_reg_type { PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */ + PTR_TO_BTF_ID, /* reg points to kernel struct */ }; /* The information passed from prog-specific *_is_valid_access @@ -288,7 +290,11 @@ enum bpf_reg_type { */ struct bpf_insn_access_aux { enum bpf_reg_type reg_type; - int ctx_field_size; + union { + int ctx_field_size; + u32 btf_id; + }; + struct bpf_verifier_log *log; /* for verbose logs */ }; static inline void @@ -483,6 +489,7 @@ struct bpf_event_entry { bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); int bpf_prog_calc_tag(struct bpf_prog *fp); +const char *kernel_type_name(u32 btf_type_id); const struct bpf_func_proto *bpf_get_trace_printk_proto(void); @@ -748,6 +755,14 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +bool btf_ctx_access(int off, int size, enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info); +int btf_struct_access(struct bpf_verifier_log *log, + const struct btf_type *t, int off, int size, + enum bpf_access_type atype, + u32 *next_btf_id); + #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 713efae62e96..6e7284ea1468 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -52,6 +52,8 @@ struct bpf_reg_state { */ struct bpf_map *map_ptr; + u32 btf_id; /* for PTR_TO_BTF_ID */ + /* Max size from any of the above. */ unsigned long raw; }; @@ -399,6 +401,8 @@ __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, va_list args); __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, const char *fmt, ...); +__printf(2, 3) void bpf_log(struct bpf_verifier_log *log, + const char *fmt, ...); static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env) { diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index ddeab1e8d21e..271d27cd427f 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3436,6 +3436,196 @@ errout: return ERR_PTR(err); } +extern struct btf *btf_vmlinux; + +bool btf_ctx_access(int off, int size, enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + struct bpf_verifier_log *log = info->log; + u32 btf_id = prog->aux->attach_btf_id; + const struct btf_param *args; + const struct btf_type *t; + const char prefix[] = "btf_trace_"; + const char *tname; + u32 nr_args, arg; + + if (!btf_id) + return true; + + if (IS_ERR(btf_vmlinux)) { + bpf_log(log, "btf_vmlinux is malformed\n"); + return false; + } + + t = btf_type_by_id(btf_vmlinux, btf_id); + if (!t || BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF) { + bpf_log(log, "btf_id is invalid\n"); + return false; + } + + tname = __btf_name_by_offset(btf_vmlinux, t->name_off); + if (strncmp(prefix, tname, sizeof(prefix) - 1)) { + bpf_log(log, "btf_id points to wrong type name %s\n", tname); + return false; + } + tname += sizeof(prefix) - 1; + + t = btf_type_by_id(btf_vmlinux, t->type); + if (!btf_type_is_ptr(t)) + return false; + t = btf_type_by_id(btf_vmlinux, t->type); + if (!btf_type_is_func_proto(t)) + return false; + + if (off % 8) { + bpf_log(log, "raw_tp '%s' offset %d is not multiple of 8\n", + tname, off); + return false; + } + arg = off / 8; + args = (const struct btf_param *)(t + 1); + /* skip first 'void *__data' argument in btf_trace_##name typedef */ + args++; + nr_args = btf_type_vlen(t) - 1; + if (arg >= nr_args) { + bpf_log(log, "raw_tp '%s' doesn't have %d-th argument\n", + tname, arg); + return false; + } + + t = btf_type_by_id(btf_vmlinux, args[arg].type); + /* skip modifiers */ + while (btf_type_is_modifier(t)) + t = btf_type_by_id(btf_vmlinux, t->type); + if (btf_type_is_int(t)) + /* accessing a scalar */ + return true; + if (!btf_type_is_ptr(t)) { + bpf_log(log, + "raw_tp '%s' arg%d '%s' has type %s. Only pointer access is allowed\n", + tname, arg, + __btf_name_by_offset(btf_vmlinux, t->name_off), + btf_kind_str[BTF_INFO_KIND(t->info)]); + return false; + } + if (t->type == 0) + /* This is a pointer to void. + * It is the same as scalar from the verifier safety pov. + * No further pointer walking is allowed. + */ + return true; + + /* this is a pointer to another type */ + info->reg_type = PTR_TO_BTF_ID; + info->btf_id = t->type; + + t = btf_type_by_id(btf_vmlinux, t->type); + /* skip modifiers */ + while (btf_type_is_modifier(t)) + t = btf_type_by_id(btf_vmlinux, t->type); + if (!btf_type_is_struct(t)) { + bpf_log(log, + "raw_tp '%s' arg%d type %s is not a struct\n", + tname, arg, btf_kind_str[BTF_INFO_KIND(t->info)]); + return false; + } + bpf_log(log, "raw_tp '%s' arg%d has btf_id %d type %s '%s'\n", + tname, arg, info->btf_id, btf_kind_str[BTF_INFO_KIND(t->info)], + __btf_name_by_offset(btf_vmlinux, t->name_off)); + return true; +} + +int btf_struct_access(struct bpf_verifier_log *log, + const struct btf_type *t, int off, int size, + enum bpf_access_type atype, + u32 *next_btf_id) +{ + const struct btf_member *member; + const struct btf_type *mtype; + const char *tname, *mname; + int i, moff = 0, msize; + +again: + tname = __btf_name_by_offset(btf_vmlinux, t->name_off); + if (!btf_type_is_struct(t)) { + bpf_log(log, "Type '%s' is not a struct", tname); + return -EINVAL; + } + + for_each_member(i, t, member) { + /* offset of the field in bits */ + moff = btf_member_bit_offset(t, member); + + if (btf_member_bitfield_size(t, member)) + /* bitfields are not supported yet */ + continue; + + if (off + size <= moff / 8) + /* won't find anything, field is already too far */ + break; + + /* type of the field */ + mtype = btf_type_by_id(btf_vmlinux, member->type); + mname = __btf_name_by_offset(btf_vmlinux, member->name_off); + + /* skip modifiers */ + while (btf_type_is_modifier(mtype)) + mtype = btf_type_by_id(btf_vmlinux, mtype->type); + + if (btf_type_is_array(mtype)) + /* array deref is not supported yet */ + continue; + + if (!btf_type_has_size(mtype) && !btf_type_is_ptr(mtype)) { + bpf_log(log, "field %s doesn't have size\n", mname); + return -EFAULT; + } + if (btf_type_is_ptr(mtype)) + msize = 8; + else + msize = mtype->size; + if (off >= moff / 8 + msize) + /* no overlap with member, keep iterating */ + continue; + /* the 'off' we're looking for is either equal to start + * of this field or inside of this struct + */ + if (btf_type_is_struct(mtype)) { + /* our field must be inside that union or struct */ + t = mtype; + + /* adjust offset we're looking for */ + off -= moff / 8; + goto again; + } + if (msize != size) { + /* field access size doesn't match */ + bpf_log(log, + "cannot access %d bytes in struct %s field %s that has size %d\n", + size, tname, mname, msize); + return -EACCES; + } + + if (btf_type_is_ptr(mtype)) { + const struct btf_type *stype; + + stype = btf_type_by_id(btf_vmlinux, mtype->type); + /* skip modifiers */ + while (btf_type_is_modifier(stype)) + stype = btf_type_by_id(btf_vmlinux, stype->type); + if (btf_type_is_struct(stype)) { + *next_btf_id = mtype->type; + return PTR_TO_BTF_ID; + } + } + /* all other fields are treated as scalars */ + return SCALAR_VALUE; + } + bpf_log(log, "struct %s doesn't have field at offset %d\n", tname, off); + return -EINVAL; +} + void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, struct seq_file *m) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 466b3b19de4d..42a463e09761 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -286,6 +286,19 @@ __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...) va_end(args); } +__printf(2, 3) void bpf_log(struct bpf_verifier_log *log, + const char *fmt, ...) +{ + va_list args; + + if (!bpf_verifier_log_needed(log)) + return; + + va_start(args, fmt); + bpf_verifier_vlog(log, fmt, args); + va_end(args); +} + static const char *ltrim(const char *s) { while (isspace(*s)) @@ -406,6 +419,7 @@ static const char * const reg_type_str[] = { [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", [PTR_TO_TP_BUFFER] = "tp_buffer", [PTR_TO_XDP_SOCK] = "xdp_sock", + [PTR_TO_BTF_ID] = "ptr_", }; static char slot_type_char[] = { @@ -436,6 +450,12 @@ static struct bpf_func_state *func(struct bpf_verifier_env *env, return cur->frame[reg->frameno]; } +const char *kernel_type_name(u32 id) +{ + return btf_name_by_offset(btf_vmlinux, + btf_type_by_id(btf_vmlinux, id)->name_off); +} + static void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_func_state *state) { @@ -460,6 +480,8 @@ static void print_verifier_state(struct bpf_verifier_env *env, /* reg->off should be 0 for SCALAR_VALUE */ verbose(env, "%lld", reg->var_off.value + reg->off); } else { + if (t == PTR_TO_BTF_ID) + verbose(env, "%s", kernel_type_name(reg->btf_id)); verbose(env, "(id=%d", reg->id); if (reg_type_may_be_refcounted_or_null(t)) verbose(env, ",ref_obj_id=%d", reg->ref_obj_id); @@ -2337,10 +2359,12 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, /* check access to 'struct bpf_context' fields. Supports fixed offsets only */ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, - enum bpf_access_type t, enum bpf_reg_type *reg_type) + enum bpf_access_type t, enum bpf_reg_type *reg_type, + u32 *btf_id) { struct bpf_insn_access_aux info = { .reg_type = *reg_type, + .log = &env->log, }; if (env->ops->is_valid_access && @@ -2354,7 +2378,10 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, */ *reg_type = info.reg_type; - env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; + if (*reg_type == PTR_TO_BTF_ID) + *btf_id = info.btf_id; + else + env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; /* remember the offset of last byte accessed in ctx */ if (env->prog->aux->max_ctx_offset < off + size) env->prog->aux->max_ctx_offset = off + size; @@ -2780,6 +2807,53 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val) return 0; } +static int check_ptr_to_btf_access(struct bpf_verifier_env *env, + struct bpf_reg_state *regs, + int regno, int off, int size, + enum bpf_access_type atype, + int value_regno) +{ + struct bpf_reg_state *reg = regs + regno; + const struct btf_type *t = btf_type_by_id(btf_vmlinux, reg->btf_id); + const char *tname = btf_name_by_offset(btf_vmlinux, t->name_off); + u32 btf_id; + int ret; + + if (atype != BPF_READ) { + verbose(env, "only read is supported\n"); + return -EACCES; + } + + if (off < 0) { + verbose(env, + "R%d is ptr_%s invalid negative access: off=%d\n", + regno, tname, off); + return -EACCES; + } + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose(env, + "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n", + regno, tname, off, tn_buf); + return -EACCES; + } + + ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id); + if (ret < 0) + return ret; + + if (ret == SCALAR_VALUE) { + mark_reg_unknown(env, regs, value_regno); + return 0; + } + mark_reg_known_zero(env, regs, value_regno); + regs[value_regno].type = PTR_TO_BTF_ID; + regs[value_regno].btf_id = btf_id; + return 0; +} + /* check whether memory at (regno + off) is accessible for t = (read | write) * if t==write, value_regno is a register which value is stored into memory * if t==read, value_regno is a register which will receive the value from memory @@ -2840,6 +2914,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } } else if (reg->type == PTR_TO_CTX) { enum bpf_reg_type reg_type = SCALAR_VALUE; + u32 btf_id = 0; if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { @@ -2851,7 +2926,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (err < 0) return err; - err = check_ctx_access(env, insn_idx, off, size, t, ®_type); + err = check_ctx_access(env, insn_idx, off, size, t, ®_type, &btf_id); + if (err) + verbose_linfo(env, insn_idx, "; "); if (!err && t == BPF_READ && value_regno >= 0) { /* ctx access returns either a scalar, or a * PTR_TO_PACKET[_META,_END]. In the latter @@ -2870,6 +2947,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn * a sub-register. */ regs[value_regno].subreg_def = DEF_NOT_SUBREG; + if (reg_type == PTR_TO_BTF_ID) + regs[value_regno].btf_id = btf_id; } regs[value_regno].type = reg_type; } @@ -2929,6 +3008,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn err = check_tp_buffer_access(env, reg, regno, off, size); if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown(env, regs, value_regno); + } else if (reg->type == PTR_TO_BTF_ID) { + err = check_ptr_to_btf_access(env, regs, regno, off, size, t, + value_regno); } else { verbose(env, "R%d invalid mem access '%s'\n", regno, reg_type_str[reg->type]); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 44bd08f2443b..6221e8c6ecc3 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1074,7 +1074,7 @@ static bool raw_tp_prog_is_valid_access(int off, int size, return false; if (off % size != 0) return false; - return true; + return btf_ctx_access(off, size, type, prog, info); } const struct bpf_verifier_ops raw_tracepoint_verifier_ops = { From ac4414b5ca47d16c8de3134cc1b868056c4a68ea Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 Oct 2019 20:25:01 -0700 Subject: [PATCH 17/52] bpf: Attach raw_tp program with BTF via type name BTF type id specified at program load time has all necessary information to attach that program to raw tracepoint. Use kernel type name to find raw tracepoint. Add missing CHECK_ATTR() condition. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191016032505.2089704-8-ast@kernel.org --- kernel/bpf/syscall.c | 70 +++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 23 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index b56c482c9760..523e3ac15a08 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1816,17 +1816,52 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) struct bpf_raw_tracepoint *raw_tp; struct bpf_raw_event_map *btp; struct bpf_prog *prog; - char tp_name[128]; + const char *tp_name; + char buf[128]; int tp_fd, err; - if (strncpy_from_user(tp_name, u64_to_user_ptr(attr->raw_tracepoint.name), - sizeof(tp_name) - 1) < 0) - return -EFAULT; - tp_name[sizeof(tp_name) - 1] = 0; + if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) + return -EINVAL; + + prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT && + prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) { + err = -EINVAL; + goto out_put_prog; + } + + if (prog->type == BPF_PROG_TYPE_RAW_TRACEPOINT && + prog->aux->attach_btf_id) { + if (attr->raw_tracepoint.name) { + /* raw_tp name should not be specified in raw_tp + * programs that were verified via in-kernel BTF info + */ + err = -EINVAL; + goto out_put_prog; + } + /* raw_tp name is taken from type name instead */ + tp_name = kernel_type_name(prog->aux->attach_btf_id); + /* skip the prefix */ + tp_name += sizeof("btf_trace_") - 1; + } else { + if (strncpy_from_user(buf, + u64_to_user_ptr(attr->raw_tracepoint.name), + sizeof(buf) - 1) < 0) { + err = -EFAULT; + goto out_put_prog; + } + buf[sizeof(buf) - 1] = 0; + tp_name = buf; + } btp = bpf_get_raw_tracepoint(tp_name); - if (!btp) - return -ENOENT; + if (!btp) { + err = -ENOENT; + goto out_put_prog; + } raw_tp = kzalloc(sizeof(*raw_tp), GFP_USER); if (!raw_tp) { @@ -1834,38 +1869,27 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) goto out_put_btp; } raw_tp->btp = btp; - - prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); - if (IS_ERR(prog)) { - err = PTR_ERR(prog); - goto out_free_tp; - } - if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT && - prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) { - err = -EINVAL; - goto out_put_prog; - } + raw_tp->prog = prog; err = bpf_probe_register(raw_tp->btp, prog); if (err) - goto out_put_prog; + goto out_free_tp; - raw_tp->prog = prog; tp_fd = anon_inode_getfd("bpf-raw-tracepoint", &bpf_raw_tp_fops, raw_tp, O_CLOEXEC); if (tp_fd < 0) { bpf_probe_unregister(raw_tp->btp, prog); err = tp_fd; - goto out_put_prog; + goto out_free_tp; } return tp_fd; -out_put_prog: - bpf_prog_put(prog); out_free_tp: kfree(raw_tp); out_put_btp: bpf_put_raw_tracepoint(btp); +out_put_prog: + bpf_prog_put(prog); return err; } From 2a02759ef5f8a34792df22b41d5e10658fd7bbd3 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 Oct 2019 20:25:02 -0700 Subject: [PATCH 18/52] bpf: Add support for BTF pointers to interpreter Pointer to BTF object is a pointer to kernel object or NULL. The memory access in the interpreter has to be done via probe_kernel_read to avoid page faults. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191016032505.2089704-9-ast@kernel.org --- include/linux/filter.h | 3 +++ kernel/bpf/core.c | 19 +++++++++++++++++++ kernel/bpf/verifier.c | 8 ++++++++ 3 files changed, 30 insertions(+) diff --git a/include/linux/filter.h b/include/linux/filter.h index d3d51d7aff2c..22ebea2e64ea 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -65,6 +65,9 @@ struct ctl_table_header; /* unused opcode to mark special call to bpf_tail_call() helper */ #define BPF_TAIL_CALL 0xf0 +/* unused opcode to mark special load instruction. Same as BPF_ABS */ +#define BPF_PROBE_MEM 0x20 + /* unused opcode to mark call to interpreter with arguments */ #define BPF_CALL_ARGS 0xe0 diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 66088a9e9b9e..8a765bbd33f0 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1291,6 +1291,11 @@ bool bpf_opcode_in_insntable(u8 code) } #ifndef CONFIG_BPF_JIT_ALWAYS_ON +u64 __weak bpf_probe_read(void * dst, u32 size, const void * unsafe_ptr) +{ + memset(dst, 0, size); + return -EFAULT; +} /** * __bpf_prog_run - run eBPF program on a given context * @regs: is the array of MAX_BPF_EXT_REG eBPF pseudo-registers @@ -1310,6 +1315,10 @@ static u64 __no_fgcse ___bpf_prog_run(u64 *regs, const struct bpf_insn *insn, u6 /* Non-UAPI available opcodes. */ [BPF_JMP | BPF_CALL_ARGS] = &&JMP_CALL_ARGS, [BPF_JMP | BPF_TAIL_CALL] = &&JMP_TAIL_CALL, + [BPF_LDX | BPF_PROBE_MEM | BPF_B] = &&LDX_PROBE_MEM_B, + [BPF_LDX | BPF_PROBE_MEM | BPF_H] = &&LDX_PROBE_MEM_H, + [BPF_LDX | BPF_PROBE_MEM | BPF_W] = &&LDX_PROBE_MEM_W, + [BPF_LDX | BPF_PROBE_MEM | BPF_DW] = &&LDX_PROBE_MEM_DW, }; #undef BPF_INSN_3_LBL #undef BPF_INSN_2_LBL @@ -1542,6 +1551,16 @@ out: LDST(W, u32) LDST(DW, u64) #undef LDST +#define LDX_PROBE(SIZEOP, SIZE) \ + LDX_PROBE_MEM_##SIZEOP: \ + bpf_probe_read(&DST, SIZE, (const void *)(long) SRC); \ + CONT; + LDX_PROBE(B, 1) + LDX_PROBE(H, 2) + LDX_PROBE(W, 4) + LDX_PROBE(DW, 8) +#undef LDX_PROBE + STX_XADD_W: /* lock xadd *(u32 *)(dst_reg + off16) += src_reg */ atomic_add((u32) SRC, (atomic_t *)(unsigned long) (DST + insn->off)); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 42a463e09761..c4b6a2cfcd47 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7581,6 +7581,7 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type) case PTR_TO_TCP_SOCK: case PTR_TO_TCP_SOCK_OR_NULL: case PTR_TO_XDP_SOCK: + case PTR_TO_BTF_ID: return false; default: return true; @@ -8722,6 +8723,13 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) case PTR_TO_XDP_SOCK: convert_ctx_access = bpf_xdp_sock_convert_ctx_access; break; + case PTR_TO_BTF_ID: + if (type == BPF_WRITE) { + verbose(env, "Writes through BTF pointers are not allowed\n"); + return -EINVAL; + } + insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code); + continue; default: continue; } From 3dec541b2e632d630fe7142ed44f0b3702ef1f8c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 Oct 2019 20:25:03 -0700 Subject: [PATCH 19/52] bpf: Add support for BTF pointers to x86 JIT Pointer to BTF object is a pointer to kernel object or NULL. Such pointers can only be used by BPF_LDX instructions. The verifier changed their opcode from LDX|MEM|size to LDX|PROBE_MEM|size to make JITing easier. The number of entries in extable is the number of BPF_LDX insns that access kernel memory via "pointer to BTF type". Only these load instructions can fault. Since x86 extable is relative it has to be allocated in the same memory region as JITed code. Allocate it prior to last pass of JITing and let the last pass populate it. Pointer to extable in bpf_prog_aux is necessary to make page fault handling fast. Page fault handling is done in two steps: 1. bpf_prog_kallsyms_find() finds BPF program that page faulted. It's done by walking rb tree. 2. then extable for given bpf program is binary searched. This process is similar to how page faulting is done for kernel modules. The exception handler skips over faulting x86 instruction and initializes destination register with zero. This mimics exact behavior of bpf_probe_read (when probe_kernel_read faults dest is zeroed). JITs for other architectures can add support in similar way. Until then they will reject unknown opcode and fallback to interpreter. Since extable should be aligned and placed near JITed code make bpf_jit_binary_alloc() return 4 byte aligned image offset, so that extable aligning formula in bpf_int_jit_compile() doesn't need to rely on internal implementation of bpf_jit_binary_alloc(). On x86 gcc defaults to 16-byte alignment for regular kernel functions due to better performance. JITed code may be aligned to 16 in the future, but it will use 4 in the meantime. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191016032505.2089704-10-ast@kernel.org --- arch/x86/net/bpf_jit_comp.c | 97 +++++++++++++++++++++++++++++++++++-- include/linux/bpf.h | 3 ++ include/linux/extable.h | 10 ++++ kernel/bpf/core.c | 20 +++++++- kernel/bpf/verifier.c | 1 + kernel/extable.c | 2 + 6 files changed, 128 insertions(+), 5 deletions(-) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 3ad2ba1ad855..8cd23d8309bf 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -9,7 +9,7 @@ #include #include #include - +#include #include #include @@ -123,6 +123,19 @@ static const int reg2hex[] = { [AUX_REG] = 3, /* R11 temp register */ }; +static const int reg2pt_regs[] = { + [BPF_REG_0] = offsetof(struct pt_regs, ax), + [BPF_REG_1] = offsetof(struct pt_regs, di), + [BPF_REG_2] = offsetof(struct pt_regs, si), + [BPF_REG_3] = offsetof(struct pt_regs, dx), + [BPF_REG_4] = offsetof(struct pt_regs, cx), + [BPF_REG_5] = offsetof(struct pt_regs, r8), + [BPF_REG_6] = offsetof(struct pt_regs, bx), + [BPF_REG_7] = offsetof(struct pt_regs, r13), + [BPF_REG_8] = offsetof(struct pt_regs, r14), + [BPF_REG_9] = offsetof(struct pt_regs, r15), +}; + /* * is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15 * which need extra byte of encoding. @@ -377,6 +390,19 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg) *pprog = prog; } + +static bool ex_handler_bpf(const struct exception_table_entry *x, + struct pt_regs *regs, int trapnr, + unsigned long error_code, unsigned long fault_addr) +{ + u32 reg = x->fixup >> 8; + + /* jump over faulting load and clear dest register */ + *(unsigned long *)((void *)regs + reg) = 0; + regs->ip += x->fixup & 0xff; + return true; +} + static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int oldproglen, struct jit_context *ctx) { @@ -384,7 +410,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, int insn_cnt = bpf_prog->len; bool seen_exit = false; u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; - int i, cnt = 0; + int i, cnt = 0, excnt = 0; int proglen = 0; u8 *prog = temp; @@ -778,14 +804,17 @@ stx: if (is_imm8(insn->off)) /* LDX: dst_reg = *(u8*)(src_reg + off) */ case BPF_LDX | BPF_MEM | BPF_B: + case BPF_LDX | BPF_PROBE_MEM | BPF_B: /* Emit 'movzx rax, byte ptr [rax + off]' */ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); goto ldx; case BPF_LDX | BPF_MEM | BPF_H: + case BPF_LDX | BPF_PROBE_MEM | BPF_H: /* Emit 'movzx rax, word ptr [rax + off]' */ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); goto ldx; case BPF_LDX | BPF_MEM | BPF_W: + case BPF_LDX | BPF_PROBE_MEM | BPF_W: /* Emit 'mov eax, dword ptr [rax+0x14]' */ if (is_ereg(dst_reg) || is_ereg(src_reg)) EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); @@ -793,6 +822,7 @@ stx: if (is_imm8(insn->off)) EMIT1(0x8B); goto ldx; case BPF_LDX | BPF_MEM | BPF_DW: + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: /* Emit 'mov rax, qword ptr [rax+0x14]' */ EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); ldx: /* @@ -805,6 +835,48 @@ ldx: /* else EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), insn->off); + if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { + struct exception_table_entry *ex; + u8 *_insn = image + proglen; + s64 delta; + + if (!bpf_prog->aux->extable) + break; + + if (excnt >= bpf_prog->aux->num_exentries) { + pr_err("ex gen bug\n"); + return -EFAULT; + } + ex = &bpf_prog->aux->extable[excnt++]; + + delta = _insn - (u8 *)&ex->insn; + if (!is_simm32(delta)) { + pr_err("extable->insn doesn't fit into 32-bit\n"); + return -EFAULT; + } + ex->insn = delta; + + delta = (u8 *)ex_handler_bpf - (u8 *)&ex->handler; + if (!is_simm32(delta)) { + pr_err("extable->handler doesn't fit into 32-bit\n"); + return -EFAULT; + } + ex->handler = delta; + + if (dst_reg > BPF_REG_9) { + pr_err("verifier error\n"); + return -EFAULT; + } + /* + * Compute size of x86 insn and its target dest x86 register. + * ex_handler_bpf() will use lower 8 bits to adjust + * pt_regs->ip to jump over this x86 instruction + * and upper bits to figure out which pt_regs to zero out. + * End result: x86 insn "mov rbx, qword ptr [rax+0x14]" + * of 4 bytes will be ignored and rbx will be zero inited. + */ + ex->fixup = (prog - temp) | (reg2pt_regs[dst_reg] << 8); + } break; /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */ @@ -1058,6 +1130,11 @@ emit_jmp: addrs[i] = proglen; prog = temp; } + + if (image && excnt != bpf_prog->aux->num_exentries) { + pr_err("extable is not populated\n"); + return -EFAULT; + } return proglen; } @@ -1158,12 +1235,24 @@ out_image: break; } if (proglen == oldproglen) { - header = bpf_jit_binary_alloc(proglen, &image, - 1, jit_fill_hole); + /* + * The number of entries in extable is the number of BPF_LDX + * insns that access kernel memory via "pointer to BTF type". + * The verifier changed their opcode from LDX|MEM|size + * to LDX|PROBE_MEM|size to make JITing easier. + */ + u32 align = __alignof__(struct exception_table_entry); + u32 extable_size = prog->aux->num_exentries * + sizeof(struct exception_table_entry); + + /* allocate module memory for x86 insns and extable */ + header = bpf_jit_binary_alloc(roundup(proglen, align) + extable_size, + &image, align, jit_fill_hole); if (!header) { prog = orig_prog; goto out_addrs; } + prog->aux->extable = (void *) image + roundup(proglen, align); } oldproglen = proglen; cond_resched(); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 028555fcd10d..a7330d75bb94 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -24,6 +24,7 @@ struct sock; struct seq_file; struct btf; struct btf_type; +struct exception_table_entry; extern struct idr btf_idr; extern spinlock_t btf_idr_lock; @@ -423,6 +424,8 @@ struct bpf_prog_aux { * main prog always has linfo_idx == 0 */ u32 linfo_idx; + u32 num_exentries; + struct exception_table_entry *extable; struct bpf_prog_stats __percpu *stats; union { struct work_struct work; diff --git a/include/linux/extable.h b/include/linux/extable.h index 81ecfaa83ad3..4ab9e78f313b 100644 --- a/include/linux/extable.h +++ b/include/linux/extable.h @@ -33,4 +33,14 @@ search_module_extables(unsigned long addr) } #endif /*CONFIG_MODULES*/ +#ifdef CONFIG_BPF_JIT +const struct exception_table_entry *search_bpf_extables(unsigned long addr); +#else +static inline const struct exception_table_entry * +search_bpf_extables(unsigned long addr) +{ + return NULL; +} +#endif + #endif /* _LINUX_EXTABLE_H */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 8a765bbd33f0..673f5d40a93e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -30,7 +30,7 @@ #include #include #include - +#include #include /* Registers */ @@ -712,6 +712,24 @@ bool is_bpf_text_address(unsigned long addr) return ret; } +const struct exception_table_entry *search_bpf_extables(unsigned long addr) +{ + const struct exception_table_entry *e = NULL; + struct bpf_prog *prog; + + rcu_read_lock(); + prog = bpf_prog_kallsyms_find(addr); + if (!prog) + goto out; + if (!prog->aux->num_exentries) + goto out; + + e = search_extable(prog->aux->extable, prog->aux->num_exentries, addr); +out: + rcu_read_unlock(); + return e; +} + int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, char *sym) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c4b6a2cfcd47..fba9ef6a831b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -8729,6 +8729,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) return -EINVAL; } insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code); + env->prog->aux->num_exentries++; continue; default: continue; diff --git a/kernel/extable.c b/kernel/extable.c index f6c9406eec7d..f6920a11e28a 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -56,6 +56,8 @@ const struct exception_table_entry *search_exception_tables(unsigned long addr) e = search_kernel_exception_table(addr); if (!e) e = search_module_extables(addr); + if (!e) + e = search_bpf_extables(addr); return e; } From a7658e1a4164ce2b9eb4a11aadbba38586e93bd6 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 Oct 2019 20:25:04 -0700 Subject: [PATCH 20/52] bpf: Check types of arguments passed into helpers Introduce new helper that reuses existing skb perf_event output implementation, but can be called from raw_tracepoint programs that receive 'struct sk_buff *' as tracepoint argument or can walk other kernel data structures to skb pointer. In order to do that teach verifier to resolve true C types of bpf helpers into in-kernel BTF ids. The type of kernel pointer passed by raw tracepoint into bpf program will be tracked by the verifier all the way until it's passed into helper function. For example: kfree_skb() kernel function calls trace_kfree_skb(skb, loc); bpf programs receives that skb pointer and may eventually pass it into bpf_skb_output() bpf helper which in-kernel is implemented via bpf_skb_event_output() kernel function. Its first argument in the kernel is 'struct sk_buff *'. The verifier makes sure that types match all the way. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191016032505.2089704-11-ast@kernel.org --- include/linux/bpf.h | 18 ++++++--- include/uapi/linux/bpf.h | 27 +++++++++++++- kernel/bpf/btf.c | 68 ++++++++++++++++++++++++++++++++++ kernel/bpf/verifier.c | 44 ++++++++++++++-------- kernel/trace/bpf_trace.c | 4 ++ net/core/filter.c | 15 +++++++- tools/include/uapi/linux/bpf.h | 27 +++++++++++++- 7 files changed, 180 insertions(+), 23 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a7330d75bb94..2c2c29b49845 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -213,6 +213,7 @@ enum bpf_arg_type { ARG_PTR_TO_INT, /* pointer to int */ ARG_PTR_TO_LONG, /* pointer to long */ ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ + ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ }; /* type of values returned from helper functions */ @@ -235,11 +236,17 @@ struct bpf_func_proto { bool gpl_only; bool pkt_access; enum bpf_return_type ret_type; - enum bpf_arg_type arg1_type; - enum bpf_arg_type arg2_type; - enum bpf_arg_type arg3_type; - enum bpf_arg_type arg4_type; - enum bpf_arg_type arg5_type; + union { + struct { + enum bpf_arg_type arg1_type; + enum bpf_arg_type arg2_type; + enum bpf_arg_type arg3_type; + enum bpf_arg_type arg4_type; + enum bpf_arg_type arg5_type; + }; + enum bpf_arg_type arg_type[5]; + }; + u32 *btf_id; /* BTF ids of arguments */ }; /* bpf_context is intentionally undefined structure. Pointer to bpf_context is @@ -765,6 +772,7 @@ int btf_struct_access(struct bpf_verifier_log *log, const struct btf_type *t, int off, int size, enum bpf_access_type atype, u32 *next_btf_id); +u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *, int); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3bb2cd1de341..4af8b0819a32 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2751,6 +2751,30 @@ union bpf_attr { * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies * * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 + * + * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * Description + * Write raw *data* blob into a special BPF perf event held by + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf + * event must have the following attributes: **PERF_SAMPLE_RAW** + * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and + * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. + * + * The *flags* are used to indicate the index in *map* for which + * the value must be put, masked with **BPF_F_INDEX_MASK**. + * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** + * to indicate that the index of the current CPU core should be + * used. + * + * The value to write, of *size*, is passed through eBPF stack and + * pointed by *data*. + * + * *ctx* is a pointer to in-kernel struct sk_buff. + * + * This helper is similar to **bpf_perf_event_output**\ () but + * restricted to raw_tracepoint bpf programs. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2863,7 +2887,8 @@ union bpf_attr { FN(sk_storage_get), \ FN(sk_storage_delete), \ FN(send_signal), \ - FN(tcp_gen_syncookie), + FN(tcp_gen_syncookie), \ + FN(skb_output), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 271d27cd427f..f7557af39756 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3626,6 +3626,74 @@ again: return -EINVAL; } +u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn, int arg) +{ + char fnname[KSYM_SYMBOL_LEN + 4] = "btf_"; + const struct btf_param *args; + const struct btf_type *t; + const char *tname, *sym; + u32 btf_id, i; + + if (IS_ERR(btf_vmlinux)) { + bpf_log(log, "btf_vmlinux is malformed\n"); + return -EINVAL; + } + + sym = kallsyms_lookup((long)fn, NULL, NULL, NULL, fnname + 4); + if (!sym) { + bpf_log(log, "kernel doesn't have kallsyms\n"); + return -EFAULT; + } + + for (i = 1; i <= btf_vmlinux->nr_types; i++) { + t = btf_type_by_id(btf_vmlinux, i); + if (BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF) + continue; + tname = __btf_name_by_offset(btf_vmlinux, t->name_off); + if (!strcmp(tname, fnname)) + break; + } + if (i > btf_vmlinux->nr_types) { + bpf_log(log, "helper %s type is not found\n", fnname); + return -ENOENT; + } + + t = btf_type_by_id(btf_vmlinux, t->type); + if (!btf_type_is_ptr(t)) + return -EFAULT; + t = btf_type_by_id(btf_vmlinux, t->type); + if (!btf_type_is_func_proto(t)) + return -EFAULT; + + args = (const struct btf_param *)(t + 1); + if (arg >= btf_type_vlen(t)) { + bpf_log(log, "bpf helper %s doesn't have %d-th argument\n", + fnname, arg); + return -EINVAL; + } + + t = btf_type_by_id(btf_vmlinux, args[arg].type); + if (!btf_type_is_ptr(t) || !t->type) { + /* anything but the pointer to struct is a helper config bug */ + bpf_log(log, "ARG_PTR_TO_BTF is misconfigured\n"); + return -EFAULT; + } + btf_id = t->type; + t = btf_type_by_id(btf_vmlinux, t->type); + /* skip modifiers */ + while (btf_type_is_modifier(t)) { + btf_id = t->type; + t = btf_type_by_id(btf_vmlinux, t->type); + } + if (!btf_type_is_struct(t)) { + bpf_log(log, "ARG_PTR_TO_BTF is not a struct\n"); + return -EFAULT; + } + bpf_log(log, "helper %s arg%d has btf_id %d struct %s\n", fnname + 4, + arg, btf_id, __btf_name_by_offset(btf_vmlinux, t->name_off)); + return btf_id; +} + void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj, struct seq_file *m) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index fba9ef6a831b..556e82f8869b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -205,6 +205,7 @@ struct bpf_call_arg_meta { u64 msize_umax_value; int ref_obj_id; int func_id; + u32 btf_id; }; struct btf *btf_vmlinux; @@ -3439,6 +3440,22 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, expected_type = PTR_TO_SOCKET; if (type != expected_type) goto err_type; + } else if (arg_type == ARG_PTR_TO_BTF_ID) { + expected_type = PTR_TO_BTF_ID; + if (type != expected_type) + goto err_type; + if (reg->btf_id != meta->btf_id) { + verbose(env, "Helper has type %s got %s in R%d\n", + kernel_type_name(meta->btf_id), + kernel_type_name(reg->btf_id), regno); + + return -EACCES; + } + if (!tnum_is_const(reg->var_off) || reg->var_off.value || reg->off) { + verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n", + regno); + return -EACCES; + } } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { if (meta->func_id == BPF_FUNC_spin_lock) { if (process_spin_lock(env, regno, true)) @@ -3586,6 +3603,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_MAP_TYPE_PERF_EVENT_ARRAY: if (func_id != BPF_FUNC_perf_event_read && func_id != BPF_FUNC_perf_event_output && + func_id != BPF_FUNC_skb_output && func_id != BPF_FUNC_perf_event_read_value) goto error; break; @@ -3673,6 +3691,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_FUNC_perf_event_read: case BPF_FUNC_perf_event_output: case BPF_FUNC_perf_event_read_value: + case BPF_FUNC_skb_output: if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) goto error; break; @@ -4127,21 +4146,16 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn meta.func_id = func_id; /* check args */ - err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta); - if (err) - return err; - err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta); - if (err) - return err; - err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta); - if (err) - return err; - err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta); - if (err) - return err; - err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta); - if (err) - return err; + for (i = 0; i < 5; i++) { + if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID) { + if (!fn->btf_id[i]) + fn->btf_id[i] = btf_resolve_helper_id(&env->log, fn->func, i); + meta.btf_id = fn->btf_id[i]; + } + err = check_func_arg(env, BPF_REG_1 + i, fn->arg_type[i], &meta); + if (err) + return err; + } err = record_func_map(env, &meta, func_id, insn_idx); if (err) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 6221e8c6ecc3..52f7e9d8c29b 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -995,6 +995,8 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; +extern const struct bpf_func_proto bpf_skb_output_proto; + BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args, struct bpf_map *, map, u64, flags) { @@ -1053,6 +1055,8 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) switch (func_id) { case BPF_FUNC_perf_event_output: return &bpf_perf_event_output_proto_raw_tp; + case BPF_FUNC_skb_output: + return &bpf_skb_output_proto; case BPF_FUNC_get_stackid: return &bpf_get_stackid_proto_raw_tp; case BPF_FUNC_get_stack: diff --git a/net/core/filter.c b/net/core/filter.c index 46196e212413..728ba6203c1f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3798,7 +3798,7 @@ BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map, if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) return -EINVAL; - if (unlikely(skb_size > skb->len)) + if (unlikely(!skb || skb_size > skb->len)) return -EFAULT; return bpf_event_output(map, flags, meta, meta_size, skb, skb_size, @@ -3816,6 +3816,19 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = { .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; +static u32 bpf_skb_output_btf_ids[5]; +const struct bpf_func_proto bpf_skb_output_proto = { + .func = bpf_skb_event_output, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, + .btf_id = bpf_skb_output_btf_ids, +}; + static unsigned short bpf_tunnel_key_af(u64 flags) { return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 3bb2cd1de341..4af8b0819a32 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2751,6 +2751,30 @@ union bpf_attr { * **-EOPNOTSUPP** kernel configuration does not enable SYN cookies * * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 + * + * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * Description + * Write raw *data* blob into a special BPF perf event held by + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf + * event must have the following attributes: **PERF_SAMPLE_RAW** + * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and + * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. + * + * The *flags* are used to indicate the index in *map* for which + * the value must be put, masked with **BPF_F_INDEX_MASK**. + * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** + * to indicate that the index of the current CPU core should be + * used. + * + * The value to write, of *size*, is passed through eBPF stack and + * pointed by *data*. + * + * *ctx* is a pointer to in-kernel struct sk_buff. + * + * This helper is similar to **bpf_perf_event_output**\ () but + * restricted to raw_tracepoint bpf programs. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2863,7 +2887,8 @@ union bpf_attr { FN(sk_storage_get), \ FN(sk_storage_delete), \ FN(send_signal), \ - FN(tcp_gen_syncookie), + FN(tcp_gen_syncookie), \ + FN(skb_output), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call From 580d656d80cfe616432fddd1ec35297a1454e05a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 Oct 2019 20:25:05 -0700 Subject: [PATCH 21/52] selftests/bpf: Add kfree_skb raw_tp test Load basic cls_bpf program. Load raw_tracepoint program and attach to kfree_skb raw tracepoint. Trigger cls_bpf via prog_test_run. At the end of test_run kernel will call kfree_skb which will trigger trace_kfree_skb tracepoint. Which will call our raw_tracepoint program. Which will take that skb and will dump it into perf ring buffer. Check that user space received correct packet. Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191016032505.2089704-12-ast@kernel.org --- .../selftests/bpf/prog_tests/kfree_skb.c | 89 +++++++++++++++ tools/testing/selftests/bpf/progs/kfree_skb.c | 103 ++++++++++++++++++ 2 files changed, 192 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/kfree_skb.c create mode 100644 tools/testing/selftests/bpf/progs/kfree_skb.c diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c new file mode 100644 index 000000000000..430b50de1583 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +static void on_sample(void *ctx, int cpu, void *data, __u32 size) +{ + int ifindex = *(int *)data, duration = 0; + struct ipv6_packet *pkt_v6 = data + 4; + + if (ifindex != 1) + /* spurious kfree_skb not on loopback device */ + return; + if (CHECK(size != 76, "check_size", "size %u != 76\n", size)) + return; + if (CHECK(pkt_v6->eth.h_proto != 0xdd86, "check_eth", + "h_proto %x\n", pkt_v6->eth.h_proto)) + return; + if (CHECK(pkt_v6->iph.nexthdr != 6, "check_ip", + "iph.nexthdr %x\n", pkt_v6->iph.nexthdr)) + return; + if (CHECK(pkt_v6->tcp.doff != 5, "check_tcp", + "tcp.doff %x\n", pkt_v6->tcp.doff)) + return; + + *(bool *)ctx = true; +} + +void test_kfree_skb(void) +{ + struct bpf_prog_load_attr attr = { + .file = "./kfree_skb.o", + }; + + struct bpf_object *obj, *obj2 = NULL; + struct perf_buffer_opts pb_opts = {}; + struct perf_buffer *pb = NULL; + struct bpf_link *link = NULL; + struct bpf_map *perf_buf_map; + struct bpf_program *prog; + __u32 duration, retval; + int err, pkt_fd, kfree_skb_fd; + bool passed = false; + + err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &pkt_fd); + if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) + return; + + err = bpf_prog_load_xattr(&attr, &obj2, &kfree_skb_fd); + if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno)) + goto close_prog; + + prog = bpf_object__find_program_by_title(obj2, "tp_btf/kfree_skb"); + if (CHECK(!prog, "find_prog", "prog kfree_skb not found\n")) + goto close_prog; + link = bpf_program__attach_raw_tracepoint(prog, NULL); + if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link))) + goto close_prog; + + perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map"); + if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n")) + goto close_prog; + + /* set up perf buffer */ + pb_opts.sample_cb = on_sample; + pb_opts.ctx = &passed; + pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts); + if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) + goto close_prog; + + err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), + NULL, NULL, &retval, &duration); + CHECK(err || retval, "ipv6", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration); + + /* read perf buffer */ + err = perf_buffer__poll(pb, 100); + if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err)) + goto close_prog; + /* make sure kfree_skb program was triggered + * and it sent expected skb into ring buffer + */ + CHECK_FAIL(!passed); +close_prog: + perf_buffer__free(pb); + if (!IS_ERR_OR_NULL(link)) + bpf_link__destroy(link); + bpf_object__close(obj); + bpf_object__close(obj2); +} diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c new file mode 100644 index 000000000000..89af8a921ee4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kfree_skb.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include +#include "bpf_helpers.h" +#include "bpf_endian.h" + +char _license[] SEC("license") = "GPL"; +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} perf_buf_map SEC(".maps"); + +#define _(P) (__builtin_preserve_access_index(P)) + +/* define few struct-s that bpf program needs to access */ +struct callback_head { + struct callback_head *next; + void (*func)(struct callback_head *head); +}; +struct dev_ifalias { + struct callback_head rcuhead; +}; + +struct net_device /* same as kernel's struct net_device */ { + int ifindex; + struct dev_ifalias *ifalias; +}; + +typedef struct { + int counter; +} atomic_t; +typedef struct refcount_struct { + atomic_t refs; +} refcount_t; + +struct sk_buff { + /* field names and sizes should match to those in the kernel */ + unsigned int len, data_len; + __u16 mac_len, hdr_len, queue_mapping; + struct net_device *dev; + /* order of the fields doesn't matter */ + refcount_t users; + unsigned char *data; + char __pkt_type_offset[0]; +}; + +/* copy arguments from + * include/trace/events/skb.h: + * TRACE_EVENT(kfree_skb, + * TP_PROTO(struct sk_buff *skb, void *location), + * + * into struct below: + */ +struct trace_kfree_skb { + struct sk_buff *skb; + void *location; +}; + +SEC("tp_btf/kfree_skb") +int trace_kfree_skb(struct trace_kfree_skb *ctx) +{ + struct sk_buff *skb = ctx->skb; + struct net_device *dev; + int ifindex; + struct callback_head *ptr; + void *func; + int users; + unsigned char *data; + unsigned short pkt_data; + char pkt_type; + + __builtin_preserve_access_index(({ + users = skb->users.refs.counter; + data = skb->data; + dev = skb->dev; + ifindex = dev->ifindex; + ptr = dev->ifalias->rcuhead.next; + func = ptr->func; + })); + + bpf_probe_read(&pkt_type, sizeof(pkt_type), _(&skb->__pkt_type_offset)); + pkt_type &= 7; + + /* read eth proto */ + bpf_probe_read(&pkt_data, sizeof(pkt_data), data + 12); + + bpf_printk("rcuhead.next %llx func %llx\n", ptr, func); + bpf_printk("skb->len %d users %d pkt_type %x\n", + _(skb->len), users, pkt_type); + bpf_printk("skb->queue_mapping %d\n", _(skb->queue_mapping)); + bpf_printk("dev->ifindex %d data %llx pkt_data %x\n", + ifindex, data, pkt_data); + + if (users != 1 || pkt_data != bpf_htons(0x86dd) || ifindex != 1) + /* raw tp ignores return value */ + return 0; + + /* send first 72 byte of the packet to user space */ + bpf_skb_output(skb, &perf_buf_map, (72ull << 32) | BPF_F_CURRENT_CPU, + &ifindex, sizeof(ifindex)); + return 0; +} From 8d285a3b2e83796d33ec3674b25fe0bfcc647e92 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 17 Oct 2019 10:37:52 +0200 Subject: [PATCH 22/52] selftests/bpf: Restore the netns after flow dissector reattach test flow_dissector_reattach test changes the netns we run in but does not restore it to the one we started in when finished. This interferes with tests that run after it. Fix it by restoring the netns when done. Fixes: f97eea1756f3 ("selftests/bpf: Check that flow dissector can be re-attached") Reported-by: Alexei Starovoitov Reported-by: Andrii Nakryiko Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20191017083752.30999-1-jakub@cloudflare.com --- .../bpf/prog_tests/flow_dissector_reattach.c | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c index 777faffc4639..1f51ba66b98b 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c @@ -91,12 +91,18 @@ out_close: void test_flow_dissector_reattach(void) { - int init_net, err; + int init_net, self_net, err; + + self_net = open("/proc/self/ns/net", O_RDONLY); + if (CHECK_FAIL(self_net < 0)) { + perror("open(/proc/self/ns/net"); + return; + } init_net = open("/proc/1/ns/net", O_RDONLY); if (CHECK_FAIL(init_net < 0)) { perror("open(/proc/1/ns/net)"); - return; + goto out_close; } err = setns(init_net, CLONE_NEWNET); @@ -108,7 +114,7 @@ void test_flow_dissector_reattach(void) if (is_attached(init_net)) { test__skip(); printf("Can't test with flow dissector attached to init_net\n"); - return; + goto out_setns; } /* First run tests in root network namespace */ @@ -118,10 +124,17 @@ void test_flow_dissector_reattach(void) err = unshare(CLONE_NEWNET); if (CHECK_FAIL(err)) { perror("unshare(CLONE_NEWNET)"); - goto out_close; + goto out_setns; } do_flow_dissector_reattach(); +out_setns: + /* Move back to netns we started in. */ + err = setns(self_net, CLONE_NEWNET); + if (CHECK_FAIL(err)) + perror("setns(/proc/self/ns/net)"); + out_close: close(init_net); + close(self_net); } From 0b6e71c398a947a335622ad52807e500e1b5fefa Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 15 Oct 2019 23:00:45 -0700 Subject: [PATCH 23/52] selftests/bpf: Teach test_progs to cd into subdir We are building a bunch of "flavors" of test_progs, e.g., w/ alu32 flag for Clang when building BPF object. test_progs setup is relying on having all the BPF object files and extra resources to be available in current working directory, though. But we actually build all these files into a separate sub-directory. Next set of patches establishes convention of naming "flavored" test_progs (and test runner binaries in general) as test_progs-flavor (e.g., test_progs-alu32), for each such extra flavor. This patch teaches test_progs binary to automatically detect its own extra flavor based on its argv[0], and if present, to change current directory to a flavor-specific subdirectory. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191016060051.2024182-2-andriin@fb.com --- tools/testing/selftests/bpf/test_progs.c | 33 +++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index af75a1c7a458..c7e52f4194e2 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -306,7 +306,7 @@ void *spin_lock_thread(void *arg) } /* extern declarations for test funcs */ -#define DEFINE_TEST(name) extern void test_##name(); +#define DEFINE_TEST(name) extern void test_##name(void); #include #undef DEFINE_TEST @@ -518,6 +518,33 @@ static void stdio_restore(void) #endif } +/* + * Determine if test_progs is running as a "flavored" test runner and switch + * into corresponding sub-directory to load correct BPF objects. + * + * This is done by looking at executable name. If it contains "-flavor" + * suffix, then we are running as a flavored test runner. + */ +int cd_flavor_subdir(const char *exec_name) +{ + /* General form of argv[0] passed here is: + * some/path/to/test_progs[-flavor], where -flavor part is optional. + * First cut out "test_progs[-flavor]" part, then extract "flavor" + * part, if it's there. + */ + const char *flavor = strrchr(exec_name, '/'); + + if (!flavor) + return 0; + flavor++; + flavor = strrchr(flavor, '-'); + if (!flavor) + return 0; + flavor++; + printf("Switching to flavor '%s' subdirectory...\n", flavor); + return chdir(flavor); +} + int main(int argc, char **argv) { static const struct argp argp = { @@ -531,6 +558,10 @@ int main(int argc, char **argv) if (err) return err; + err = cd_flavor_subdir(argv[0]); + if (err) + return err; + libbpf_set_print(libbpf_print_fn); srand(time(NULL)); From d25c5e23552d54ebb9eea0de0d8cf9b7a7c5535c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 15 Oct 2019 23:00:46 -0700 Subject: [PATCH 24/52] selftests/bpf: Make CO-RE reloc test impartial to test_progs flavor test_core_reloc_kernel test captures its own process name and validates it as part of the test. Given extra "flavors" of test_progs, this break for anything by default test_progs binary. Fix the test to cut out flavor part of the process name. Fixes: ee2eb063d330 ("selftests/bpf: Add BPF_CORE_READ and BPF_CORE_READ_STR_INTO macro tests") Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191016060051.2024182-3-andriin@fb.com --- tools/testing/selftests/bpf/prog_tests/core_reloc.c | 4 ++-- tools/testing/selftests/bpf/progs/core_reloc_types.h | 2 +- tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 7e2f5b4bf7f3..2b3586dc6c86 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -211,8 +211,8 @@ static struct core_reloc_test_case test_cases[] = { .input_len = 0, .output = STRUCT_TO_CHAR_PTR(core_reloc_kernel_output) { .valid = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, }, - .comm = "test_progs\0\0\0\0\0", - .comm_len = 11, + .comm = "test_progs", + .comm_len = sizeof("test_progs"), }, .output_len = sizeof(struct core_reloc_kernel_output), }, diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index ad763ec0ba8f..f5939d9d5c61 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -6,7 +6,7 @@ struct core_reloc_kernel_output { int valid[10]; - char comm[16]; + char comm[sizeof("test_progs")]; int comm_len; }; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c index 50f609618b65..a4b5e0562ed5 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c @@ -15,7 +15,8 @@ static volatile struct data { struct core_reloc_kernel_output { int valid[10]; - char comm[16]; + /* we have test_progs[-flavor], so cut flavor part */ + char comm[sizeof("test_progs")]; int comm_len; }; From ee6c52e92dd0dc72f08d2546eca037ee9606ddb3 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 15 Oct 2019 23:00:47 -0700 Subject: [PATCH 25/52] selftests/bpf: Switch test_maps to test_progs' test.h format Make test_maps use tests.h header format consistent with the one used by test_progs, to facilitate unification. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191016060051.2024182-4-andriin@fb.com --- tools/testing/selftests/bpf/Makefile | 8 +------- tools/testing/selftests/bpf/test_maps.c | 8 ++++---- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 00d05c5e2d57..5f97262e5fcb 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -249,14 +249,8 @@ $(OUTPUT)/test_maps: test_maps.c $(MAP_TESTS_FILES) | $(MAP_TESTS_H) $(MAP_TESTS_H): $(MAP_TESTS_FILES) | $(MAP_TESTS_DIR) $(shell ( cd map_tests/; \ echo '/* Generated header, do not edit */'; \ - echo '#ifdef DECLARE'; \ ls *.c 2> /dev/null | \ - sed -e 's@\([^\.]*\)\.c@extern void test_\1(void);@'; \ - echo '#endif'; \ - echo '#ifdef CALL'; \ - ls *.c 2> /dev/null | \ - sed -e 's@\([^\.]*\)\.c@test_\1();@'; \ - echo '#endif' \ + sed -e 's@\([^\.]*\)\.c@DEFINE_TEST(\1)@'; \ ) > $(MAP_TESTS_H)) VERIFIER_TESTS_DIR = $(OUTPUT)/verifier diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index e1f1becda529..806b298397d3 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1717,9 +1717,9 @@ static void run_all_tests(void) test_map_in_map(); } -#define DECLARE +#define DEFINE_TEST(name) extern void test_##name(void); #include -#undef DECLARE +#undef DEFINE_TEST int main(void) { @@ -1731,9 +1731,9 @@ int main(void) map_flags = BPF_F_NO_PREALLOC; run_all_tests(); -#define CALL +#define DEFINE_TEST(name) test_##name(); #include -#undef CALL +#undef DEFINE_TEST printf("test_maps: OK, %d SKIPPED\n", skips); return 0; From 03dcb78460c294a907eeeec1b756cfcd161d2075 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 15 Oct 2019 23:00:48 -0700 Subject: [PATCH 26/52] selftests/bpf: Add simple per-test targets to Makefile Currently it's impossible to do `make test_progs` and have only test_progs be built, because all the binary targets are defined in terms of $(OUTPUT)/, and $(OUTPUT) is absolute path to current directory (or whatever gets overridden to by user). This patch adds simple re-directing targets for all test targets making it possible to do simple and nice `make test_progs` (and any other target). Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191016060051.2024182-5-andriin@fb.com --- tools/testing/selftests/bpf/Makefile | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 5f97262e5fcb..fbced23935cc 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -84,6 +84,16 @@ TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_use include ../lib.mk +# Define simple and short `make test_progs`, `make test_sysctl`, etc targets +# to build individual tests. +# NOTE: Semicolon at the end is critical to override lib.mk's default static +# rule for binaries. +$(notdir $(TEST_GEN_PROGS) \ + $(TEST_PROGS) \ + $(TEST_PROGS_EXTENDED) \ + $(TEST_GEN_PROGS_EXTENDED) \ + $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ; + # NOTE: $(OUTPUT) won't get default value if used before lib.mk TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read all: $(TEST_CUSTOM_PROGS) From 74b5a5968fe8742205a86234bb99d493bcd5ecee Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 15 Oct 2019 23:00:49 -0700 Subject: [PATCH 27/52] selftests/bpf: Replace test_progs and test_maps w/ general rule Define test runner generation meta-rule that codifies dependencies between test runner, its tests, and its dependent BPF programs. Use that for defining test_progs and test_maps test-runners. Also additionally define 2 flavors of test_progs: - alu32, which builds BPF programs with 32-bit registers codegen; - bpf_gcc, which build BPF programs using GCC, if it supports BPF target. Overall, this is accomplished through $(eval)'ing a set of generic rules, which defines Makefile targets dynamically at runtime. See comments explaining the need for 2 $(evals), though. For each test runner we have (test_maps and test_progs, currently), and, optionally, their flavors, the logic of build process is modeled as follows (using test_progs as an example): - all BPF objects are in progs/: - BPF object's .o file is built into output directory from corresponding progs/.c file; - all BPF objects in progs/*.c depend on all progs/*.h headers; - all BPF objects depend on bpf_*.h helpers from libbpf (but not libbpf archive). There is an extra rule to trigger bpf_helper_defs.h (re-)build, if it's not present/outdated); - build recipe for BPF object can be re-defined per test runner/flavor; - test files are built from prog_tests/*.c: - all such test file objects are built on individual file basis; - currently, every single test file depends on all BPF object files; this might be improved in follow up patches to do 1-to-1 dependency, but allowing to customize this per each individual test; - each test runner definition can specify a list of extra .c and .h files to be built along test files and test runner binary; all such headers are becoming automatic dependency of each test .c file; - due to test files sometimes embedding (using .incbin assembly directive) contents of some BPF objects at compilation time, which are expected to be in CWD of compiler, compilation for test file object does cd into test runner's output directory; to support this mode all the include paths are turned into absolute paths using $(abspath) make function; - prog_tests/test.h is automatically (re-)generated with an entry for each .c file in prog_tests/; - final test runner binary is linked together from test object files and extra object files, linking together libbpf's archive as well; - it's possible to specify extra "resource" files/targets, which will be copied into test runner output directory, if it differes from Makefile-wide $(OUTPUT). This is used to ensure btf_dump test cases and urandom_read binary is put into a test runner's CWD for tests to find them in runtime. For flavored test runners, their output directory is a subdirectory of common Makefile-wide $(OUTPUT) directory with flavor name used as subdirectory name. BPF objects targets might be reused between different test runners, so extra checks are employed to not double-define them. Similarly, we have redefinition guards for output directories and test headers. test_verifier follows slightly different patterns and is simple enough to not justify generalizing TEST_RUNNER_DEFINE/TEST_RUNNER_DEFINE_RULES further to accomodate these differences. Instead, rules for test_verifier are minimized and simplified, while preserving correctness of dependencies. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191016060051.2024182-6-andriin@fb.com --- tools/testing/selftests/bpf/.gitignore | 5 +- tools/testing/selftests/bpf/Makefile | 303 ++++++++++++++----------- 2 files changed, 175 insertions(+), 133 deletions(-) diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 7470327edcfe..c51f356f84b5 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -7,7 +7,7 @@ FEATURE-DUMP.libbpf fixdep test_align test_dev_cgroup -test_progs +/test_progs* test_tcpbpf_user test_verifier_log feature @@ -33,9 +33,10 @@ test_tcpnotify_user test_libbpf test_tcp_check_syncookie_user test_sysctl -alu32 libbpf.pc libbpf.so.* test_hashmap test_btf_dump xdping +/alu32 +/bpf_gcc diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index fbced23935cc..c9f43d49eac9 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -2,10 +2,12 @@ include ../../../../scripts/Kbuild.include include ../../../scripts/Makefile.arch -LIBDIR := ../../../lib +CURDIR := $(abspath .) +LIBDIR := $(abspath ../../../lib) BPFDIR := $(LIBDIR)/bpf -APIDIR := ../../../include/uapi -GENDIR := ../../../../include/generated +TOOLSDIR := $(abspath ../../../include) +APIDIR := $(TOOLSDIR)/uapi +GENDIR := $(abspath ../../../../include/generated) GENHDR := $(GENDIR)/autoconf.h ifneq ($(wildcard $(GENHDR)),) @@ -16,8 +18,9 @@ CLANG ?= clang LLC ?= llc LLVM_OBJCOPY ?= llvm-objcopy BPF_GCC ?= $(shell command -v bpf-gcc;) -CFLAGS += -g -Wall -O2 -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) -I$(GENDIR) $(GENFLAGS) -I../../../include \ - -Dbpf_prog_load=bpf_prog_test_load \ +CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) \ + -I$(GENDIR) -I$(TOOLSDIR) -I$(CURDIR) \ + -Dbpf_prog_load=bpf_prog_test_load \ -Dbpf_load_program=bpf_test_load_program LDLIBS += -lcap -lelf -lrt -lpthread @@ -29,12 +32,6 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \ test_cgroup_attach xdping -BPF_OBJ_FILES = $(patsubst %.c,%.o, $(notdir $(wildcard progs/*.c))) -TEST_GEN_FILES = $(BPF_OBJ_FILES) - -BTF_C_FILES = $(wildcard progs/btf_dump_test_case_*.c) -TEST_FILES = $(BTF_C_FILES) - # Also test sub-register code-gen if LLVM has eBPF v3 processor support which # contains both ALU32 and JMP32 instructions. SUBREG_CODEGEN := $(shell echo "int cal(int a) { return a > 0; }" | \ @@ -42,13 +39,17 @@ SUBREG_CODEGEN := $(shell echo "int cal(int a) { return a > 0; }" | \ $(LLC) -mattr=+alu32 -mcpu=v3 2>&1 | \ grep 'if w') ifneq ($(SUBREG_CODEGEN),) -TEST_GEN_FILES += $(patsubst %.o,alu32/%.o, $(BPF_OBJ_FILES)) +TEST_GEN_PROGS += test_progs-alu32 endif +# Also test bpf-gcc, if present ifneq ($(BPF_GCC),) -TEST_GEN_FILES += $(patsubst %.o,bpf_gcc/%.o, $(BPF_OBJ_FILES)) +TEST_GEN_PROGS += test_progs-bpf_gcc endif +TEST_GEN_FILES = +TEST_FILES = + # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ test_libbpf.sh \ @@ -82,6 +83,8 @@ TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_use flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ test_lirc_mode2_user +TEST_CUSTOM_PROGS = urandom_read + include ../lib.mk # Define simple and short `make test_progs`, `make test_sysctl`, etc targets @@ -94,21 +97,12 @@ $(notdir $(TEST_GEN_PROGS) \ $(TEST_GEN_PROGS_EXTENDED) \ $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ; -# NOTE: $(OUTPUT) won't get default value if used before lib.mk -TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read -all: $(TEST_CUSTOM_PROGS) - -$(OUTPUT)/urandom_read: $(OUTPUT)/%: %.c +$(OUTPUT)/urandom_read: urandom_read.c $(CC) -o $@ $< -Wl,--build-id -$(OUTPUT)/test_stub.o: test_stub.c - $(CC) $(TEST_PROGS_CFLAGS) $(CFLAGS) -c -o $@ $< - BPFOBJ := $(OUTPUT)/libbpf.a -$(TEST_GEN_PROGS): $(OUTPUT)/test_stub.o $(BPFOBJ) - -$(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(OUTPUT)/libbpf.a +$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ) $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c $(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c @@ -118,7 +112,6 @@ $(OUTPUT)/test_socket_cookie: cgroup_helpers.c $(OUTPUT)/test_sockmap: cgroup_helpers.c $(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c $(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c -$(OUTPUT)/test_progs: cgroup_helpers.c trace_helpers.c $(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c $(OUTPUT)/test_netcnt: cgroup_helpers.c @@ -134,6 +127,10 @@ force: $(BPFOBJ): force $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ +BPF_HELPERS := $(BPFDIR)/bpf_helper_defs.h $(wildcard $(BPFDIR)/bpf_*.h) +$(BPFDIR)/bpf_helper_defs.h: + $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ bpf_helper_defs.h + # Get Clang's default includes on this system, as opposed to those seen by # '-target bpf'. This fixes "missing" files on some architectures/distros, # such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. @@ -144,10 +141,11 @@ define get_sys_includes $(shell $(1) -v -E - &1 \ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') endef + CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG)) BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) \ - -I. -I./include/uapi -I../../../include/uapi \ - -I$(BPFDIR) -I$(OUTPUT)/../usr/include + -I. -I./include/uapi -I$(APIDIR) \ + -I$(BPFDIR) -I$(abspath $(OUTPUT)/../usr/include) CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \ -Wno-compare-distinct-pointer-types @@ -159,127 +157,170 @@ $(OUTPUT)/test_queue_map.o: test_queue_stack_map.h $(OUTPUT)/test_stack_map.o: test_queue_stack_map.h $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h -$(OUTPUT)/test_progs.o: flow_dissector_load.h -TEST_PROGS_CFLAGS := -I. -I$(OUTPUT) -TEST_MAPS_CFLAGS := -I. -I$(OUTPUT) -TEST_VERIFIER_CFLAGS := -I. -I$(OUTPUT) -Iverifier +# Build BPF object using Clang +# $1 - input .c file +# $2 - output .o file +# $3 - CFLAGS +# $4 - LDFLAGS +define CLANG_BPF_BUILD_RULE + ($(CLANG) $3 -O2 -target bpf -emit-llvm \ + -c $1 -o - || echo "BPF obj compilation failed") | \ + $(LLC) -march=bpf -mcpu=probe $4 -filetype=obj -o $2 +endef +# Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC +define CLANG_NATIVE_BPF_BUILD_RULE + ($(CLANG) $3 -O2 -emit-llvm \ + -c $1 -o - || echo "BPF obj compilation failed") | \ + $(LLC) -march=bpf -mcpu=probe $4 -filetype=obj -o $2 +endef +# Build BPF object using GCC +define GCC_BPF_BUILD_RULE + $(BPF_GCC) $3 $4 -O2 -c $1 -o $2 +endef -ifneq ($(SUBREG_CODEGEN),) -ALU32_BUILD_DIR = $(OUTPUT)/alu32 -TEST_CUSTOM_PROGS += $(ALU32_BUILD_DIR)/test_progs_32 -$(ALU32_BUILD_DIR): - mkdir -p $@ +# Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on +# $eval()) and pass control to DEFINE_TEST_RUNNER_RULES. +# Parameters: +# $1 - test runner base binary name (e.g., test_progs) +# $2 - test runner extra "flavor" (e.g., alu32, gcc-bpf, etc) +define DEFINE_TEST_RUNNER -$(ALU32_BUILD_DIR)/urandom_read: $(OUTPUT)/urandom_read | $(ALU32_BUILD_DIR) - cp $< $@ +TRUNNER_OUTPUT := $(OUTPUT)$(if $2,/)$2 +TRUNNER_BINARY := $1$(if $2,-)$2 +TRUNNER_TEST_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.test.o, \ + $$(notdir $$(wildcard $(TRUNNER_TESTS_DIR)/*.c))) +TRUNNER_EXTRA_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \ + $$(filter %.c,$(TRUNNER_EXTRA_SOURCES))) +TRUNNER_EXTRA_HDRS := $$(filter %.h,$(TRUNNER_EXTRA_SOURCES)) +TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h +TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \ + $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c))) -$(ALU32_BUILD_DIR)/test_progs_32: test_progs.c $(OUTPUT)/libbpf.a\ - $(ALU32_BUILD_DIR)/urandom_read \ - | $(ALU32_BUILD_DIR) - $(CC) $(TEST_PROGS_CFLAGS) $(CFLAGS) \ - -o $(ALU32_BUILD_DIR)/test_progs_32 \ - test_progs.c test_stub.c cgroup_helpers.c trace_helpers.c prog_tests/*.c \ - $(OUTPUT)/libbpf.a $(LDLIBS) +# Evaluate rules now with extra TRUNNER_XXX variables above already defined +$$(eval $$(call DEFINE_TEST_RUNNER_RULES,$1,$2)) -$(ALU32_BUILD_DIR)/test_progs_32: $(PROG_TESTS_H) -$(ALU32_BUILD_DIR)/test_progs_32: prog_tests/*.c +endef -$(ALU32_BUILD_DIR)/%.o: progs/%.c $(ALU32_BUILD_DIR)/test_progs_32 \ - | $(ALU32_BUILD_DIR) - ($(CLANG) $(BPF_CFLAGS) $(CLANG_CFLAGS) -O2 -target bpf -emit-llvm \ - -c $< -o - || echo "clang failed") | \ - $(LLC) -march=bpf -mcpu=probe -mattr=+alu32 $(LLC_FLAGS) \ - -filetype=obj -o $@ +# Using TRUNNER_XXX variables, provided by callers of DEFINE_TEST_RUNNER and +# set up by DEFINE_TEST_RUNNER itself, create test runner build rules with: +# $1 - test runner base binary name (e.g., test_progs) +# $2 - test runner extra "flavor" (e.g., alu32, gcc-bpf, etc) +define DEFINE_TEST_RUNNER_RULES + +ifeq ($($(TRUNNER_OUTPUT)-dir),) +$(TRUNNER_OUTPUT)-dir := y +$(TRUNNER_OUTPUT): + mkdir -p $$@ endif +# ensure we set up BPF objects generation rule just once for a given +# input/output directory combination +ifeq ($($(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs),) +$(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs := y +$(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ + $(TRUNNER_BPF_PROGS_DIR)/%.c \ + $(TRUNNER_BPF_PROGS_DIR)/*.h \ + $$(BPF_HELPERS) | $(TRUNNER_OUTPUT) + $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ + $(TRUNNER_BPF_CFLAGS), \ + $(TRUNNER_BPF_LDFLAGS)) +endif + +# ensure we set up tests.h header generation rule just once +ifeq ($($(TRUNNER_TESTS_DIR)-tests-hdr),) +$(TRUNNER_TESTS_DIR)-tests-hdr := y +$(TRUNNER_TESTS_HDR): $(TRUNNER_TESTS_DIR)/*.c + $$(shell ( cd $(TRUNNER_TESTS_DIR); \ + echo '/* Generated header, do not edit */'; \ + ls *.c 2> /dev/null | \ + sed -e 's@\([^\.]*\)\.c@DEFINE_TEST(\1)@'; \ + ) > $$@) +endif + +# compile individual test files +# Note: we cd into output directory to ensure embedded BPF object is found +$(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \ + $(TRUNNER_TESTS_DIR)/%.c \ + $(TRUNNER_EXTRA_HDRS) \ + $(TRUNNER_BPF_OBJS) \ + $$(BPFOBJ) | $(TRUNNER_OUTPUT) + cd $$(@D) && $$(CC) $$(CFLAGS) $$(LDLIBS) -c $(CURDIR)/$$< -o $$(@F) + +$(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ + %.c \ + $(TRUNNER_EXTRA_HDRS) \ + $(TRUNNER_TESTS_HDR) \ + $$(BPFOBJ) | $(TRUNNER_OUTPUT) + $$(CC) $$(CFLAGS) $$(LDLIBS) -c $$< -o $$@ + +$(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT) +ifneq ($2,) + # only copy extra resources if in flavored build + cp -a $$^ $(TRUNNER_OUTPUT)/ +endif + +$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ + $(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \ + | $(TRUNNER_BINARY)-extras + $$(CC) $$(CFLAGS) $$(LDLIBS) $$(filter %.a %.o,$$^) -o $$@ + +endef + +# Define test_progs test runner. +TRUNNER_TESTS_DIR := prog_tests +TRUNNER_BPF_PROGS_DIR := progs +TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ + flow_dissector_load.h +TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \ + $(wildcard progs/btf_dump_test_case_*.c) +TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE +TRUNNER_BPF_CFLAGS := -I. -I$(OUTPUT) $(BPF_CFLAGS) $(CLANG_CFLAGS) +TRUNNER_BPF_LDFLAGS := +$(eval $(call DEFINE_TEST_RUNNER,test_progs)) + +# Define test_progs-alu32 test runner. +ifneq ($(SUBREG_CODEGEN),) +TRUNNER_BPF_LDFLAGS += -mattr=+alu32 +$(eval $(call DEFINE_TEST_RUNNER,test_progs,alu32)) +endif + +# Define test_progs BPF-GCC-flavored test runner. ifneq ($(BPF_GCC),) -GCC_SYS_INCLUDES = $(call get_sys_includes,gcc) IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - /dev/null | \ - sed -e 's@\([^\.]*\)\.c@DEFINE_TEST(\1)@'; \ - ) > $(PROG_TESTS_H)) - -MAP_TESTS_DIR = $(OUTPUT)/map_tests -$(MAP_TESTS_DIR): - mkdir -p $@ -MAP_TESTS_H := $(MAP_TESTS_DIR)/tests.h -MAP_TESTS_FILES := $(wildcard map_tests/*.c) -test_maps.c: $(MAP_TESTS_H) -$(OUTPUT)/test_maps: CFLAGS += $(TEST_MAPS_CFLAGS) -$(OUTPUT)/test_maps: test_maps.c $(MAP_TESTS_FILES) | $(MAP_TESTS_H) -$(MAP_TESTS_H): $(MAP_TESTS_FILES) | $(MAP_TESTS_DIR) - $(shell ( cd map_tests/; \ - echo '/* Generated header, do not edit */'; \ - ls *.c 2> /dev/null | \ - sed -e 's@\([^\.]*\)\.c@DEFINE_TEST(\1)@'; \ - ) > $(MAP_TESTS_H)) - -VERIFIER_TESTS_DIR = $(OUTPUT)/verifier -$(VERIFIER_TESTS_DIR): - mkdir -p $@ -VERIFIER_TESTS_H := $(VERIFIER_TESTS_DIR)/tests.h -VERIFIER_TEST_FILES := $(wildcard verifier/*.c) -test_verifier.c: $(VERIFIER_TESTS_H) -$(OUTPUT)/test_verifier: CFLAGS += $(TEST_VERIFIER_CFLAGS) -$(OUTPUT)/test_verifier: test_verifier.c | $(VERIFIER_TEST_FILES) $(VERIFIER_TESTS_H) -$(VERIFIER_TESTS_H): $(VERIFIER_TEST_FILES) | $(VERIFIER_TESTS_DIR) +# Define test_verifier test runner. +# It is much simpler than test_maps/test_progs and sufficiently different from +# them (e.g., test.h is using completely pattern), that it's worth just +# explicitly defining all the rules explicitly. +verifier/tests.h: verifier/*.c $(shell ( cd verifier/; \ echo '/* Generated header, do not edit */'; \ echo '#ifdef FILL_ARRAY'; \ - ls *.c 2> /dev/null | \ - sed -e 's@\(.*\)@#include \"\1\"@'; \ + ls *.c 2> /dev/null | sed -e 's@\(.*\)@#include \"\1\"@'; \ echo '#endif' \ - ) > $(VERIFIER_TESTS_H)) + ) > verifier/tests.h) +$(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) + $(CC) $(CFLAGS) $(LDLIBS) $(filter %.a %.o %.c,$^) -o $@ -EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(ALU32_BUILD_DIR) $(BPF_GCC_BUILD_DIR) \ - $(VERIFIER_TESTS_H) $(PROG_TESTS_H) $(MAP_TESTS_H) \ - feature +EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) \ + prog_tests/tests.h map_tests/tests.h verifier/tests.h \ + feature $(OUTPUT)/*.o $(OUTPUT)/alu32 $(OUTPUT)/bpf_gcc From 5ac93074b581984d67926d48b2c601b12b35a0f9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 15 Oct 2019 23:00:50 -0700 Subject: [PATCH 28/52] selftests/bpf: Move test_queue_stack_map.h into progs/ where it belongs test_queue_stack_map.h is used only from BPF programs. Thus it should be part of progs/ subdir. An added benefit of moving it there is that new TEST_RUNNER_DEFINE_RULES macro-rule will properly capture dependency on this header for all BPF objects and trigger re-build, if it changes. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191016060051.2024182-7-andriin@fb.com --- tools/testing/selftests/bpf/Makefile | 3 --- tools/testing/selftests/bpf/{ => progs}/test_queue_stack_map.h | 0 2 files changed, 3 deletions(-) rename tools/testing/selftests/bpf/{ => progs}/test_queue_stack_map.h (100%) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index c9f43d49eac9..16056e5d399d 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -153,9 +153,6 @@ CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \ $(OUTPUT)/test_l4lb_noinline.o: BPF_CFLAGS += -fno-inline $(OUTPUT)/test_xdp_noinline.o: BPF_CFLAGS += -fno-inline -$(OUTPUT)/test_queue_map.o: test_queue_stack_map.h -$(OUTPUT)/test_stack_map.o: test_queue_stack_map.h - $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h # Build BPF object using Clang diff --git a/tools/testing/selftests/bpf/test_queue_stack_map.h b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h similarity index 100% rename from tools/testing/selftests/bpf/test_queue_stack_map.h rename to tools/testing/selftests/bpf/progs/test_queue_stack_map.h From cb79a4e1b80b191779c68b9f04f7e43c226ce076 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 15 Oct 2019 23:00:51 -0700 Subject: [PATCH 29/52] selftest/bpf: Remove test_libbpf.sh and test_libbpf_open test_progs is much more sophisticated superset of tests compared to test_libbpf.sh and test_libbpf_open. Remove test_libbpf.sh and test_libbpf_open. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191016060051.2024182-8-andriin@fb.com --- tools/testing/selftests/bpf/.gitignore | 1 - tools/testing/selftests/bpf/Makefile | 3 +- tools/testing/selftests/bpf/test_libbpf.sh | 43 ------ .../testing/selftests/bpf/test_libbpf_open.c | 144 ------------------ 4 files changed, 1 insertion(+), 190 deletions(-) delete mode 100755 tools/testing/selftests/bpf/test_libbpf.sh delete mode 100644 tools/testing/selftests/bpf/test_libbpf_open.c diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index c51f356f84b5..6f46170e09c1 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -11,7 +11,6 @@ test_dev_cgroup test_tcpbpf_user test_verifier_log feature -test_libbpf_open test_sock test_sock_addr test_sock_fields diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 16056e5d399d..4ff5f4aada08 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -52,7 +52,6 @@ TEST_FILES = # Order correspond to 'make run_tests' order TEST_PROGS := test_kmod.sh \ - test_libbpf.sh \ test_xdp_redirect.sh \ test_xdp_meta.sh \ test_xdp_veth.sh \ @@ -79,7 +78,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \ test_xdp_vlan.sh # Compile but not part of 'make run_tests' -TEST_GEN_PROGS_EXTENDED = test_libbpf_open test_sock_addr test_skb_cgroup_id_user \ +TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ test_lirc_mode2_user diff --git a/tools/testing/selftests/bpf/test_libbpf.sh b/tools/testing/selftests/bpf/test_libbpf.sh deleted file mode 100755 index 2989b2e2d856..000000000000 --- a/tools/testing/selftests/bpf/test_libbpf.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -export TESTNAME=test_libbpf - -# Determine selftest success via shell exit code -exit_handler() -{ - if [ $? -eq 0 ]; then - echo "selftests: $TESTNAME [PASS]"; - else - echo "$TESTNAME: failed at file $LAST_LOADED" 1>&2 - echo "selftests: $TESTNAME [FAILED]"; - fi -} - -libbpf_open_file() -{ - LAST_LOADED=$1 - if [ -n "$VERBOSE" ]; then - ./test_libbpf_open $1 - else - ./test_libbpf_open --quiet $1 - fi -} - -# Exit script immediately (well catched by trap handler) if any -# program/thing exits with a non-zero status. -set -e - -# (Use 'trap -l' to list meaning of numbers) -trap exit_handler 0 2 3 6 9 - -libbpf_open_file test_l4lb.o - -# Load a program with BPF-to-BPF calls -libbpf_open_file test_l4lb_noinline.o - -# Load a program compiled without the "-target bpf" flag -libbpf_open_file test_xdp.o - -# Success -exit 0 diff --git a/tools/testing/selftests/bpf/test_libbpf_open.c b/tools/testing/selftests/bpf/test_libbpf_open.c deleted file mode 100644 index 9e9db202d218..000000000000 --- a/tools/testing/selftests/bpf/test_libbpf_open.c +++ /dev/null @@ -1,144 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 - * Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc. - */ -static const char *__doc__ = - "Libbpf test program for loading BPF ELF object files"; - -#include -#include -#include -#include -#include -#include - -#include "bpf_rlimit.h" - -static const struct option long_options[] = { - {"help", no_argument, NULL, 'h' }, - {"debug", no_argument, NULL, 'D' }, - {"quiet", no_argument, NULL, 'q' }, - {0, 0, NULL, 0 } -}; - -static void usage(char *argv[]) -{ - int i; - - printf("\nDOCUMENTATION:\n%s\n\n", __doc__); - printf(" Usage: %s (options-see-below) BPF_FILE\n", argv[0]); - printf(" Listing options:\n"); - for (i = 0; long_options[i].name != 0; i++) { - printf(" --%-12s", long_options[i].name); - printf(" short-option: -%c", - long_options[i].val); - printf("\n"); - } - printf("\n"); -} - -static bool debug = 0; -static int libbpf_debug_print(enum libbpf_print_level level, - const char *fmt, va_list args) -{ - if (level == LIBBPF_DEBUG && !debug) - return 0; - - fprintf(stderr, "[%d] ", level); - return vfprintf(stderr, fmt, args); -} - -#define EXIT_FAIL_LIBBPF EXIT_FAILURE -#define EXIT_FAIL_OPTION 2 - -int test_walk_progs(struct bpf_object *obj, bool verbose) -{ - struct bpf_program *prog; - int cnt = 0; - - bpf_object__for_each_program(prog, obj) { - cnt++; - if (verbose) - printf("Prog (count:%d) section_name: %s\n", cnt, - bpf_program__title(prog, false)); - } - return 0; -} - -int test_walk_maps(struct bpf_object *obj, bool verbose) -{ - struct bpf_map *map; - int cnt = 0; - - bpf_object__for_each_map(map, obj) { - cnt++; - if (verbose) - printf("Map (count:%d) name: %s\n", cnt, - bpf_map__name(map)); - } - return 0; -} - -int test_open_file(char *filename, bool verbose) -{ - struct bpf_object *bpfobj = NULL; - long err; - - if (verbose) - printf("Open BPF ELF-file with libbpf: %s\n", filename); - - /* Load BPF ELF object file and check for errors */ - bpfobj = bpf_object__open(filename); - err = libbpf_get_error(bpfobj); - if (err) { - char err_buf[128]; - libbpf_strerror(err, err_buf, sizeof(err_buf)); - if (verbose) - printf("Unable to load eBPF objects in file '%s': %s\n", - filename, err_buf); - return EXIT_FAIL_LIBBPF; - } - test_walk_progs(bpfobj, verbose); - test_walk_maps(bpfobj, verbose); - - if (verbose) - printf("Close BPF ELF-file with libbpf: %s\n", - bpf_object__name(bpfobj)); - bpf_object__close(bpfobj); - - return 0; -} - -int main(int argc, char **argv) -{ - char filename[1024] = { 0 }; - bool verbose = 1; - int longindex = 0; - int opt; - - libbpf_set_print(libbpf_debug_print); - - /* Parse commands line args */ - while ((opt = getopt_long(argc, argv, "hDq", - long_options, &longindex)) != -1) { - switch (opt) { - case 'D': - debug = 1; - break; - case 'q': /* Use in scripting mode */ - verbose = 0; - break; - case 'h': - default: - usage(argv); - return EXIT_FAIL_OPTION; - } - } - if (optind >= argc) { - usage(argv); - printf("ERROR: Expected BPF_FILE argument after options\n"); - return EXIT_FAIL_OPTION; - } - snprintf(filename, sizeof(filename), "%s", argv[optind]); - - return test_open_file(filename, verbose); -} From c108e3c1bdbd0783d7c19ee80abb0591f79029e8 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 17 Oct 2019 23:09:33 -0700 Subject: [PATCH 30/52] bpf: Fix bpf_attr.attach_btf_id check Only raw_tracepoint program type can have bpf_attr.attach_btf_id >= 0. Make sure to reject other program types that accidentally set it to non-zero. Fixes: ccfe29eb29c2 ("bpf: Add attach_btf_id attribute to program load") Reported-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20191018060933.2950231-1-ast@kernel.org --- kernel/bpf/syscall.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 523e3ac15a08..16ea3c0db4f6 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1570,6 +1570,17 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, enum bpf_attach_type expected_attach_type, u32 btf_id) { + switch (prog_type) { + case BPF_PROG_TYPE_RAW_TRACEPOINT: + if (btf_id > BTF_MAX_TYPE) + return -EINVAL; + break; + default: + if (btf_id) + return -EINVAL; + break; + } + switch (prog_type) { case BPF_PROG_TYPE_CGROUP_SOCK: switch (expected_attach_type) { @@ -1610,13 +1621,7 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type, default: return -EINVAL; } - case BPF_PROG_TYPE_RAW_TRACEPOINT: - if (btf_id > BTF_MAX_TYPE) - return -EINVAL; - return 0; default: - if (btf_id) - return -EINVAL; return 0; } } From 1f5343c0ae9673543055e9794362766e1f0ed163 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 18 Oct 2019 17:03:44 +0800 Subject: [PATCH 31/52] bpf: Fix build error without CONFIG_NET If CONFIG_NET is n, building fails: kernel/trace/bpf_trace.o: In function `raw_tp_prog_func_proto': bpf_trace.c:(.text+0x1a34): undefined reference to `bpf_skb_output_proto' Wrap it into a #ifdef to fix this. Fixes: a7658e1a4164 ("bpf: Check types of arguments passed into helpers") Reported-by: Hulk Robot Signed-off-by: YueHaibing Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20191018090344.26936-1-yuehaibing@huawei.com --- kernel/trace/bpf_trace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 52f7e9d8c29b..c3240898cc44 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1055,8 +1055,10 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) switch (func_id) { case BPF_FUNC_perf_event_output: return &bpf_perf_event_output_proto_raw_tp; +#ifdef CONFIG_NET case BPF_FUNC_skb_output: return &bpf_skb_output_proto; +#endif case BPF_FUNC_get_stackid: return &bpf_get_stackid_proto_raw_tp; case BPF_FUNC_get_stack: From 54b8625cd940b6baace0bd9b1cf26b2de68ba307 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 18 Oct 2019 07:41:26 -0700 Subject: [PATCH 32/52] bpf, libbpf: Add kernel version section parsing back With commit "libbpf: stop enforcing kern_version,..." we removed the kernel version section parsing in favor of querying for the kernel using uname() and populating the version using the result of the query. After this any version sections were simply ignored. Unfortunately, the world of kernels is not so friendly. I've found some customized kernels where uname() does not match the in kernel version. To fix this so programs can load in this environment this patch adds back parsing the section and if it exists uses the user specified kernel version to override the uname() result. However, keep most the kernel uname() discovery bits so users are not required to insert the version except in these odd cases. Fixes: 5e61f27070292 ("libbpf: stop enforcing kern_version, populate it for users") Signed-off-by: John Fastabend Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/157140968634.9073.6407090804163937103.stgit@john-XPS-13-9370 --- tools/lib/bpf/libbpf.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 22bf3b189947..cccfd9355134 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -657,6 +657,21 @@ bpf_object__init_license(struct bpf_object *obj, void *data, size_t size) return 0; } +static int +bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size) +{ + __u32 kver; + + if (size != sizeof(kver)) { + pr_warning("invalid kver section in %s\n", obj->path); + return -LIBBPF_ERRNO__FORMAT; + } + memcpy(&kver, data, sizeof(kver)); + obj->kern_version = kver; + pr_debug("kernel version of %s is %x\n", obj->path, obj->kern_version); + return 0; +} + static int compare_bpf_map(const void *_a, const void *_b) { const struct bpf_map *a = _a; @@ -1574,7 +1589,11 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps) if (err) return err; } else if (strcmp(name, "version") == 0) { - /* skip, we don't need it anymore */ + err = bpf_object__init_kversion(obj, + data->d_buf, + data->d_size); + if (err) + return err; } else if (strcmp(name, "maps") == 0) { obj->efile.maps_shndx = idx; } else if (strcmp(name, MAPS_ELF_SEC) == 0) { From ab81e203bc0d4a4542bca2498535f8761a3cfd92 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Sun, 20 Oct 2019 13:23:44 +0200 Subject: [PATCH 33/52] scripts/bpf: Print an error when known types list needs updating Don't generate a broken bpf_helper_defs.h header if the helper script needs updating because it doesn't recognize a newly added type. Instead print an error that explains why the build is failing, clean up the partially generated header and stop. v1->v2: - Switched from temporary file to .DELETE_ON_ERROR. Fixes: 456a513bb5d4 ("scripts/bpf: Emit an #error directive known types list needs updating") Suggested-by: Andrii Nakryiko Signed-off-by: Jakub Sitnicki Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20191020112344.19395-1-jakub@cloudflare.com --- scripts/bpf_helpers_doc.py | 4 ++-- tools/lib/bpf/Makefile | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 08300bc024da..7548569e8076 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -488,8 +488,8 @@ class PrinterHelpers(Printer): return t if t in self.mapped_types: return self.mapped_types[t] - print("") - print("#error \"Unrecognized type '%s', please add it to known types!\"" % t) + print("Unrecognized type '%s', please add it to known types!" % t, + file=sys.stderr) sys.exit(1) seen_helpers = set() diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 75b538577c17..54ff80faa8df 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -286,3 +286,6 @@ tags: # Declare the contents of the .PHONY variable as phony. We keep that # information in a variable so we can use it in if_changed and friends. .PHONY: $(PHONY) + +# Delete partially updated (corrupted) files on error +.DELETE_ON_ERROR: From be18010ea2d83c184cc32afdc895410a1cf2cbd5 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Mon, 21 Oct 2019 13:55:32 +0800 Subject: [PATCH 34/52] tools, bpf: Rename pr_warning to pr_warn to align with kernel logging For kernel logging macros, pr_warning() is completely removed and replaced by pr_warn(). By using pr_warn() in tools/lib/bpf/ for symmetry to kernel logging macros, we could eventually drop the use of pr_warning() in the whole kernel tree. Signed-off-by: Kefeng Wang Signed-off-by: Daniel Borkmann Reviewed-by: Sergey Senozhatsky Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20191021055532.185245-1-wangkefeng.wang@huawei.com --- tools/lib/bpf/btf.c | 56 +-- tools/lib/bpf/btf_dump.c | 18 +- tools/lib/bpf/libbpf.c | 693 ++++++++++++++++---------------- tools/lib/bpf/libbpf_internal.h | 8 +- tools/lib/bpf/xsk.c | 4 +- 5 files changed, 386 insertions(+), 393 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 3eae8d1addfa..d72e9a79dce1 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -390,14 +390,14 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) GElf_Ehdr ehdr; if (elf_version(EV_CURRENT) == EV_NONE) { - pr_warning("failed to init libelf for %s\n", path); + pr_warn("failed to init libelf for %s\n", path); return ERR_PTR(-LIBBPF_ERRNO__LIBELF); } fd = open(path, O_RDONLY); if (fd < 0) { err = -errno; - pr_warning("failed to open %s: %s\n", path, strerror(errno)); + pr_warn("failed to open %s: %s\n", path, strerror(errno)); return ERR_PTR(err); } @@ -405,19 +405,19 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) elf = elf_begin(fd, ELF_C_READ, NULL); if (!elf) { - pr_warning("failed to open %s as ELF file\n", path); + pr_warn("failed to open %s as ELF file\n", path); goto done; } if (!gelf_getehdr(elf, &ehdr)) { - pr_warning("failed to get EHDR from %s\n", path); + pr_warn("failed to get EHDR from %s\n", path); goto done; } if (!btf_check_endianness(&ehdr)) { - pr_warning("non-native ELF endianness is not supported\n"); + pr_warn("non-native ELF endianness is not supported\n"); goto done; } if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) { - pr_warning("failed to get e_shstrndx from %s\n", path); + pr_warn("failed to get e_shstrndx from %s\n", path); goto done; } @@ -427,29 +427,29 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext) idx++; if (gelf_getshdr(scn, &sh) != &sh) { - pr_warning("failed to get section(%d) header from %s\n", - idx, path); + pr_warn("failed to get section(%d) header from %s\n", + idx, path); goto done; } name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name); if (!name) { - pr_warning("failed to get section(%d) name from %s\n", - idx, path); + pr_warn("failed to get section(%d) name from %s\n", + idx, path); goto done; } if (strcmp(name, BTF_ELF_SEC) == 0) { btf_data = elf_getdata(scn, 0); if (!btf_data) { - pr_warning("failed to get section(%d, %s) data from %s\n", - idx, name, path); + pr_warn("failed to get section(%d, %s) data from %s\n", + idx, name, path); goto done; } continue; } else if (btf_ext && strcmp(name, BTF_EXT_ELF_SEC) == 0) { btf_ext_data = elf_getdata(scn, 0); if (!btf_ext_data) { - pr_warning("failed to get section(%d, %s) data from %s\n", - idx, name, path); + pr_warn("failed to get section(%d, %s) data from %s\n", + idx, name, path); goto done; } continue; @@ -600,9 +600,9 @@ int btf__load(struct btf *btf) log_buf, log_buf_size, false); if (btf->fd < 0) { err = -errno; - pr_warning("Error loading BTF: %s(%d)\n", strerror(errno), errno); + pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno); if (*log_buf) - pr_warning("%s\n", log_buf); + pr_warn("%s\n", log_buf); goto done; } @@ -707,8 +707,8 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, if (snprintf(container_name, max_name, "____btf_map_%s", map_name) == max_name) { - pr_warning("map:%s length of '____btf_map_%s' is too long\n", - map_name, map_name); + pr_warn("map:%s length of '____btf_map_%s' is too long\n", + map_name, map_name); return -EINVAL; } @@ -721,14 +721,14 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, container_type = btf__type_by_id(btf, container_id); if (!container_type) { - pr_warning("map:%s cannot find BTF type for container_id:%u\n", - map_name, container_id); + pr_warn("map:%s cannot find BTF type for container_id:%u\n", + map_name, container_id); return -EINVAL; } if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) { - pr_warning("map:%s container_name:%s is an invalid container struct\n", - map_name, container_name); + pr_warn("map:%s container_name:%s is an invalid container struct\n", + map_name, container_name); return -EINVAL; } @@ -737,25 +737,25 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, key_size = btf__resolve_size(btf, key->type); if (key_size < 0) { - pr_warning("map:%s invalid BTF key_type_size\n", map_name); + pr_warn("map:%s invalid BTF key_type_size\n", map_name); return key_size; } if (expected_key_size != key_size) { - pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n", - map_name, (__u32)key_size, expected_key_size); + pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n", + map_name, (__u32)key_size, expected_key_size); return -EINVAL; } value_size = btf__resolve_size(btf, value->type); if (value_size < 0) { - pr_warning("map:%s invalid BTF value_type_size\n", map_name); + pr_warn("map:%s invalid BTF value_type_size\n", map_name); return value_size; } if (expected_value_size != value_size) { - pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n", - map_name, (__u32)value_size, expected_value_size); + pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n", + map_name, (__u32)value_size, expected_value_size); return -EINVAL; } diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 139812b46c7b..cb126d8fcf75 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -428,7 +428,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) /* type loop, but resolvable through fwd declaration */ if (btf_is_composite(t) && through_ptr && t->name_off != 0) return 0; - pr_warning("unsatisfiable type cycle, id:[%u]\n", id); + pr_warn("unsatisfiable type cycle, id:[%u]\n", id); return -ELOOP; } @@ -636,8 +636,8 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) if (id == cont_id) return; if (t->name_off == 0) { - pr_warning("anonymous struct/union loop, id:[%u]\n", - id); + pr_warn("anonymous struct/union loop, id:[%u]\n", + id); return; } btf_dump_emit_struct_fwd(d, id, t); @@ -782,7 +782,7 @@ static int btf_align_of(const struct btf *btf, __u32 id) return align; } default: - pr_warning("unsupported BTF_KIND:%u\n", btf_kind(t)); + pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t)); return 1; } } @@ -1067,7 +1067,7 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, * chain, restore stack, emit warning, and try to * proceed nevertheless */ - pr_warning("not enough memory for decl stack:%d", err); + pr_warn("not enough memory for decl stack:%d", err); d->decl_stack_cnt = stack_start; return; } @@ -1096,8 +1096,8 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, case BTF_KIND_TYPEDEF: goto done; default: - pr_warning("unexpected type in decl chain, kind:%u, id:[%u]\n", - btf_kind(t), id); + pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n", + btf_kind(t), id); goto done; } } @@ -1323,8 +1323,8 @@ static void btf_dump_emit_type_chain(struct btf_dump *d, return; } default: - pr_warning("unexpected type in decl chain, kind:%u, id:[%u]\n", - kind, id); + pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n", + kind, id); return; } diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index cccfd9355134..31c8acddc17b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -312,8 +312,8 @@ void bpf_program__unload(struct bpf_program *prog) for (i = 0; i < prog->instances.nr; i++) zclose(prog->instances.fds[i]); } else if (prog->instances.nr != -1) { - pr_warning("Internal error: instances.nr is %d\n", - prog->instances.nr); + pr_warn("Internal error: instances.nr is %d\n", + prog->instances.nr); } prog->instances.nr = -1; @@ -364,8 +364,8 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, const size_t bpf_insn_sz = sizeof(struct bpf_insn); if (size == 0 || size % bpf_insn_sz) { - pr_warning("corrupted section '%s', size: %zu\n", - section_name, size); + pr_warn("corrupted section '%s', size: %zu\n", + section_name, size); return -EINVAL; } @@ -373,22 +373,22 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, prog->section_name = strdup(section_name); if (!prog->section_name) { - pr_warning("failed to alloc name for prog under section(%d) %s\n", - idx, section_name); + pr_warn("failed to alloc name for prog under section(%d) %s\n", + idx, section_name); goto errout; } prog->pin_name = __bpf_program__pin_name(prog); if (!prog->pin_name) { - pr_warning("failed to alloc pin name for prog under section(%d) %s\n", - idx, section_name); + pr_warn("failed to alloc pin name for prog under section(%d) %s\n", + idx, section_name); goto errout; } prog->insns = malloc(size); if (!prog->insns) { - pr_warning("failed to alloc insns for prog under section %s\n", - section_name); + pr_warn("failed to alloc insns for prog under section %s\n", + section_name); goto errout; } prog->insns_cnt = size / bpf_insn_sz; @@ -426,8 +426,8 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size, * is still valid, so don't need special treat for * bpf_close_object(). */ - pr_warning("failed to alloc a new program under section '%s'\n", - section_name); + pr_warn("failed to alloc a new program under section '%s'\n", + section_name); bpf_program__exit(&prog); return -ENOMEM; } @@ -467,8 +467,8 @@ bpf_object__init_prog_names(struct bpf_object *obj) obj->efile.strtabidx, sym.st_name); if (!name) { - pr_warning("failed to get sym name string for prog %s\n", - prog->section_name); + pr_warn("failed to get sym name string for prog %s\n", + prog->section_name); return -LIBBPF_ERRNO__LIBELF; } } @@ -477,15 +477,15 @@ bpf_object__init_prog_names(struct bpf_object *obj) name = ".text"; if (!name) { - pr_warning("failed to find sym for prog %s\n", - prog->section_name); + pr_warn("failed to find sym for prog %s\n", + prog->section_name); return -EINVAL; } prog->name = strdup(name); if (!prog->name) { - pr_warning("failed to allocate memory for prog sym %s\n", - name); + pr_warn("failed to allocate memory for prog sym %s\n", + name); return -ENOMEM; } } @@ -514,7 +514,7 @@ static struct bpf_object *bpf_object__new(const char *path, obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1); if (!obj) { - pr_warning("alloc memory failed for %s\n", path); + pr_warn("alloc memory failed for %s\n", path); return ERR_PTR(-ENOMEM); } @@ -581,7 +581,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) GElf_Ehdr *ep; if (obj_elf_valid(obj)) { - pr_warning("elf init: internal error\n"); + pr_warn("elf init: internal error\n"); return -LIBBPF_ERRNO__LIBELF; } @@ -599,7 +599,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) err = -errno; cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warning("failed to open %s: %s\n", obj->path, cp); + pr_warn("failed to open %s: %s\n", obj->path, cp); return err; } @@ -608,13 +608,13 @@ static int bpf_object__elf_init(struct bpf_object *obj) } if (!obj->efile.elf) { - pr_warning("failed to open %s as ELF file\n", obj->path); + pr_warn("failed to open %s as ELF file\n", obj->path); err = -LIBBPF_ERRNO__LIBELF; goto errout; } if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) { - pr_warning("failed to get EHDR from %s\n", obj->path); + pr_warn("failed to get EHDR from %s\n", obj->path); err = -LIBBPF_ERRNO__FORMAT; goto errout; } @@ -623,7 +623,7 @@ static int bpf_object__elf_init(struct bpf_object *obj) /* Old LLVM set e_machine to EM_NONE */ if (ep->e_type != ET_REL || (ep->e_machine && ep->e_machine != EM_BPF)) { - pr_warning("%s is not an eBPF object file\n", obj->path); + pr_warn("%s is not an eBPF object file\n", obj->path); err = -LIBBPF_ERRNO__FORMAT; goto errout; } @@ -645,7 +645,7 @@ static int bpf_object__check_endianness(struct bpf_object *obj) #else # error "Unrecognized __BYTE_ORDER__" #endif - pr_warning("endianness mismatch.\n"); + pr_warn("endianness mismatch.\n"); return -LIBBPF_ERRNO__ENDIAN; } @@ -663,7 +663,7 @@ bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size) __u32 kver; if (size != sizeof(kver)) { - pr_warning("invalid kver section in %s\n", obj->path); + pr_warn("invalid kver section in %s\n", obj->path); return -LIBBPF_ERRNO__FORMAT; } memcpy(&kver, data, sizeof(kver)); @@ -705,15 +705,15 @@ static int bpf_object_search_section_size(const struct bpf_object *obj, idx++; if (gelf_getshdr(scn, &sh) != &sh) { - pr_warning("failed to get section(%d) header from %s\n", - idx, obj->path); + pr_warn("failed to get section(%d) header from %s\n", + idx, obj->path); return -EIO; } sec_name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); if (!sec_name) { - pr_warning("failed to get section(%d) name from %s\n", - idx, obj->path); + pr_warn("failed to get section(%d) name from %s\n", + idx, obj->path); return -EIO; } @@ -722,8 +722,8 @@ static int bpf_object_search_section_size(const struct bpf_object *obj, data = elf_getdata(scn, 0); if (!data) { - pr_warning("failed to get section(%d) data from %s(%s)\n", - idx, name, obj->path); + pr_warn("failed to get section(%d) data from %s(%s)\n", + idx, name, obj->path); return -EIO; } @@ -783,8 +783,8 @@ int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx, sym.st_name); if (!sname) { - pr_warning("failed to get sym name string for var %s\n", - name); + pr_warn("failed to get sym name string for var %s\n", + name); return -EIO; } if (strcmp(name, sname) == 0) { @@ -808,7 +808,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) new_cap = max((size_t)4, obj->maps_cap * 3 / 2); new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps)); if (!new_maps) { - pr_warning("alloc maps for object failed\n"); + pr_warn("alloc maps for object failed\n"); return ERR_PTR(-ENOMEM); } @@ -849,7 +849,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, libbpf_type_to_btf_name[type]); map->name = strdup(map_name); if (!map->name) { - pr_warning("failed to alloc map name\n"); + pr_warn("failed to alloc map name\n"); return -ENOMEM; } pr_debug("map '%s' (global data): at sec_idx %d, offset %zu.\n", @@ -865,7 +865,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, *data_buff = malloc(data->d_size); if (!*data_buff) { zfree(&map->name); - pr_warning("failed to alloc map content buffer\n"); + pr_warn("failed to alloc map content buffer\n"); return -ENOMEM; } memcpy(*data_buff, data->d_buf, data->d_size); @@ -927,8 +927,8 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) if (scn) data = elf_getdata(scn, NULL); if (!scn || !data) { - pr_warning("failed to get Elf_Data from map section %d\n", - obj->efile.maps_shndx); + pr_warn("failed to get Elf_Data from map section %d\n", + obj->efile.maps_shndx); return -EINVAL; } @@ -955,9 +955,9 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) map_def_sz = data->d_size / nr_maps; if (!data->d_size || (data->d_size % nr_maps) != 0) { - pr_warning("unable to determine map definition size " - "section %s, %d maps in %zd bytes\n", - obj->path, nr_maps, data->d_size); + pr_warn("unable to determine map definition size " + "section %s, %d maps in %zd bytes\n", + obj->path, nr_maps, data->d_size); return -EINVAL; } @@ -980,8 +980,8 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, sym.st_name); if (!map_name) { - pr_warning("failed to get map #%d name sym string for obj %s\n", - i, obj->path); + pr_warn("failed to get map #%d name sym string for obj %s\n", + i, obj->path); return -LIBBPF_ERRNO__FORMAT; } @@ -991,14 +991,14 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n", map_name, map->sec_idx, map->sec_offset); if (sym.st_value + map_def_sz > data->d_size) { - pr_warning("corrupted maps section in %s: last map \"%s\" too small\n", - obj->path, map_name); + pr_warn("corrupted maps section in %s: last map \"%s\" too small\n", + obj->path, map_name); return -EINVAL; } map->name = strdup(map_name); if (!map->name) { - pr_warning("failed to alloc map name\n"); + pr_warn("failed to alloc map name\n"); return -ENOMEM; } pr_debug("map %d is \"%s\"\n", i, map->name); @@ -1022,10 +1022,10 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) for (b = ((char *)def) + sizeof(struct bpf_map_def); b < ((char *)def) + map_def_sz; b++) { if (*b != 0) { - pr_warning("maps section in %s: \"%s\" " - "has unrecognized, non-zero " - "options\n", - obj->path, map_name); + pr_warn("maps section in %s: \"%s\" " + "has unrecognized, non-zero " + "options\n", + obj->path, map_name); if (strict) return -EINVAL; } @@ -1069,20 +1069,20 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf, const struct btf_type *arr_t; if (!btf_is_ptr(t)) { - pr_warning("map '%s': attr '%s': expected PTR, got %u.\n", - map_name, name, btf_kind(t)); + pr_warn("map '%s': attr '%s': expected PTR, got %u.\n", + map_name, name, btf_kind(t)); return false; } arr_t = btf__type_by_id(btf, t->type); if (!arr_t) { - pr_warning("map '%s': attr '%s': type [%u] not found.\n", - map_name, name, t->type); + pr_warn("map '%s': attr '%s': type [%u] not found.\n", + map_name, name, t->type); return false; } if (!btf_is_array(arr_t)) { - pr_warning("map '%s': attr '%s': expected ARRAY, got %u.\n", - map_name, name, btf_kind(arr_t)); + pr_warn("map '%s': attr '%s': expected ARRAY, got %u.\n", + map_name, name, btf_kind(arr_t)); return false; } arr_info = btf_array(arr_t); @@ -1110,33 +1110,33 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, vlen = btf_vlen(var); if (map_name == NULL || map_name[0] == '\0') { - pr_warning("map #%d: empty name.\n", var_idx); + pr_warn("map #%d: empty name.\n", var_idx); return -EINVAL; } if ((__u64)vi->offset + vi->size > data->d_size) { - pr_warning("map '%s' BTF data is corrupted.\n", map_name); + pr_warn("map '%s' BTF data is corrupted.\n", map_name); return -EINVAL; } if (!btf_is_var(var)) { - pr_warning("map '%s': unexpected var kind %u.\n", - map_name, btf_kind(var)); + pr_warn("map '%s': unexpected var kind %u.\n", + map_name, btf_kind(var)); return -EINVAL; } if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED && var_extra->linkage != BTF_VAR_STATIC) { - pr_warning("map '%s': unsupported var linkage %u.\n", - map_name, var_extra->linkage); + pr_warn("map '%s': unsupported var linkage %u.\n", + map_name, var_extra->linkage); return -EOPNOTSUPP; } def = skip_mods_and_typedefs(obj->btf, var->type, NULL); if (!btf_is_struct(def)) { - pr_warning("map '%s': unexpected def kind %u.\n", - map_name, btf_kind(var)); + pr_warn("map '%s': unexpected def kind %u.\n", + map_name, btf_kind(var)); return -EINVAL; } if (def->size > vi->size) { - pr_warning("map '%s': invalid def size.\n", map_name); + pr_warn("map '%s': invalid def size.\n", map_name); return -EINVAL; } @@ -1145,7 +1145,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, return PTR_ERR(map); map->name = strdup(map_name); if (!map->name) { - pr_warning("map '%s': failed to alloc map name.\n", map_name); + pr_warn("map '%s': failed to alloc map name.\n", map_name); return -ENOMEM; } map->libbpf_type = LIBBPF_MAP_UNSPEC; @@ -1161,8 +1161,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, const char *name = btf__name_by_offset(obj->btf, m->name_off); if (!name) { - pr_warning("map '%s': invalid field #%d.\n", - map_name, i); + pr_warn("map '%s': invalid field #%d.\n", map_name, i); return -EINVAL; } if (strcmp(name, "type") == 0) { @@ -1192,8 +1191,8 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, pr_debug("map '%s': found key_size = %u.\n", map_name, sz); if (map->def.key_size && map->def.key_size != sz) { - pr_warning("map '%s': conflicting key size %u != %u.\n", - map_name, map->def.key_size, sz); + pr_warn("map '%s': conflicting key size %u != %u.\n", + map_name, map->def.key_size, sz); return -EINVAL; } map->def.key_size = sz; @@ -1202,26 +1201,26 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, t = btf__type_by_id(obj->btf, m->type); if (!t) { - pr_warning("map '%s': key type [%d] not found.\n", - map_name, m->type); + pr_warn("map '%s': key type [%d] not found.\n", + map_name, m->type); return -EINVAL; } if (!btf_is_ptr(t)) { - pr_warning("map '%s': key spec is not PTR: %u.\n", - map_name, btf_kind(t)); + pr_warn("map '%s': key spec is not PTR: %u.\n", + map_name, btf_kind(t)); return -EINVAL; } sz = btf__resolve_size(obj->btf, t->type); if (sz < 0) { - pr_warning("map '%s': can't determine key size for type [%u]: %lld.\n", - map_name, t->type, sz); + pr_warn("map '%s': can't determine key size for type [%u]: %lld.\n", + map_name, t->type, sz); return sz; } pr_debug("map '%s': found key [%u], sz = %lld.\n", map_name, t->type, sz); if (map->def.key_size && map->def.key_size != sz) { - pr_warning("map '%s': conflicting key size %u != %lld.\n", - map_name, map->def.key_size, sz); + pr_warn("map '%s': conflicting key size %u != %lld.\n", + map_name, map->def.key_size, sz); return -EINVAL; } map->def.key_size = sz; @@ -1235,8 +1234,8 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, pr_debug("map '%s': found value_size = %u.\n", map_name, sz); if (map->def.value_size && map->def.value_size != sz) { - pr_warning("map '%s': conflicting value size %u != %u.\n", - map_name, map->def.value_size, sz); + pr_warn("map '%s': conflicting value size %u != %u.\n", + map_name, map->def.value_size, sz); return -EINVAL; } map->def.value_size = sz; @@ -1245,34 +1244,34 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, t = btf__type_by_id(obj->btf, m->type); if (!t) { - pr_warning("map '%s': value type [%d] not found.\n", - map_name, m->type); + pr_warn("map '%s': value type [%d] not found.\n", + map_name, m->type); return -EINVAL; } if (!btf_is_ptr(t)) { - pr_warning("map '%s': value spec is not PTR: %u.\n", - map_name, btf_kind(t)); + pr_warn("map '%s': value spec is not PTR: %u.\n", + map_name, btf_kind(t)); return -EINVAL; } sz = btf__resolve_size(obj->btf, t->type); if (sz < 0) { - pr_warning("map '%s': can't determine value size for type [%u]: %lld.\n", - map_name, t->type, sz); + pr_warn("map '%s': can't determine value size for type [%u]: %lld.\n", + map_name, t->type, sz); return sz; } pr_debug("map '%s': found value [%u], sz = %lld.\n", map_name, t->type, sz); if (map->def.value_size && map->def.value_size != sz) { - pr_warning("map '%s': conflicting value size %u != %lld.\n", - map_name, map->def.value_size, sz); + pr_warn("map '%s': conflicting value size %u != %lld.\n", + map_name, map->def.value_size, sz); return -EINVAL; } map->def.value_size = sz; map->btf_value_type_id = t->type; } else { if (strict) { - pr_warning("map '%s': unknown field '%s'.\n", - map_name, name); + pr_warn("map '%s': unknown field '%s'.\n", + map_name, name); return -ENOTSUP; } pr_debug("map '%s': ignoring unknown field '%s'.\n", @@ -1281,7 +1280,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, } if (map->def.type == BPF_MAP_TYPE_UNSPEC) { - pr_warning("map '%s': map type isn't specified.\n", map_name); + pr_warn("map '%s': map type isn't specified.\n", map_name); return -EINVAL; } @@ -1304,8 +1303,8 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict) if (scn) data = elf_getdata(scn, NULL); if (!scn || !data) { - pr_warning("failed to get Elf_Data from map section %d (%s)\n", - obj->efile.maps_shndx, MAPS_ELF_SEC); + pr_warn("failed to get Elf_Data from map section %d (%s)\n", + obj->efile.maps_shndx, MAPS_ELF_SEC); return -EINVAL; } @@ -1322,7 +1321,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict) } if (!sec) { - pr_warning("DATASEC '%s' not found.\n", MAPS_ELF_SEC); + pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC); return -ENOENT; } @@ -1466,14 +1465,13 @@ static int bpf_object__init_btf(struct bpf_object *obj, if (btf_data) { obj->btf = btf__new(btf_data->d_buf, btf_data->d_size); if (IS_ERR(obj->btf)) { - pr_warning("Error loading ELF section %s: %d.\n", - BTF_ELF_SEC, err); + pr_warn("Error loading ELF section %s: %d.\n", + BTF_ELF_SEC, err); goto out; } err = btf__finalize_data(obj, obj->btf); if (err) { - pr_warning("Error finalizing %s: %d.\n", - BTF_ELF_SEC, err); + pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err); goto out; } } @@ -1486,8 +1484,8 @@ static int bpf_object__init_btf(struct bpf_object *obj, obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size); if (IS_ERR(obj->btf_ext)) { - pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", - BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext)); + pr_warn("Error loading ELF section %s: %ld. Ignored and continue.\n", + BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext)); obj->btf_ext = NULL; goto out; } @@ -1503,7 +1501,7 @@ out: obj->btf = NULL; } if (btf_required && !obj->btf) { - pr_warning("BTF is required, but is missing or corrupted.\n"); + pr_warn("BTF is required, but is missing or corrupted.\n"); return err == 0 ? -ENOENT : err; } return 0; @@ -1521,8 +1519,8 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) err = btf__load(obj->btf); if (err) { - pr_warning("Error loading %s into kernel: %d.\n", - BTF_ELF_SEC, err); + pr_warn("Error loading %s into kernel: %d.\n", + BTF_ELF_SEC, err); btf__free(obj->btf); obj->btf = NULL; /* btf_ext can't exist without btf, so free it as well */ @@ -1548,7 +1546,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps) /* Elf is corrupted/truncated, avoid calling elf_strptr. */ if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) { - pr_warning("failed to get e_shstrndx from %s\n", obj->path); + pr_warn("failed to get e_shstrndx from %s\n", obj->path); return -LIBBPF_ERRNO__FORMAT; } @@ -1559,22 +1557,22 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps) idx++; if (gelf_getshdr(scn, &sh) != &sh) { - pr_warning("failed to get section(%d) header from %s\n", - idx, obj->path); + pr_warn("failed to get section(%d) header from %s\n", + idx, obj->path); return -LIBBPF_ERRNO__FORMAT; } name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name); if (!name) { - pr_warning("failed to get section(%d) name from %s\n", - idx, obj->path); + pr_warn("failed to get section(%d) name from %s\n", + idx, obj->path); return -LIBBPF_ERRNO__FORMAT; } data = elf_getdata(scn, 0); if (!data) { - pr_warning("failed to get section(%d) data from %s(%s)\n", - idx, name, obj->path); + pr_warn("failed to get section(%d) data from %s(%s)\n", + idx, name, obj->path); return -LIBBPF_ERRNO__FORMAT; } pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", @@ -1604,8 +1602,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps) btf_ext_data = data; } else if (sh.sh_type == SHT_SYMTAB) { if (obj->efile.symbols) { - pr_warning("bpf: multiple SYMTAB in %s\n", - obj->path); + pr_warn("bpf: multiple SYMTAB in %s\n", + obj->path); return -LIBBPF_ERRNO__FORMAT; } obj->efile.symbols = data; @@ -1621,8 +1619,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps) char *cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); - pr_warning("failed to alloc program %s (%s): %s", - name, obj->path, cp); + pr_warn("failed to alloc program %s (%s): %s", + name, obj->path, cp); return err; } } else if (strcmp(name, ".data") == 0) { @@ -1649,7 +1647,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps) reloc = reallocarray(reloc, nr_reloc + 1, sizeof(*obj->efile.reloc)); if (!reloc) { - pr_warning("realloc failed\n"); + pr_warn("realloc failed\n"); return -ENOMEM; } @@ -1667,7 +1665,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps) } if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) { - pr_warning("Corrupted ELF file: index of strtab invalid\n"); + pr_warn("Corrupted ELF file: index of strtab invalid\n"); return -LIBBPF_ERRNO__FORMAT; } err = bpf_object__init_btf(obj, btf_data, btf_ext_data); @@ -1757,7 +1755,7 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels); if (!prog->reloc_desc) { - pr_warning("failed to alloc memory in relocation\n"); + pr_warn("failed to alloc memory in relocation\n"); return -ENOMEM; } prog->nr_reloc = nrels; @@ -1773,13 +1771,13 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, GElf_Rel rel; if (!gelf_getrel(data, i, &rel)) { - pr_warning("relocation: failed to get %d reloc\n", i); + pr_warn("relocation: failed to get %d reloc\n", i); return -LIBBPF_ERRNO__FORMAT; } if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { - pr_warning("relocation: symbol %"PRIx64" not found\n", - GELF_R_SYM(rel.r_info)); + pr_warn("relocation: symbol %"PRIx64" not found\n", + GELF_R_SYM(rel.r_info)); return -LIBBPF_ERRNO__FORMAT; } @@ -1796,20 +1794,20 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, insn_idx, shdr_idx); if (shdr_idx >= SHN_LORESERVE) { - pr_warning("relocation: not yet supported relo for non-static global \'%s\' variable in special section (0x%x) found in insns[%d].code 0x%x\n", - name, shdr_idx, insn_idx, - insns[insn_idx].code); + pr_warn("relocation: not yet supported relo for non-static global \'%s\' variable in special section (0x%x) found in insns[%d].code 0x%x\n", + name, shdr_idx, insn_idx, + insns[insn_idx].code); return -LIBBPF_ERRNO__RELOC; } if (!bpf_object__relo_in_known_section(obj, shdr_idx)) { - pr_warning("Program '%s' contains unrecognized relo data pointing to section %u\n", - prog->section_name, shdr_idx); + pr_warn("Program '%s' contains unrecognized relo data pointing to section %u\n", + prog->section_name, shdr_idx); return -LIBBPF_ERRNO__RELOC; } if (insns[insn_idx].code == (BPF_JMP | BPF_CALL)) { if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) { - pr_warning("incorrect bpf_call opcode\n"); + pr_warn("incorrect bpf_call opcode\n"); return -LIBBPF_ERRNO__RELOC; } prog->reloc_desc[i].type = RELO_CALL; @@ -1820,8 +1818,8 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, } if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) { - pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n", - insn_idx, insns[insn_idx].code); + pr_warn("bpf: relocation: invalid relo for insns[%d].code 0x%x\n", + insn_idx, insns[insn_idx].code); return -LIBBPF_ERRNO__RELOC; } @@ -1830,13 +1828,13 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); if (type != LIBBPF_MAP_UNSPEC) { if (GELF_ST_BIND(sym.st_info) == STB_GLOBAL) { - pr_warning("bpf: relocation: not yet supported relo for non-static global \'%s\' variable found in insns[%d].code 0x%x\n", - name, insn_idx, insns[insn_idx].code); + pr_warn("bpf: relocation: not yet supported relo for non-static global \'%s\' variable found in insns[%d].code 0x%x\n", + name, insn_idx, insns[insn_idx].code); return -LIBBPF_ERRNO__RELOC; } if (!obj->caps.global_data) { - pr_warning("bpf: relocation: kernel does not support global \'%s\' variable access in insns[%d]\n", - name, insn_idx); + pr_warn("bpf: relocation: kernel does not support global \'%s\' variable access in insns[%d]\n", + name, insn_idx); return -LIBBPF_ERRNO__RELOC; } } @@ -1857,8 +1855,8 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, } if (map_idx >= nr_maps) { - pr_warning("bpf relocation: map_idx %d larger than %d\n", - (int)map_idx, (int)nr_maps - 1); + pr_warn("bpf relocation: map_idx %d larger than %d\n", + (int)map_idx, (int)nr_maps - 1); return -LIBBPF_ERRNO__RELOC; } @@ -1985,8 +1983,8 @@ bpf_object__probe_name(struct bpf_object *obj) ret = bpf_load_program_xattr(&attr, NULL, 0); if (ret < 0) { cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warning("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n", - __func__, cp, errno); + pr_warn("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n", + __func__, cp, errno); return -errno; } close(ret); @@ -2026,8 +2024,8 @@ bpf_object__probe_global_data(struct bpf_object *obj) map = bpf_create_map_xattr(&map_attr); if (map < 0) { cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warning("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, errno); + pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", + __func__, cp, errno); return -errno; } @@ -2142,8 +2140,8 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) err = bpf_map_freeze(map->fd); if (err) { cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warning("Error freezing map(%s) as read-only: %s\n", - map->name, cp); + pr_warn("Error freezing map(%s) as read-only: %s\n", + map->name, cp); err = 0; } } @@ -2182,8 +2180,8 @@ bpf_object__create_maps(struct bpf_object *obj) if (!nr_cpus) nr_cpus = libbpf_num_possible_cpus(); if (nr_cpus < 0) { - pr_warning("failed to determine number of system CPUs: %d\n", - nr_cpus); + pr_warn("failed to determine number of system CPUs: %d\n", + nr_cpus); err = nr_cpus; goto err_out; } @@ -2211,8 +2209,8 @@ bpf_object__create_maps(struct bpf_object *obj) create_attr.btf_value_type_id)) { err = -errno; cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", - map->name, cp, err); + pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", + map->name, cp, err); create_attr.btf_fd = 0; create_attr.btf_key_type_id = 0; create_attr.btf_value_type_id = 0; @@ -2227,8 +2225,8 @@ bpf_object__create_maps(struct bpf_object *obj) err = -errno; err_out: cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warning("failed to create map (name: '%s'): %s(%d)\n", - map->name, cp, err); + pr_warn("failed to create map (name: '%s'): %s(%d)\n", + map->name, cp, err); for (j = 0; j < i; j++) zclose(obj->maps[j].fd); return err; @@ -2253,8 +2251,8 @@ check_btf_ext_reloc_err(struct bpf_program *prog, int err, void *btf_prog_info, const char *info_name) { if (err != -ENOENT) { - pr_warning("Error in loading %s for sec %s.\n", - info_name, prog->section_name); + pr_warn("Error in loading %s for sec %s.\n", + info_name, prog->section_name); return err; } @@ -2265,14 +2263,14 @@ check_btf_ext_reloc_err(struct bpf_program *prog, int err, * Some info has already been found but has problem * in the last btf_ext reloc. Must have to error out. */ - pr_warning("Error in relocating %s for sec %s.\n", - info_name, prog->section_name); + pr_warn("Error in relocating %s for sec %s.\n", + info_name, prog->section_name); return err; } /* Have problem loading the very first info. Ignore the rest. */ - pr_warning("Cannot find %s for main program sec %s. Ignore all %s.\n", - info_name, prog->section_name, info_name); + pr_warn("Cannot find %s for main program sec %s. Ignore all %s.\n", + info_name, prog->section_name, info_name); return 0; } @@ -2473,8 +2471,8 @@ static int bpf_core_spec_parse(const struct btf *btf, return sz; spec->offset += access_idx * sz; } else { - pr_warning("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n", - type_id, spec_str, i, id, btf_kind(t)); + pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n", + type_id, spec_str, i, id, btf_kind(t)); return -EINVAL; } } @@ -2618,8 +2616,8 @@ recur: targ_id = btf_array(targ_type)->type; goto recur; default: - pr_warning("unexpected kind %d relocated, local [%d], target [%d]\n", - btf_kind(local_type), local_id, targ_id); + pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n", + btf_kind(local_type), local_id, targ_id); return 0; } } @@ -2823,9 +2821,9 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, if (targ_spec) { new_val = targ_spec->offset; } else { - pr_warning("prog '%s': patching insn #%d w/ failed reloc, imm %d -> %d\n", - bpf_program__title(prog, false), insn_idx, - orig_val, -1); + pr_warn("prog '%s': patching insn #%d w/ failed reloc, imm %d -> %d\n", + bpf_program__title(prog, false), insn_idx, + orig_val, -1); new_val = (__u32)-1; } break; @@ -2834,9 +2832,9 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, new_val = targ_spec ? 1 : 0; break; default: - pr_warning("prog '%s': unknown relo %d at insn #%d'\n", - bpf_program__title(prog, false), - relo->kind, insn_idx); + pr_warn("prog '%s': unknown relo %d at insn #%d'\n", + bpf_program__title(prog, false), + relo->kind, insn_idx); return -EINVAL; } @@ -2853,10 +2851,10 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, bpf_program__title(prog, false), insn_idx, orig_val, new_val); } else { - pr_warning("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", - bpf_program__title(prog, false), - insn_idx, insn->code, insn->src_reg, insn->dst_reg, - insn->off, insn->imm); + pr_warn("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", + bpf_program__title(prog, false), + insn_idx, insn->code, insn->src_reg, insn->dst_reg, + insn->off, insn->imm); return -EINVAL; } @@ -2945,7 +2943,7 @@ static struct btf *bpf_core_find_kernel_btf(void) return btf; } - pr_warning("failed to find valid kernel BTF\n"); + pr_warn("failed to find valid kernel BTF\n"); return ERR_PTR(-ESRCH); } @@ -3077,9 +3075,9 @@ static int bpf_core_reloc_field(struct bpf_program *prog, err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec); if (err) { - pr_warning("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n", - prog_name, relo_idx, local_id, local_name, spec_str, - err); + pr_warn("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n", + prog_name, relo_idx, local_id, local_name, spec_str, + err); return -EINVAL; } @@ -3090,9 +3088,9 @@ static int bpf_core_reloc_field(struct bpf_program *prog, if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) { cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf); if (IS_ERR(cand_ids)) { - pr_warning("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld", - prog_name, relo_idx, local_id, local_name, - PTR_ERR(cand_ids)); + pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld", + prog_name, relo_idx, local_id, local_name, + PTR_ERR(cand_ids)); return PTR_ERR(cand_ids); } err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL); @@ -3114,8 +3112,8 @@ static int bpf_core_reloc_field(struct bpf_program *prog, bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); libbpf_print(LIBBPF_DEBUG, ": %d\n", err); if (err < 0) { - pr_warning("prog '%s': relo #%d: matching error: %d\n", - prog_name, relo_idx, err); + pr_warn("prog '%s': relo #%d: matching error: %d\n", + prog_name, relo_idx, err); return err; } if (err == 0) @@ -3127,9 +3125,9 @@ static int bpf_core_reloc_field(struct bpf_program *prog, /* if there are many candidates, they should all * resolve to the same offset */ - pr_warning("prog '%s': relo #%d: offset ambiguity: %u != %u\n", - prog_name, relo_idx, cand_spec.offset, - targ_spec.offset); + pr_warn("prog '%s': relo #%d: offset ambiguity: %u != %u\n", + prog_name, relo_idx, cand_spec.offset, + targ_spec.offset); return -EINVAL; } @@ -3148,8 +3146,8 @@ static int bpf_core_reloc_field(struct bpf_program *prog, if (j == 0 && !prog->obj->relaxed_core_relocs && relo->kind != BPF_FIELD_EXISTS) { - pr_warning("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", - prog_name, relo_idx, local_id, local_name, spec_str); + pr_warn("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", + prog_name, relo_idx, local_id, local_name, spec_str); return -ESRCH; } @@ -3157,8 +3155,8 @@ static int bpf_core_reloc_field(struct bpf_program *prog, err = bpf_core_reloc_insn(prog, relo, &local_spec, j ? &targ_spec : NULL); if (err) { - pr_warning("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n", - prog_name, relo_idx, relo->insn_off, err); + pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n", + prog_name, relo_idx, relo->insn_off, err); return -EINVAL; } @@ -3183,8 +3181,7 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path) else targ_btf = bpf_core_find_kernel_btf(); if (IS_ERR(targ_btf)) { - pr_warning("failed to get target BTF: %ld\n", - PTR_ERR(targ_btf)); + pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf)); return PTR_ERR(targ_btf); } @@ -3203,8 +3200,8 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path) } prog = bpf_object__find_program_by_title(obj, sec_name); if (!prog) { - pr_warning("failed to find program '%s' for CO-RE offset relocation\n", - sec_name); + pr_warn("failed to find program '%s' for CO-RE offset relocation\n", + sec_name); err = -EINVAL; goto out; } @@ -3216,8 +3213,8 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path) err = bpf_core_reloc_field(prog, rec, i, obj->btf, targ_btf, cand_cache); if (err) { - pr_warning("prog '%s': relo #%d: failed to relocate: %d\n", - sec_name, i, err); + pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", + sec_name, i, err); goto out; } } @@ -3258,21 +3255,21 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, return -LIBBPF_ERRNO__RELOC; if (prog->idx == obj->efile.text_shndx) { - pr_warning("relo in .text insn %d into off %d\n", - relo->insn_idx, relo->text_off); + pr_warn("relo in .text insn %d into off %d\n", + relo->insn_idx, relo->text_off); return -LIBBPF_ERRNO__RELOC; } if (prog->main_prog_cnt == 0) { text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx); if (!text) { - pr_warning("no .text section found yet relo into text exist\n"); + pr_warn("no .text section found yet relo into text exist\n"); return -LIBBPF_ERRNO__RELOC; } new_cnt = prog->insns_cnt + text->insns_cnt; new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn)); if (!new_insn) { - pr_warning("oom in prog realloc\n"); + pr_warn("oom in prog realloc\n"); return -ENOMEM; } @@ -3327,8 +3324,8 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) map_idx = prog->reloc_desc[i].map_idx; if (insn_idx + 1 >= (int)prog->insns_cnt) { - pr_warning("relocation out of range: '%s'\n", - prog->section_name); + pr_warn("relocation out of range: '%s'\n", + prog->section_name); return -LIBBPF_ERRNO__RELOC; } @@ -3362,8 +3359,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) if (obj->btf_ext) { err = bpf_object__relocate_core(obj, targ_btf_path); if (err) { - pr_warning("failed to perform CO-RE relocations: %d\n", - err); + pr_warn("failed to perform CO-RE relocations: %d\n", + err); return err; } } @@ -3372,8 +3369,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path) err = bpf_program__relocate(prog, obj); if (err) { - pr_warning("failed to relocate '%s'\n", - prog->section_name); + pr_warn("failed to relocate '%s'\n", prog->section_name); return err; } } @@ -3385,7 +3381,7 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) int i, err; if (!obj_elf_valid(obj)) { - pr_warning("Internal error: elf object is closed\n"); + pr_warn("Internal error: elf object is closed\n"); return -LIBBPF_ERRNO__INTERNAL; } @@ -3396,13 +3392,13 @@ static int bpf_object__collect_reloc(struct bpf_object *obj) struct bpf_program *prog; if (shdr->sh_type != SHT_REL) { - pr_warning("internal error at %d\n", __LINE__); + pr_warn("internal error at %d\n", __LINE__); return -LIBBPF_ERRNO__INTERNAL; } prog = bpf_object__find_prog_by_idx(obj, idx); if (!prog) { - pr_warning("relocation failed: no section(%d)\n", idx); + pr_warn("relocation failed: no section(%d)\n", idx); return -LIBBPF_ERRNO__RELOC; } @@ -3454,7 +3450,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, retry_load: log_buf = malloc(log_buf_size); if (!log_buf) - pr_warning("Alloc log buffer for bpf loader error, continue without log\n"); + pr_warn("Alloc log buffer for bpf loader error, continue without log\n"); ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size); @@ -3473,16 +3469,16 @@ retry_load: } ret = -LIBBPF_ERRNO__LOAD; cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warning("load bpf program failed: %s\n", cp); + pr_warn("load bpf program failed: %s\n", cp); if (log_buf && log_buf[0] != '\0') { ret = -LIBBPF_ERRNO__VERIFY; - pr_warning("-- BEGIN DUMP LOG ---\n"); - pr_warning("\n%s\n", log_buf); - pr_warning("-- END LOG --\n"); + pr_warn("-- BEGIN DUMP LOG ---\n"); + pr_warn("\n%s\n", log_buf); + pr_warn("-- END LOG --\n"); } else if (load_attr.insns_cnt >= BPF_MAXINSNS) { - pr_warning("Program too large (%zu insns), at most %d insns\n", - load_attr.insns_cnt, BPF_MAXINSNS); + pr_warn("Program too large (%zu insns), at most %d insns\n", + load_attr.insns_cnt, BPF_MAXINSNS); ret = -LIBBPF_ERRNO__PROG2BIG; } else { /* Wrong program type? */ @@ -3516,14 +3512,14 @@ bpf_program__load(struct bpf_program *prog, if (prog->instances.nr < 0 || !prog->instances.fds) { if (prog->preprocessor) { - pr_warning("Internal error: can't load program '%s'\n", - prog->section_name); + pr_warn("Internal error: can't load program '%s'\n", + prog->section_name); return -LIBBPF_ERRNO__INTERNAL; } prog->instances.fds = malloc(sizeof(int)); if (!prog->instances.fds) { - pr_warning("Not enough memory for BPF fds\n"); + pr_warn("Not enough memory for BPF fds\n"); return -ENOMEM; } prog->instances.nr = 1; @@ -3532,8 +3528,8 @@ bpf_program__load(struct bpf_program *prog, if (!prog->preprocessor) { if (prog->instances.nr != 1) { - pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n", - prog->section_name, prog->instances.nr); + pr_warn("Program '%s' is inconsistent: nr(%d) != 1\n", + prog->section_name, prog->instances.nr); } err = load_program(prog, prog->insns, prog->insns_cnt, license, kern_version, &fd); @@ -3550,8 +3546,8 @@ bpf_program__load(struct bpf_program *prog, err = preprocessor(prog, i, prog->insns, prog->insns_cnt, &result); if (err) { - pr_warning("Preprocessing the %dth instance of program '%s' failed\n", - i, prog->section_name); + pr_warn("Preprocessing the %dth instance of program '%s' failed\n", + i, prog->section_name); goto out; } @@ -3569,8 +3565,8 @@ bpf_program__load(struct bpf_program *prog, license, kern_version, &fd); if (err) { - pr_warning("Loading the %dth instance of program '%s' failed\n", - i, prog->section_name); + pr_warn("Loading the %dth instance of program '%s' failed\n", + i, prog->section_name); goto out; } @@ -3580,8 +3576,7 @@ bpf_program__load(struct bpf_program *prog, } out: if (err) - pr_warning("failed to load program '%s'\n", - prog->section_name); + pr_warn("failed to load program '%s'\n", prog->section_name); zfree(&prog->insns); prog->insns_cnt = 0; return err; @@ -3623,8 +3618,8 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, int err; if (elf_version(EV_CURRENT) == EV_NONE) { - pr_warning("failed to init libelf for %s\n", - path ? : "(mem buf)"); + pr_warn("failed to init libelf for %s\n", + path ? : "(mem buf)"); return ERR_PTR(-LIBBPF_ERRNO__LIBELF); } @@ -3759,7 +3754,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) return -EINVAL; if (obj->loaded) { - pr_warning("object should not be loaded twice\n"); + pr_warn("object should not be loaded twice\n"); return -EINVAL; } @@ -3772,7 +3767,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) return 0; out: bpf_object__unload(obj); - pr_warning("failed to load object '%s'\n", obj->path); + pr_warn("failed to load object '%s'\n", obj->path); return err; } @@ -3802,13 +3797,13 @@ static int check_path(const char *path) dir = dirname(dname); if (statfs(dir, &st_fs)) { cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warning("failed to statfs %s: %s\n", dir, cp); + pr_warn("failed to statfs %s: %s\n", dir, cp); err = -errno; } free(dname); if (!err && st_fs.f_type != BPF_FS_MAGIC) { - pr_warning("specified path %s is not on BPF FS\n", path); + pr_warn("specified path %s is not on BPF FS\n", path); err = -EINVAL; } @@ -3826,19 +3821,19 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path, return err; if (prog == NULL) { - pr_warning("invalid program pointer\n"); + pr_warn("invalid program pointer\n"); return -EINVAL; } if (instance < 0 || instance >= prog->instances.nr) { - pr_warning("invalid prog instance %d of prog %s (max %d)\n", - instance, prog->section_name, prog->instances.nr); + pr_warn("invalid prog instance %d of prog %s (max %d)\n", + instance, prog->section_name, prog->instances.nr); return -EINVAL; } if (bpf_obj_pin(prog->instances.fds[instance], path)) { cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warning("failed to pin program: %s\n", cp); + pr_warn("failed to pin program: %s\n", cp); return -errno; } pr_debug("pinned program '%s'\n", path); @@ -3856,13 +3851,13 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path, return err; if (prog == NULL) { - pr_warning("invalid program pointer\n"); + pr_warn("invalid program pointer\n"); return -EINVAL; } if (instance < 0 || instance >= prog->instances.nr) { - pr_warning("invalid prog instance %d of prog %s (max %d)\n", - instance, prog->section_name, prog->instances.nr); + pr_warn("invalid prog instance %d of prog %s (max %d)\n", + instance, prog->section_name, prog->instances.nr); return -EINVAL; } @@ -3884,7 +3879,7 @@ static int make_dir(const char *path) if (err) { cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); - pr_warning("failed to mkdir %s: %s\n", path, cp); + pr_warn("failed to mkdir %s: %s\n", path, cp); } return err; } @@ -3898,12 +3893,12 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) return err; if (prog == NULL) { - pr_warning("invalid program pointer\n"); + pr_warn("invalid program pointer\n"); return -EINVAL; } if (prog->instances.nr <= 0) { - pr_warning("no instances of prog %s to pin\n", + pr_warn("no instances of prog %s to pin\n", prog->section_name); return -EINVAL; } @@ -3965,12 +3960,12 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path) return err; if (prog == NULL) { - pr_warning("invalid program pointer\n"); + pr_warn("invalid program pointer\n"); return -EINVAL; } if (prog->instances.nr <= 0) { - pr_warning("no instances of prog %s to pin\n", + pr_warn("no instances of prog %s to pin\n", prog->section_name); return -EINVAL; } @@ -4012,13 +4007,13 @@ int bpf_map__pin(struct bpf_map *map, const char *path) return err; if (map == NULL) { - pr_warning("invalid map pointer\n"); + pr_warn("invalid map pointer\n"); return -EINVAL; } if (bpf_obj_pin(map->fd, path)) { cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warning("failed to pin map: %s\n", cp); + pr_warn("failed to pin map: %s\n", cp); return -errno; } @@ -4036,7 +4031,7 @@ int bpf_map__unpin(struct bpf_map *map, const char *path) return err; if (map == NULL) { - pr_warning("invalid map pointer\n"); + pr_warn("invalid map pointer\n"); return -EINVAL; } @@ -4057,7 +4052,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) return -ENOENT; if (!obj->loaded) { - pr_warning("object not yet loaded; load it first\n"); + pr_warn("object not yet loaded; load it first\n"); return -ENOENT; } @@ -4140,7 +4135,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path) return -ENOENT; if (!obj->loaded) { - pr_warning("object not yet loaded; load it first\n"); + pr_warn("object not yet loaded; load it first\n"); return -ENOENT; } @@ -4341,7 +4336,7 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj, &obj->programs[nr_programs - 1]; if (p->obj != obj) { - pr_warning("error: program handler doesn't match object\n"); + pr_warn("error: program handler doesn't match object\n"); return NULL; } @@ -4404,7 +4399,7 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy) if (needs_copy) { title = strdup(title); if (!title) { - pr_warning("failed to strdup program title\n"); + pr_warn("failed to strdup program title\n"); return ERR_PTR(-ENOMEM); } } @@ -4426,13 +4421,13 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instances, return -EINVAL; if (prog->instances.nr > 0 || prog->instances.fds) { - pr_warning("Can't set pre-processor after loading\n"); + pr_warn("Can't set pre-processor after loading\n"); return -EINVAL; } instances_fds = malloc(sizeof(int) * nr_instances); if (!instances_fds) { - pr_warning("alloc memory failed for fds\n"); + pr_warn("alloc memory failed for fds\n"); return -ENOMEM; } @@ -4453,15 +4448,15 @@ int bpf_program__nth_fd(const struct bpf_program *prog, int n) return -EINVAL; if (n >= prog->instances.nr || n < 0) { - pr_warning("Can't get the %dth fd from program %s: only %d instances\n", - n, prog->section_name, prog->instances.nr); + pr_warn("Can't get the %dth fd from program %s: only %d instances\n", + n, prog->section_name, prog->instances.nr); return -EINVAL; } fd = prog->instances.fds[n]; if (fd < 0) { - pr_warning("%dth instance of program '%s' is invalid\n", - n, prog->section_name); + pr_warn("%dth instance of program '%s' is invalid\n", + n, prog->section_name); return -ENOENT; } @@ -4658,7 +4653,7 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, int ret; if (IS_ERR(btf)) { - pr_warning("vmlinux BTF is not found\n"); + pr_warn("vmlinux BTF is not found\n"); return -EINVAL; } /* prepend "btf_trace_" prefix per kernel convention */ @@ -4667,14 +4662,14 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, ret = btf__find_by_name(btf, raw_tp_btf_name); btf__free(btf); if (ret <= 0) { - pr_warning("%s is not found in vmlinux BTF\n", dst); + pr_warn("%s is not found in vmlinux BTF\n", dst); return -EINVAL; } *expected_attach_type = ret; } return 0; } - pr_warning("failed to guess program type based on ELF section name '%s'\n", name); + pr_warn("failed to guess program type based on ELF section name '%s'\n", name); type_names = libbpf_get_type_names(false); if (type_names != NULL) { pr_info("supported section(type) names are:%s\n", type_names); @@ -4701,7 +4696,7 @@ int libbpf_attach_type_by_name(const char *name, *attach_type = section_names[i].attach_type; return 0; } - pr_warning("failed to guess attach type based on ELF section name '%s'\n", name); + pr_warn("failed to guess attach type based on ELF section name '%s'\n", name); type_names = libbpf_get_type_names(true); if (type_names != NULL) { pr_info("attachable section(type) names are:%s\n", type_names); @@ -4784,11 +4779,11 @@ void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) { if (!bpf_map_type__is_map_in_map(map->def.type)) { - pr_warning("error: unsupported map type\n"); + pr_warn("error: unsupported map type\n"); return -EINVAL; } if (map->inner_map_fd != -1) { - pr_warning("error: inner_map_fd already specified\n"); + pr_warn("error: inner_map_fd already specified\n"); return -EINVAL; } map->inner_map_fd = fd; @@ -4808,8 +4803,8 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i) e = obj->maps + obj->nr_maps; if ((m < s) || (m >= e)) { - pr_warning("error in %s: map handler doesn't belong to object\n", - __func__); + pr_warn("error in %s: map handler doesn't belong to object\n", + __func__); return NULL; } @@ -4938,7 +4933,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, } if (!first_prog) { - pr_warning("object file doesn't contain bpf program\n"); + pr_warn("object file doesn't contain bpf program\n"); bpf_object__close(obj); return -ENOENT; } @@ -4997,14 +4992,14 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int prog_fd, err; if (pfd < 0) { - pr_warning("program '%s': invalid perf event FD %d\n", - bpf_program__title(prog, false), pfd); + pr_warn("program '%s': invalid perf event FD %d\n", + bpf_program__title(prog, false), pfd); return ERR_PTR(-EINVAL); } prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { - pr_warning("program '%s': can't attach BPF program w/o FD (did you load it?)\n", - bpf_program__title(prog, false)); + pr_warn("program '%s': can't attach BPF program w/o FD (did you load it?)\n", + bpf_program__title(prog, false)); return ERR_PTR(-EINVAL); } @@ -5017,16 +5012,16 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { err = -errno; free(link); - pr_warning("program '%s': failed to attach to pfd %d: %s\n", - bpf_program__title(prog, false), pfd, + pr_warn("program '%s': failed to attach to pfd %d: %s\n", + bpf_program__title(prog, false), pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return ERR_PTR(err); } if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { err = -errno; free(link); - pr_warning("program '%s': failed to enable pfd %d: %s\n", - bpf_program__title(prog, false), pfd, + pr_warn("program '%s': failed to enable pfd %d: %s\n", + bpf_program__title(prog, false), pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return ERR_PTR(err); } @@ -5101,9 +5096,9 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, type = uprobe ? determine_uprobe_perf_type() : determine_kprobe_perf_type(); if (type < 0) { - pr_warning("failed to determine %s perf type: %s\n", - uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(type, errmsg, sizeof(errmsg))); + pr_warn("failed to determine %s perf type: %s\n", + uprobe ? "uprobe" : "kprobe", + libbpf_strerror_r(type, errmsg, sizeof(errmsg))); return type; } if (retprobe) { @@ -5111,10 +5106,9 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, : determine_kprobe_retprobe_bit(); if (bit < 0) { - pr_warning("failed to determine %s retprobe bit: %s\n", - uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(bit, errmsg, - sizeof(errmsg))); + pr_warn("failed to determine %s retprobe bit: %s\n", + uprobe ? "uprobe" : "kprobe", + libbpf_strerror_r(bit, errmsg, sizeof(errmsg))); return bit; } attr.config |= 1 << bit; @@ -5131,9 +5125,9 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); if (pfd < 0) { err = -errno; - pr_warning("%s perf_event_open() failed: %s\n", - uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + pr_warn("%s perf_event_open() failed: %s\n", + uprobe ? "uprobe" : "kprobe", + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return err; } return pfd; @@ -5150,20 +5144,20 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name, 0 /* offset */, -1 /* pid */); if (pfd < 0) { - pr_warning("program '%s': failed to create %s '%s' perf event: %s\n", - bpf_program__title(prog, false), - retprobe ? "kretprobe" : "kprobe", func_name, - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + pr_warn("program '%s': failed to create %s '%s' perf event: %s\n", + bpf_program__title(prog, false), + retprobe ? "kretprobe" : "kprobe", func_name, + libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); return ERR_PTR(pfd); } link = bpf_program__attach_perf_event(prog, pfd); if (IS_ERR(link)) { close(pfd); err = PTR_ERR(link); - pr_warning("program '%s': failed to attach to %s '%s': %s\n", - bpf_program__title(prog, false), - retprobe ? "kretprobe" : "kprobe", func_name, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + pr_warn("program '%s': failed to attach to %s '%s': %s\n", + bpf_program__title(prog, false), + retprobe ? "kretprobe" : "kprobe", func_name, + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return link; } return link; @@ -5181,22 +5175,22 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog, pfd = perf_event_open_probe(true /* uprobe */, retprobe, binary_path, func_offset, pid); if (pfd < 0) { - pr_warning("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n", - bpf_program__title(prog, false), - retprobe ? "uretprobe" : "uprobe", - binary_path, func_offset, - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + pr_warn("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n", + bpf_program__title(prog, false), + retprobe ? "uretprobe" : "uprobe", + binary_path, func_offset, + libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); return ERR_PTR(pfd); } link = bpf_program__attach_perf_event(prog, pfd); if (IS_ERR(link)) { close(pfd); err = PTR_ERR(link); - pr_warning("program '%s': failed to attach to %s '%s:0x%zx': %s\n", - bpf_program__title(prog, false), - retprobe ? "uretprobe" : "uprobe", - binary_path, func_offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + pr_warn("program '%s': failed to attach to %s '%s:0x%zx': %s\n", + bpf_program__title(prog, false), + retprobe ? "uretprobe" : "uprobe", + binary_path, func_offset, + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return link; } return link; @@ -5230,9 +5224,9 @@ static int perf_event_open_tracepoint(const char *tp_category, tp_id = determine_tracepoint_id(tp_category, tp_name); if (tp_id < 0) { - pr_warning("failed to determine tracepoint '%s/%s' perf event ID: %s\n", - tp_category, tp_name, - libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg))); + pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n", + tp_category, tp_name, + libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg))); return tp_id; } @@ -5244,9 +5238,9 @@ static int perf_event_open_tracepoint(const char *tp_category, -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); if (pfd < 0) { err = -errno; - pr_warning("tracepoint '%s/%s' perf_event_open() failed: %s\n", - tp_category, tp_name, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n", + tp_category, tp_name, + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return err; } return pfd; @@ -5262,20 +5256,20 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog, pfd = perf_event_open_tracepoint(tp_category, tp_name); if (pfd < 0) { - pr_warning("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n", - bpf_program__title(prog, false), - tp_category, tp_name, - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + pr_warn("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n", + bpf_program__title(prog, false), + tp_category, tp_name, + libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); return ERR_PTR(pfd); } link = bpf_program__attach_perf_event(prog, pfd); if (IS_ERR(link)) { close(pfd); err = PTR_ERR(link); - pr_warning("program '%s': failed to attach to tracepoint '%s/%s': %s\n", - bpf_program__title(prog, false), - tp_category, tp_name, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + pr_warn("program '%s': failed to attach to tracepoint '%s/%s': %s\n", + bpf_program__title(prog, false), + tp_category, tp_name, + libbpf_strerror_r(err, errmsg, sizeof(errmsg))); return link; } return link; @@ -5297,8 +5291,8 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, prog_fd = bpf_program__fd(prog); if (prog_fd < 0) { - pr_warning("program '%s': can't attach before loaded\n", - bpf_program__title(prog, false)); + pr_warn("program '%s': can't attach before loaded\n", + bpf_program__title(prog, false)); return ERR_PTR(-EINVAL); } @@ -5311,9 +5305,9 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, if (pfd < 0) { pfd = -errno; free(link); - pr_warning("program '%s': failed to attach to raw tracepoint '%s': %s\n", - bpf_program__title(prog, false), tp_name, - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + pr_warn("program '%s': failed to attach to raw tracepoint '%s': %s\n", + bpf_program__title(prog, false), tp_name, + libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); return ERR_PTR(pfd); } link->fd = pfd; @@ -5415,7 +5409,7 @@ static void perf_buffer__free_cpu_buf(struct perf_buffer *pb, return; if (cpu_buf->base && munmap(cpu_buf->base, pb->mmap_size + pb->page_size)) - pr_warning("failed to munmap cpu_buf #%d\n", cpu_buf->cpu); + pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu); if (cpu_buf->fd >= 0) { ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0); close(cpu_buf->fd); @@ -5465,8 +5459,8 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, -1, PERF_FLAG_FD_CLOEXEC); if (cpu_buf->fd < 0) { err = -errno; - pr_warning("failed to open perf buffer event on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + pr_warn("failed to open perf buffer event on cpu #%d: %s\n", + cpu, libbpf_strerror_r(err, msg, sizeof(msg))); goto error; } @@ -5476,15 +5470,15 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, if (cpu_buf->base == MAP_FAILED) { cpu_buf->base = NULL; err = -errno; - pr_warning("failed to mmap perf buffer on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + pr_warn("failed to mmap perf buffer on cpu #%d: %s\n", + cpu, libbpf_strerror_r(err, msg, sizeof(msg))); goto error; } if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) { err = -errno; - pr_warning("failed to enable perf buffer event on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + pr_warn("failed to enable perf buffer event on cpu #%d: %s\n", + cpu, libbpf_strerror_r(err, msg, sizeof(msg))); goto error; } @@ -5544,8 +5538,8 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, int err, i; if (page_cnt & (page_cnt - 1)) { - pr_warning("page count should be power of two, but is %zu\n", - page_cnt); + pr_warn("page count should be power of two, but is %zu\n", + page_cnt); return ERR_PTR(-EINVAL); } @@ -5553,14 +5547,14 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len); if (err) { err = -errno; - pr_warning("failed to get map info for map FD %d: %s\n", - map_fd, libbpf_strerror_r(err, msg, sizeof(msg))); + pr_warn("failed to get map info for map FD %d: %s\n", + map_fd, libbpf_strerror_r(err, msg, sizeof(msg))); return ERR_PTR(err); } if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { - pr_warning("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", - map.name); + pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", + map.name); return ERR_PTR(-EINVAL); } @@ -5580,8 +5574,8 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC); if (pb->epoll_fd < 0) { err = -errno; - pr_warning("failed to create epoll instance: %s\n", - libbpf_strerror_r(err, msg, sizeof(msg))); + pr_warn("failed to create epoll instance: %s\n", + libbpf_strerror_r(err, msg, sizeof(msg))); goto error; } @@ -5600,13 +5594,13 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events)); if (!pb->events) { err = -ENOMEM; - pr_warning("failed to allocate events: out of memory\n"); + pr_warn("failed to allocate events: out of memory\n"); goto error; } pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs)); if (!pb->cpu_bufs) { err = -ENOMEM; - pr_warning("failed to allocate buffers: out of memory\n"); + pr_warn("failed to allocate buffers: out of memory\n"); goto error; } @@ -5629,9 +5623,9 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, &cpu_buf->fd, 0); if (err) { err = -errno; - pr_warning("failed to set cpu #%d, key %d -> perf FD %d: %s\n", - cpu, map_key, cpu_buf->fd, - libbpf_strerror_r(err, msg, sizeof(msg))); + pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n", + cpu, map_key, cpu_buf->fd, + libbpf_strerror_r(err, msg, sizeof(msg))); goto error; } @@ -5640,9 +5634,9 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd, &pb->events[i]) < 0) { err = -errno; - pr_warning("failed to epoll_ctl cpu #%d perf FD %d: %s\n", - cpu, cpu_buf->fd, - libbpf_strerror_r(err, msg, sizeof(msg))); + pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n", + cpu, cpu_buf->fd, + libbpf_strerror_r(err, msg, sizeof(msg))); goto error; } } @@ -5695,7 +5689,7 @@ perf_buffer__process_record(struct perf_event_header *e, void *ctx) break; } default: - pr_warning("unknown perf sample type %d\n", e->type); + pr_warn("unknown perf sample type %d\n", e->type); return LIBBPF_PERF_EVENT_ERROR; } return LIBBPF_PERF_EVENT_CONT; @@ -5725,7 +5719,7 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms) err = perf_buffer__process_records(pb, cpu_buf); if (err) { - pr_warning("error while processing records: %d\n", err); + pr_warn("error while processing records: %d\n", err); return err; } } @@ -5922,13 +5916,13 @@ bpf_program__get_prog_info_linear(int fd, __u64 arrays) v2 = bpf_prog_info_read_offset_u32(&info_linear->info, desc->count_offset); if (v1 != v2) - pr_warning("%s: mismatch in element count\n", __func__); + pr_warn("%s: mismatch in element count\n", __func__); v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset); v2 = bpf_prog_info_read_offset_u32(&info_linear->info, desc->size_offset); if (v1 != v2) - pr_warning("%s: mismatch in rec size\n", __func__); + pr_warn("%s: mismatch in rec size\n", __func__); } /* step 7: update info_len and data_len */ @@ -5996,20 +5990,19 @@ int libbpf_num_possible_cpus(void) fd = open(fcpu, O_RDONLY); if (fd < 0) { error = errno; - pr_warning("Failed to open file %s: %s\n", - fcpu, strerror(error)); + pr_warn("Failed to open file %s: %s\n", fcpu, strerror(error)); return -error; } len = read(fd, buf, sizeof(buf)); close(fd); if (len <= 0) { error = len ? errno : EINVAL; - pr_warning("Failed to read # of possible cpus from %s: %s\n", - fcpu, strerror(error)); + pr_warn("Failed to read # of possible cpus from %s: %s\n", + fcpu, strerror(error)); return -error; } if (len == sizeof(buf)) { - pr_warning("File %s size overflow\n", fcpu); + pr_warn("File %s size overflow\n", fcpu); return -EOVERFLOW; } buf[len] = '\0'; @@ -6020,8 +6013,8 @@ int libbpf_num_possible_cpus(void) buf[ir] = '\0'; n = sscanf(&buf[il], "%u-%u", &start, &end); if (n <= 0) { - pr_warning("Failed to get # CPUs from %s\n", - &buf[il]); + pr_warn("Failed to get # CPUs from %s\n", + &buf[il]); return -EINVAL; } else if (n == 1) { end = start; @@ -6031,7 +6024,7 @@ int libbpf_num_possible_cpus(void) } } if (tmp_cpus <= 0) { - pr_warning("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu); + pr_warn("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu); return -EINVAL; } diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 5bfe85d4f8aa..b2880766e6b9 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -43,7 +43,7 @@ do { \ libbpf_print(level, "libbpf: " fmt, ##__VA_ARGS__); \ } while (0) -#define pr_warning(fmt, ...) __pr(LIBBPF_WARN, fmt, ##__VA_ARGS__) +#define pr_warn(fmt, ...) __pr(LIBBPF_WARN, fmt, ##__VA_ARGS__) #define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) #define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) @@ -52,7 +52,7 @@ static inline bool libbpf_validate_opts(const char *opts, const char *type_name) { if (user_sz < sizeof(size_t)) { - pr_warning("%s size (%zu) is too small\n", type_name, user_sz); + pr_warn("%s size (%zu) is too small\n", type_name, user_sz); return false; } if (user_sz > opts_sz) { @@ -60,8 +60,8 @@ static inline bool libbpf_validate_opts(const char *opts, for (i = opts_sz; i < user_sz; i++) { if (opts[i]) { - pr_warning("%s has non-zero extra bytes", - type_name); + pr_warn("%s has non-zero extra bytes", + type_name); return false; } } diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index b0f532544c91..78665005b6f7 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -311,7 +311,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) "LGPL-2.1 or BSD-2-Clause", 0, log_buf, log_buf_size); if (prog_fd < 0) { - pr_warning("BPF log buffer:\n%s", log_buf); + pr_warn("BPF log buffer:\n%s", log_buf); return prog_fd; } @@ -499,7 +499,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, return -EFAULT; if (umem->refcount) { - pr_warning("Error: shared umems not supported by libbpf.\n"); + pr_warn("Error: shared umems not supported by libbpf.\n"); return -EBUSY; } From bc3f2956f2b2241aac339d0a1ab7e3a0b8fcc886 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 20 Oct 2019 20:38:56 -0700 Subject: [PATCH 35/52] tools: Sync if_link.h Sync if_link.h into tools/ and get rid of annoying libbpf Makefile warning. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191021033902.3856966-2-andriin@fb.com --- tools/include/uapi/linux/if_link.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 4a8c02cafa9a..8aec8769d944 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -167,6 +167,8 @@ enum { IFLA_NEW_IFINDEX, IFLA_MIN_MTU, IFLA_MAX_MTU, + IFLA_PROP_LIST, + IFLA_ALT_IFNAME, /* Alternative ifname */ __IFLA_MAX }; From f1eead9e3ceef67b98be4b55ed1bfcfa4497b7db Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 20 Oct 2019 20:38:57 -0700 Subject: [PATCH 36/52] libbpf: Add bpf_program__get_{type, expected_attach_type) APIs There are bpf_program__set_type() and bpf_program__set_expected_attach_type(), but no corresponding getters, which seems rather incomplete. Fix this. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191021033902.3856966-3-andriin@fb.com --- tools/lib/bpf/libbpf.c | 11 +++++++++++ tools/lib/bpf/libbpf.h | 5 +++++ tools/lib/bpf/libbpf.map | 2 ++ 3 files changed, 18 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 31c8acddc17b..86fc2f0f8b33 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4463,6 +4463,11 @@ int bpf_program__nth_fd(const struct bpf_program *prog, int n) return fd; } +enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog) +{ + return prog->type; +} + void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type) { prog->type = type; @@ -4497,6 +4502,12 @@ BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT); BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP); BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); +enum bpf_attach_type +bpf_program__get_expected_attach_type(struct bpf_program *prog) +{ + return prog->expected_attach_type; +} + void bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 53ce212764e0..0fdf086beba7 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -302,8 +302,13 @@ LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog); LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog); LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog); LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog); + +LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog); LIBBPF_API void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type); + +LIBBPF_API enum bpf_attach_type +bpf_program__get_expected_attach_type(struct bpf_program *prog); LIBBPF_API void bpf_program__set_expected_attach_type(struct bpf_program *prog, enum bpf_attach_type type); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 4d241fd92dd4..d1473ea4d7a5 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -195,4 +195,6 @@ LIBBPF_0.0.6 { global: bpf_object__open_file; bpf_object__open_mem; + bpf_program__get_expected_attach_type; + bpf_program__get_type; } LIBBPF_0.0.5; From 32dff6db29acc1d2b9fe0423ab033f15c717d776 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 20 Oct 2019 20:38:58 -0700 Subject: [PATCH 37/52] libbpf: Add uprobe/uretprobe and tp/raw_tp section suffixes Map uprobe/uretprobe into KPROBE program type. tp/raw_tp are just an alias for more verbose tracepoint/raw_tracepoint, respectively. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191021033902.3856966-4-andriin@fb.com --- tools/lib/bpf/libbpf.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 86fc2f0f8b33..a027fbf89966 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4547,11 +4547,15 @@ static const struct { } section_names[] = { BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("uprobe/", BPF_PROG_TYPE_KPROBE), BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("uretprobe/", BPF_PROG_TYPE_KPROBE), BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS), BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT), + BPF_PROG_SEC("tp/", BPF_PROG_TYPE_TRACEPOINT), BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT), + BPF_PROG_SEC("raw_tp/", BPF_PROG_TYPE_RAW_TRACEPOINT), BPF_PROG_BTF("tp_btf/", BPF_PROG_TYPE_RAW_TRACEPOINT), BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), From dd4436bb838338cfda253d7f012610a73e4078fd Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 20 Oct 2019 20:38:59 -0700 Subject: [PATCH 38/52] libbpf: Teach bpf_object__open to guess program types Teach bpf_object__open how to guess program type and expected attach type from section names, similar to what bpf_prog_load() does. This seems like a really useful features and an oversight to not have this done during bpf_object_open(). To preserver backwards compatible behavior of bpf_prog_load(), its attr->prog_type is treated as an override of bpf_object__open() decisions, if attr->prog_type is not UNSPECIFIED. There is a slight difference in behavior for bpf_prog_load(). Previously, if bpf_prog_load() was loading BPF object with more than one program, first program's guessed program type and expected attach type would determine corresponding attributes of all the subsequent program types, even if their sections names suggest otherwise. That seems like a rather dubious behavior and with this change it will behave more sanely: each program's type is determined individually, unless they are forced to uniformity through attr->prog_type. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191021033902.3856966-5-andriin@fb.com --- tools/lib/bpf/libbpf.c | 65 +++++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index a027fbf89966..803219d6898c 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3611,6 +3611,7 @@ static struct bpf_object * __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, struct bpf_object_open_opts *opts) { + struct bpf_program *prog; struct bpf_object *obj; const char *obj_name; char tmp_name[64]; @@ -3650,8 +3651,24 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, CHECK_ERR(bpf_object__probe_caps(obj), err, out); CHECK_ERR(bpf_object__elf_collect(obj, relaxed_maps), err, out); CHECK_ERR(bpf_object__collect_reloc(obj), err, out); - bpf_object__elf_finish(obj); + + bpf_object__for_each_program(prog, obj) { + enum bpf_prog_type prog_type; + enum bpf_attach_type attach_type; + + err = libbpf_prog_type_by_name(prog->section_name, &prog_type, + &attach_type); + if (err == -ESRCH) + /* couldn't guess, but user might manually specify */ + continue; + if (err) + goto out; + + bpf_program__set_type(prog, prog_type); + bpf_program__set_expected_attach_type(prog, attach_type); + } + return obj; out: bpf_object__close(obj); @@ -4691,7 +4708,7 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, free(type_names); } - return -EINVAL; + return -ESRCH; } int libbpf_attach_type_by_name(const char *name, @@ -4721,15 +4738,6 @@ int libbpf_attach_type_by_name(const char *name, return -EINVAL; } -static int -bpf_program__identify_section(struct bpf_program *prog, - enum bpf_prog_type *prog_type, - enum bpf_attach_type *expected_attach_type) -{ - return libbpf_prog_type_by_name(prog->section_name, prog_type, - expected_attach_type); -} - int bpf_map__fd(const struct bpf_map *map) { return map ? map->fd : -EINVAL; @@ -4897,8 +4905,6 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, { struct bpf_object_open_attr open_attr = {}; struct bpf_program *prog, *first_prog = NULL; - enum bpf_attach_type expected_attach_type; - enum bpf_prog_type prog_type; struct bpf_object *obj; struct bpf_map *map; int err; @@ -4916,26 +4922,27 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, return -ENOENT; bpf_object__for_each_program(prog, obj) { + enum bpf_attach_type attach_type = attr->expected_attach_type; /* - * If type is not specified, try to guess it based on - * section name. + * to preserve backwards compatibility, bpf_prog_load treats + * attr->prog_type, if specified, as an override to whatever + * bpf_object__open guessed */ - prog_type = attr->prog_type; - prog->prog_ifindex = attr->ifindex; - expected_attach_type = attr->expected_attach_type; - if (prog_type == BPF_PROG_TYPE_UNSPEC) { - err = bpf_program__identify_section(prog, &prog_type, - &expected_attach_type); - if (err < 0) { - bpf_object__close(obj); - return -EINVAL; - } + if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) { + bpf_program__set_type(prog, attr->prog_type); + bpf_program__set_expected_attach_type(prog, + attach_type); + } + if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) { + /* + * we haven't guessed from section name and user + * didn't provide a fallback type, too bad... + */ + bpf_object__close(obj); + return -EINVAL; } - bpf_program__set_type(prog, prog_type); - bpf_program__set_expected_attach_type(prog, - expected_attach_type); - + prog->prog_ifindex = attr->ifindex; prog->log_level = attr->log_level; prog->prog_flags = attr->prog_flags; if (!first_prog) From f90415e9600c5227131531c0ed11514a2d3bbe62 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 20 Oct 2019 20:39:00 -0700 Subject: [PATCH 39/52] selftests/bpf: Make a copy of subtest name test_progs never created a copy of subtest name, rather just stored pointer to whatever string test provided. This is bad as that string might be freed or modified by the end of subtest. Fix this by creating a copy of given subtest name when subtest starts. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191021033902.3856966-6-andriin@fb.com --- tools/testing/selftests/bpf/test_progs.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index c7e52f4194e2..a05a807840c0 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -20,7 +20,7 @@ struct prog_test_def { bool tested; bool need_cgroup_cleanup; - const char *subtest_name; + char *subtest_name; int subtest_num; /* store counts before subtest started */ @@ -81,16 +81,17 @@ void test__end_subtest() fprintf(env.stdout, "#%d/%d %s:%s\n", test->test_num, test->subtest_num, test->subtest_name, sub_error_cnt ? "FAIL" : "OK"); + + free(test->subtest_name); + test->subtest_name = NULL; } bool test__start_subtest(const char *name) { struct prog_test_def *test = env.test; - if (test->subtest_name) { + if (test->subtest_name) test__end_subtest(); - test->subtest_name = NULL; - } test->subtest_num++; @@ -104,7 +105,13 @@ bool test__start_subtest(const char *name) if (!should_run(&env.subtest_selector, test->subtest_num, name)) return false; - test->subtest_name = name; + test->subtest_name = strdup(name); + if (!test->subtest_name) { + fprintf(env.stderr, + "Subtest #%d: failed to copy subtest name!\n", + test->subtest_num); + return false; + } env.test->old_error_cnt = env.test->error_cnt; return true; From 8af1c8b8d6223c31fada6148fd870257407952d1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 20 Oct 2019 20:39:01 -0700 Subject: [PATCH 40/52] selftests/bpf: Make reference_tracking test use subtests reference_tracking is actually a set of 9 sub-tests. Make it explicitly so. Also, add explicit "classifier/" prefix to BPF program section names to let libbpf correctly guess program type. Thus, also remove explicit bpf_prog__set_type() call. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191021033902.3856966-7-andriin@fb.com --- .../bpf/prog_tests/reference_tracking.c | 3 ++- .../selftests/bpf/progs/test_sk_lookup_kern.c | 18 +++++++++--------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c index 86cee820d4d3..4493ba277bd7 100644 --- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c +++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c @@ -31,7 +31,8 @@ void test_reference_tracking(void) if (strstr(title, ".text") != NULL) continue; - bpf_program__set_type(prog, BPF_PROG_TYPE_SCHED_CLS); + if (!test__start_subtest(title)) + continue; /* Expect verifier failure if test name has 'fail' */ if (strstr(title, "fail") != NULL) { diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c index e21cd736c196..cb49ccb707d1 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c @@ -53,7 +53,7 @@ static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off, return result; } -SEC("sk_lookup_success") +SEC("classifier/sk_lookup_success") int bpf_sk_lookup_test0(struct __sk_buff *skb) { void *data_end = (void *)(long)skb->data_end; @@ -78,7 +78,7 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb) return sk ? TC_ACT_OK : TC_ACT_UNSPEC; } -SEC("sk_lookup_success_simple") +SEC("classifier/sk_lookup_success_simple") int bpf_sk_lookup_test1(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -90,7 +90,7 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb) return 0; } -SEC("fail_use_after_free") +SEC("classifier/fail_use_after_free") int bpf_sk_lookup_uaf(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -105,7 +105,7 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb) return family; } -SEC("fail_modify_sk_pointer") +SEC("classifier/fail_modify_sk_pointer") int bpf_sk_lookup_modptr(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -120,7 +120,7 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb) return 0; } -SEC("fail_modify_sk_or_null_pointer") +SEC("classifier/fail_modify_sk_or_null_pointer") int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -134,7 +134,7 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb) return 0; } -SEC("fail_no_release") +SEC("classifier/fail_no_release") int bpf_sk_lookup_test2(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -143,7 +143,7 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb) return 0; } -SEC("fail_release_twice") +SEC("classifier/fail_release_twice") int bpf_sk_lookup_test3(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -155,7 +155,7 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb) return 0; } -SEC("fail_release_unchecked") +SEC("classifier/fail_release_unchecked") int bpf_sk_lookup_test4(struct __sk_buff *skb) { struct bpf_sock_tuple tuple = {}; @@ -172,7 +172,7 @@ void lookup_no_release(struct __sk_buff *skb) bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0); } -SEC("fail_no_release_subcall") +SEC("classifier/fail_no_release_subcall") int bpf_sk_lookup_test5(struct __sk_buff *skb) { lookup_no_release(skb); From 1678e33c21b705e9e5d26385aa1611aabe5482dc Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 20 Oct 2019 20:39:02 -0700 Subject: [PATCH 41/52] selftest/bpf: Get rid of a bunch of explicit BPF program type setting Now that libbpf can correctly guess BPF program types from section names, remove a bunch of explicit bpf_program__set_type() calls throughout tests. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20191021033902.3856966-8-andriin@fb.com --- tools/testing/selftests/bpf/prog_tests/attach_probe.c | 5 ----- tools/testing/selftests/bpf/prog_tests/core_reloc.c | 1 - tools/testing/selftests/bpf/prog_tests/rdonly_maps.c | 4 ---- tools/testing/selftests/bpf/test_maps.c | 4 ---- 4 files changed, 14 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 4f50d32c4abb..0ee1ce100a4a 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -99,11 +99,6 @@ void test_attach_probe(void) "prog '%s' not found\n", uretprobe_name)) goto cleanup; - bpf_program__set_kprobe(kprobe_prog); - bpf_program__set_kprobe(kretprobe_prog); - bpf_program__set_kprobe(uprobe_prog); - bpf_program__set_kprobe(uretprobe_prog); - /* create maps && load programs */ err = bpf_object__load(obj); if (CHECK(err, "obj_load", "err %d\n", err)) diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 2b3586dc6c86..523dca82dc82 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -391,7 +391,6 @@ void test_core_reloc(void) if (CHECK(!prog, "find_probe", "prog '%s' not found\n", probe_name)) goto cleanup; - bpf_program__set_type(prog, BPF_PROG_TYPE_RAW_TRACEPOINT); load_attr.obj = obj; load_attr.log_level = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c index 9bf9de0aaeea..d90acc13d1ec 100644 --- a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c +++ b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c @@ -36,10 +36,6 @@ void test_rdonly_maps(void) if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj))) return; - bpf_object__for_each_program(prog, obj) { - bpf_program__set_raw_tracepoint(prog); - } - err = bpf_object__load(obj); if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) goto cleanup; diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 806b298397d3..02eae1e864c2 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1142,7 +1142,6 @@ out_sockmap: #define MAPINMAP_PROG "./test_map_in_map.o" static void test_map_in_map(void) { - struct bpf_program *prog; struct bpf_object *obj; struct bpf_map *map; int mim_fd, fd, err; @@ -1179,9 +1178,6 @@ static void test_map_in_map(void) goto out_map_in_map; } - bpf_object__for_each_program(prog, obj) { - bpf_program__set_xdp(prog); - } bpf_object__load(obj); map = bpf_object__find_map_by_name(obj, "mim_array"); From e13a2fe642bd4a42c2b468cdb25ad3aab933d572 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 21 Oct 2019 21:31:19 -0700 Subject: [PATCH 42/52] tools/bpf: Turn on llvm alu32 attribute by default LLVM alu32 was introduced in LLVM7: https://reviews.llvm.org/rL325987 https://reviews.llvm.org/rL325989 Experiments showed that in general performance is better with alu32 enabled: https://lwn.net/Articles/775316/ This patch turns on alu32 with no-flavor test_progs which is tested most often. The flavor test at no_alu32/test_progs can be used to test without alu32 enabled. The Makefile check for whether LLVM supports '-mattr=+alu32 -mcpu=v3' is removed as LLVM7 should be available for recent distributions and also latest LLVM is preferred to run BPF selftests. Note that jmp32 is checked by -mcpu=probe and will be enabled if the host kernel supports it. Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20191022043119.2625263-1-yhs@fb.com --- tools/testing/selftests/bpf/Makefile | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 4ff5f4aada08..11ff34e7311b 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -30,17 +30,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \ test_cgroup_storage test_select_reuseport test_section_names \ test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \ - test_cgroup_attach xdping - -# Also test sub-register code-gen if LLVM has eBPF v3 processor support which -# contains both ALU32 and JMP32 instructions. -SUBREG_CODEGEN := $(shell echo "int cal(int a) { return a > 0; }" | \ - $(CLANG) -target bpf -O2 -emit-llvm -S -x c - -o - | \ - $(LLC) -mattr=+alu32 -mcpu=v3 2>&1 | \ - grep 'if w') -ifneq ($(SUBREG_CODEGEN),) -TEST_GEN_PROGS += test_progs-alu32 -endif + test_cgroup_attach xdping test_progs-no_alu32 # Also test bpf-gcc, if present ifneq ($(BPF_GCC),) @@ -179,7 +169,7 @@ endef # $eval()) and pass control to DEFINE_TEST_RUNNER_RULES. # Parameters: # $1 - test runner base binary name (e.g., test_progs) -# $2 - test runner extra "flavor" (e.g., alu32, gcc-bpf, etc) +# $2 - test runner extra "flavor" (e.g., no_alu32, gcc-bpf, etc) define DEFINE_TEST_RUNNER TRUNNER_OUTPUT := $(OUTPUT)$(if $2,/)$2 @@ -201,7 +191,7 @@ endef # Using TRUNNER_XXX variables, provided by callers of DEFINE_TEST_RUNNER and # set up by DEFINE_TEST_RUNNER itself, create test runner build rules with: # $1 - test runner base binary name (e.g., test_progs) -# $2 - test runner extra "flavor" (e.g., alu32, gcc-bpf, etc) +# $2 - test runner extra "flavor" (e.g., no_alu32, gcc-bpf, etc) define DEFINE_TEST_RUNNER_RULES ifeq ($($(TRUNNER_OUTPUT)-dir),) @@ -272,14 +262,12 @@ TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \ $(wildcard progs/btf_dump_test_case_*.c) TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE TRUNNER_BPF_CFLAGS := -I. -I$(OUTPUT) $(BPF_CFLAGS) $(CLANG_CFLAGS) -TRUNNER_BPF_LDFLAGS := +TRUNNER_BPF_LDFLAGS := -mattr=+alu32 $(eval $(call DEFINE_TEST_RUNNER,test_progs)) -# Define test_progs-alu32 test runner. -ifneq ($(SUBREG_CODEGEN),) -TRUNNER_BPF_LDFLAGS += -mattr=+alu32 -$(eval $(call DEFINE_TEST_RUNNER,test_progs,alu32)) -endif +# Define test_progs-no_alu32 test runner. +TRUNNER_BPF_LDFLAGS := +$(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32)) # Define test_progs BPF-GCC-flavored test runner. ifneq ($(BPF_GCC),) @@ -319,4 +307,4 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ - feature $(OUTPUT)/*.o $(OUTPUT)/alu32 $(OUTPUT)/bpf_gcc + feature $(OUTPUT)/*.o $(OUTPUT)/no_alu32 $(OUTPUT)/bpf_gcc From e00aca65e646da08f8dce31c9b89f11dab76198c Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 22 Oct 2019 10:21:00 -0700 Subject: [PATCH 43/52] libbpf: Make DECLARE_LIBBPF_OPTS macro strictly a variable declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LIBBPF_OPTS is implemented as a mix of field declaration and memset + assignment. This makes it neither variable declaration nor purely statements, which is a problem, because you can't mix it with either other variable declarations nor other function statements, because C90 compiler mode emits warning on mixing all that together. This patch changes LIBBPF_OPTS into a strictly declaration of variable and solves this problem, as can be seen in case of bpftool, which previously would emit compiler warning, if done this way (LIBBPF_OPTS as part of function variables declaration block). This patch also renames LIBBPF_OPTS into DECLARE_LIBBPF_OPTS to follow kernel convention for similar macros more closely. v1->v2: - rename LIBBPF_OPTS into DECLARE_LIBBPF_OPTS (Jakub Sitnicki). Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20191022172100.3281465-1-andriin@fb.com --- tools/bpf/bpftool/prog.c | 8 ++++---- tools/lib/bpf/libbpf.c | 4 ++-- tools/lib/bpf/libbpf.h | 19 ++++++++++++------- .../selftests/bpf/prog_tests/attach_probe.c | 2 +- .../selftests/bpf/prog_tests/core_reloc.c | 2 +- .../bpf/prog_tests/reference_tracking.c | 2 +- 6 files changed, 21 insertions(+), 16 deletions(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 27da96a797ab..4535c863d2cd 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1091,8 +1091,11 @@ free_data_in: static int load_with_options(int argc, char **argv, bool first_prog_only) { - struct bpf_object_load_attr load_attr = { 0 }; enum bpf_prog_type common_prog_type = BPF_PROG_TYPE_UNSPEC; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts, + .relaxed_maps = relaxed_maps, + ); + struct bpf_object_load_attr load_attr = { 0 }; enum bpf_attach_type expected_attach_type; struct map_replace *map_replace = NULL; struct bpf_program *prog = NULL, *pos; @@ -1106,9 +1109,6 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) const char *file; int idx, err; - LIBBPF_OPTS(bpf_object_open_opts, open_opts, - .relaxed_maps = relaxed_maps, - ); if (!REQ_ARGS(2)) return -1; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 803219d6898c..8b4d765c422b 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -3678,7 +3678,7 @@ out: static struct bpf_object * __bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags) { - LIBBPF_OPTS(bpf_object_open_opts, opts, + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, .relaxed_maps = flags & MAPS_RELAX_COMPAT, ); @@ -3730,7 +3730,7 @@ struct bpf_object * bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, const char *name) { - LIBBPF_OPTS(bpf_object_open_opts, opts, + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, .object_name = name, /* wrong default, but backwards-compatible */ .relaxed_maps = true, diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 0fdf086beba7..c63e2ff84abc 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -75,14 +75,19 @@ struct bpf_object_open_attr { * have all the padding bytes initialized to zero. It's not guaranteed though, * when copying literal, that compiler won't copy garbage in literal's padding * bytes, but that's the best way I've found and it seems to work in practice. + * + * Macro declares opts struct of given type and name, zero-initializes, + * including any extra padding, it with memset() and then assigns initial + * values provided by users in struct initializer-syntax as varargs. */ -#define LIBBPF_OPTS(TYPE, NAME, ...) \ - struct TYPE NAME; \ - memset(&NAME, 0, sizeof(struct TYPE)); \ - NAME = (struct TYPE) { \ - .sz = sizeof(struct TYPE), \ - __VA_ARGS__ \ - } +#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \ + struct TYPE NAME = ({ \ + memset(&NAME, 0, sizeof(struct TYPE)); \ + (struct TYPE) { \ + .sz = sizeof(struct TYPE), \ + __VA_ARGS__ \ + }; \ + }) struct bpf_object_open_opts { /* size of this struct, for forward/backward compatiblity */ diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index 0ee1ce100a4a..a83111a32d4a 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -50,7 +50,7 @@ void test_attach_probe(void) const int kprobe_idx = 0, kretprobe_idx = 1; const int uprobe_idx = 2, uretprobe_idx = 3; const char *obj_name = "attach_probe"; - LIBBPF_OPTS(bpf_object_open_opts, open_opts, + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts, .object_name = obj_name, .relaxed_maps = true, ); diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 523dca82dc82..09dfa75fe948 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -377,7 +377,7 @@ void test_core_reloc(void) if (!test__start_subtest(test_case->case_name)) continue; - LIBBPF_OPTS(bpf_object_open_opts, opts, + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, .relaxed_core_relocs = test_case->relaxed_core_relocs, ); diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c index 4493ba277bd7..fc0d7f4f02cf 100644 --- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c +++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c @@ -5,7 +5,7 @@ void test_reference_tracking(void) { const char *file = "test_sk_lookup_kern.o"; const char *obj_name = "ref_track"; - LIBBPF_OPTS(bpf_object_open_opts, open_opts, + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts, .object_name = obj_name, .relaxed_maps = true, ); From 5e5b03d163e15a40b0fa57c70b4e8edd549b0b98 Mon Sep 17 00:00:00 2001 From: "Ben Dooks (Codethink)" Date: Tue, 22 Oct 2019 13:59:25 +0100 Subject: [PATCH 44/52] xdp: Fix type of string pointer in __XDP_ACT_SYM_TAB The table entry in __XDP_ACT_SYM_TAB for the last item is set to { -1, 0 } where it should be { -1, NULL } as the second item is a pointer to a string. Fixes the following sparse warnings: ./include/trace/events/xdp.h:28:1: warning: Using plain integer as NULL pointer ./include/trace/events/xdp.h:53:1: warning: Using plain integer as NULL pointer ./include/trace/events/xdp.h:82:1: warning: Using plain integer as NULL pointer ./include/trace/events/xdp.h:140:1: warning: Using plain integer as NULL pointer ./include/trace/events/xdp.h:155:1: warning: Using plain integer as NULL pointer ./include/trace/events/xdp.h:190:1: warning: Using plain integer as NULL pointer ./include/trace/events/xdp.h:225:1: warning: Using plain integer as NULL pointer ./include/trace/events/xdp.h:260:1: warning: Using plain integer as NULL pointer ./include/trace/events/xdp.h:318:1: warning: Using plain integer as NULL pointer ./include/trace/events/xdp.h:356:1: warning: Using plain integer as NULL pointer ./include/trace/events/xdp.h:390:1: warning: Using plain integer as NULL pointer Signed-off-by: Ben Dooks (Codethink) Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20191022125925.10508-1-ben.dooks@codethink.co.uk --- include/trace/events/xdp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 8c8420230a10..c7e3c9c5bad3 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -22,7 +22,7 @@ #define __XDP_ACT_SYM_FN(x) \ { XDP_##x, #x }, #define __XDP_ACT_SYM_TAB \ - __XDP_ACT_MAP(__XDP_ACT_SYM_FN) { -1, 0 } + __XDP_ACT_MAP(__XDP_ACT_SYM_FN) { -1, NULL } __XDP_ACT_MAP(__XDP_ACT_TP_FN) TRACE_EVENT(xdp_exception, From d7d962a095474fcd94861d5f4cccada2e4215aae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= Date: Tue, 22 Oct 2019 09:22:06 +0200 Subject: [PATCH 45/52] libbpf: Use implicit XSKMAP lookup from AF_XDP XDP program MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit 43e74c0267a3 ("bpf_xdp_redirect_map: Perform map lookup in eBPF helper") the bpf_redirect_map() helper learned to do map lookup, which means that the explicit lookup in the XDP program for AF_XDP is not needed for post-5.3 kernels. This commit adds the implicit map lookup with default action, which improves the performance for the "rx_drop" [1] scenario with ~4%. For pre-5.3 kernels, the bpf_redirect_map() returns XDP_ABORTED, and a fallback path for backward compatibility is entered, where explicit lookup is still performed. This means a slight regression for older kernels (an additional bpf_redirect_map() call), but I consider that a fair punishment for users not upgrading their kernels. ;-) v1->v2: Backward compatibility (Toke) [2] v2->v3: Avoid masking/zero-extension by using JMP32 [3] [1] # xdpsock -i eth0 -z -r [2] https://lore.kernel.org/bpf/87pnirb3dc.fsf@toke.dk/ [3] https://lore.kernel.org/bpf/87v9sip0i8.fsf@toke.dk/ Suggested-by: Toke Høiland-Jørgensen Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/20191022072206.6318-1-bjorn.topel@gmail.com --- tools/lib/bpf/xsk.c | 46 +++++++++++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 78665005b6f7..9a2af445ef23 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -274,33 +274,55 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk) /* This is the C-program: * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) * { - * int index = ctx->rx_queue_index; + * int ret, index = ctx->rx_queue_index; * * // A set entry here means that the correspnding queue_id * // has an active AF_XDP socket bound to it. + * ret = bpf_redirect_map(&xsks_map, index, XDP_PASS); + * if (ret > 0) + * return ret; + * + * // Fallback for pre-5.3 kernels, not supporting default + * // action in the flags parameter. * if (bpf_map_lookup_elem(&xsks_map, &index)) * return bpf_redirect_map(&xsks_map, index, 0); - * * return XDP_PASS; * } */ struct bpf_insn prog[] = { - /* r1 = *(u32 *)(r1 + 16) */ - BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 16), - /* *(u32 *)(r10 - 4) = r1 */ - BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_1, -4), - BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), - BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + /* r2 = *(u32 *)(r1 + 16) */ + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16), + /* *(u32 *)(r10 - 4) = r2 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4), + /* r1 = xskmap[] */ BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + /* r3 = XDP_PASS */ + BPF_MOV64_IMM(BPF_REG_3, 2), + /* call bpf_redirect_map */ + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + /* if w0 != 0 goto pc+13 */ + BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13), + /* r2 = r10 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + /* r2 += -4 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + /* r1 = xskmap[] */ + BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + /* call bpf_map_lookup_elem */ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + /* r1 = r0 */ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), - BPF_MOV32_IMM(BPF_REG_0, 2), - /* if r1 == 0 goto +5 */ + /* r0 = XDP_PASS */ + BPF_MOV64_IMM(BPF_REG_0, 2), + /* if r1 == 0 goto pc+5 */ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5), /* r2 = *(u32 *)(r10 - 4) */ - BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4), - BPF_MOV32_IMM(BPF_REG_3, 0), + /* r1 = xskmap[] */ + BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + /* r3 = 0 */ + BPF_MOV64_IMM(BPF_REG_3, 0), + /* call bpf_redirect_map */ BPF_EMIT_CALL(BPF_FUNC_redirect_map), /* The jumps are to this instruction */ BPF_EXIT_INSN(), From 9bc6384b364407381cc24c2150d13dd29f5bfdd2 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 22 Oct 2019 23:09:13 -0700 Subject: [PATCH 46/52] selftests/bpf: Move test_section_names into test_progs and fix it Make test_section_names into test_progs test. Also fix ESRCH expected results. Add uprobe/uretprobe and tp/raw_tp test cases. Fixes: dd4436bb8383 ("libbpf: Teach bpf_object__open to guess program types") Reported-by: kernel test robot Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191023060913.1713817-1-andriin@fb.com --- tools/testing/selftests/bpf/Makefile | 2 +- .../section_names.c} | 90 +++++++------------ 2 files changed, 31 insertions(+), 61 deletions(-) rename tools/testing/selftests/bpf/{test_section_names.c => prog_tests/section_names.c} (73%) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 11ff34e7311b..521fbdada5cc 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -28,7 +28,7 @@ LDLIBS += -lcap -lelf -lrt -lpthread TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \ - test_cgroup_storage test_select_reuseport test_section_names \ + test_cgroup_storage test_select_reuseport \ test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \ test_cgroup_attach xdping test_progs-no_alu32 diff --git a/tools/testing/selftests/bpf/test_section_names.c b/tools/testing/selftests/bpf/prog_tests/section_names.c similarity index 73% rename from tools/testing/selftests/bpf/test_section_names.c rename to tools/testing/selftests/bpf/prog_tests/section_names.c index 29833aeaf0de..9d9351dc2ded 100644 --- a/tools/testing/selftests/bpf/test_section_names.c +++ b/tools/testing/selftests/bpf/prog_tests/section_names.c @@ -1,10 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2018 Facebook +#include -#include -#include - -#include "bpf_util.h" +static int duration = 0; struct sec_name_test { const char sec_name[32]; @@ -20,19 +18,23 @@ struct sec_name_test { }; static struct sec_name_test tests[] = { - {"InvAliD", {-EINVAL, 0, 0}, {-EINVAL, 0} }, - {"cgroup", {-EINVAL, 0, 0}, {-EINVAL, 0} }, + {"InvAliD", {-ESRCH, 0, 0}, {-EINVAL, 0} }, + {"cgroup", {-ESRCH, 0, 0}, {-EINVAL, 0} }, {"socket", {0, BPF_PROG_TYPE_SOCKET_FILTER, 0}, {-EINVAL, 0} }, {"kprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} }, + {"uprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} }, {"kretprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} }, + {"uretprobe/", {0, BPF_PROG_TYPE_KPROBE, 0}, {-EINVAL, 0} }, {"classifier", {0, BPF_PROG_TYPE_SCHED_CLS, 0}, {-EINVAL, 0} }, {"action", {0, BPF_PROG_TYPE_SCHED_ACT, 0}, {-EINVAL, 0} }, {"tracepoint/", {0, BPF_PROG_TYPE_TRACEPOINT, 0}, {-EINVAL, 0} }, + {"tp/", {0, BPF_PROG_TYPE_TRACEPOINT, 0}, {-EINVAL, 0} }, { "raw_tracepoint/", {0, BPF_PROG_TYPE_RAW_TRACEPOINT, 0}, {-EINVAL, 0}, }, + {"raw_tp/", {0, BPF_PROG_TYPE_RAW_TRACEPOINT, 0}, {-EINVAL, 0} }, {"xdp", {0, BPF_PROG_TYPE_XDP, 0}, {-EINVAL, 0} }, {"perf_event", {0, BPF_PROG_TYPE_PERF_EVENT, 0}, {-EINVAL, 0} }, {"lwt_in", {0, BPF_PROG_TYPE_LWT_IN, 0}, {-EINVAL, 0} }, @@ -146,7 +148,7 @@ static struct sec_name_test tests[] = { }, }; -static int test_prog_type_by_name(const struct sec_name_test *test) +static void test_prog_type_by_name(const struct sec_name_test *test) { enum bpf_attach_type expected_attach_type; enum bpf_prog_type prog_type; @@ -155,79 +157,47 @@ static int test_prog_type_by_name(const struct sec_name_test *test) rc = libbpf_prog_type_by_name(test->sec_name, &prog_type, &expected_attach_type); - if (rc != test->expected_load.rc) { - warnx("prog: unexpected rc=%d for %s", rc, test->sec_name); - return -1; - } + CHECK(rc != test->expected_load.rc, "check_code", + "prog: unexpected rc=%d for %s", rc, test->sec_name); if (rc) - return 0; + return; - if (prog_type != test->expected_load.prog_type) { - warnx("prog: unexpected prog_type=%d for %s", prog_type, - test->sec_name); - return -1; - } + CHECK(prog_type != test->expected_load.prog_type, "check_prog_type", + "prog: unexpected prog_type=%d for %s", + prog_type, test->sec_name); - if (expected_attach_type != test->expected_load.expected_attach_type) { - warnx("prog: unexpected expected_attach_type=%d for %s", - expected_attach_type, test->sec_name); - return -1; - } - - return 0; + CHECK(expected_attach_type != test->expected_load.expected_attach_type, + "check_attach_type", "prog: unexpected expected_attach_type=%d for %s", + expected_attach_type, test->sec_name); } -static int test_attach_type_by_name(const struct sec_name_test *test) +static void test_attach_type_by_name(const struct sec_name_test *test) { enum bpf_attach_type attach_type; int rc; rc = libbpf_attach_type_by_name(test->sec_name, &attach_type); - if (rc != test->expected_attach.rc) { - warnx("attach: unexpected rc=%d for %s", rc, test->sec_name); - return -1; - } + CHECK(rc != test->expected_attach.rc, "check_ret", + "attach: unexpected rc=%d for %s", rc, test->sec_name); if (rc) - return 0; + return; - if (attach_type != test->expected_attach.attach_type) { - warnx("attach: unexpected attach_type=%d for %s", attach_type, - test->sec_name); - return -1; - } - - return 0; + CHECK(attach_type != test->expected_attach.attach_type, + "check_attach_type", "attach: unexpected attach_type=%d for %s", + attach_type, test->sec_name); } -static int run_test_case(const struct sec_name_test *test) +void test_section_names(void) { - if (test_prog_type_by_name(test)) - return -1; - if (test_attach_type_by_name(test)) - return -1; - return 0; -} - -static int run_tests(void) -{ - int passes = 0; - int fails = 0; int i; for (i = 0; i < ARRAY_SIZE(tests); ++i) { - if (run_test_case(&tests[i])) - ++fails; - else - ++passes; - } - printf("Summary: %d PASSED, %d FAILED\n", passes, fails); - return fails ? -1 : 0; -} + struct sec_name_test *test = &tests[i]; -int main(int argc, char **argv) -{ - return run_tests(); + test_prog_type_by_name(test); + test_attach_type_by_name(test); + } } From 45e587b5e8e5f24dd2393f96546b604a38612249 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Wed, 23 Oct 2019 08:31:28 -0700 Subject: [PATCH 47/52] selftests/bpf: Fix LDLIBS order Order of $(LDLIBS) matters to linker, so put it after all the .o and .a files. Fixes: 74b5a5968fe8 ("selftests/bpf: Replace test_progs and test_maps w/ general rule") Reported-by: Daniel Borkmann Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191023153128.3486140-1-andriin@fb.com --- tools/testing/selftests/bpf/Makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 521fbdada5cc..933f39381039 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -231,14 +231,14 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \ $(TRUNNER_EXTRA_HDRS) \ $(TRUNNER_BPF_OBJS) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) - cd $$(@D) && $$(CC) $$(CFLAGS) $$(LDLIBS) -c $(CURDIR)/$$< -o $$(@F) + cd $$(@D) && $$(CC) $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ %.c \ $(TRUNNER_EXTRA_HDRS) \ $(TRUNNER_TESTS_HDR) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) - $$(CC) $$(CFLAGS) $$(LDLIBS) -c $$< -o $$@ + $$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@ $(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT) ifneq ($2,) @@ -249,7 +249,7 @@ endif $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ $(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \ | $(TRUNNER_BINARY)-extras - $$(CC) $$(CFLAGS) $$(LDLIBS) $$(filter %.a %.o,$$^) -o $$@ + $$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ endef @@ -303,7 +303,7 @@ verifier/tests.h: verifier/*.c echo '#endif' \ ) > verifier/tests.h) $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) - $(CC) $(CFLAGS) $(LDLIBS) $(filter %.a %.o %.c,$^) -o $@ + $(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ From e0e4f8e938c48b7c5377661fa3e4738901e6a19b Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Mon, 21 Oct 2019 10:57:04 +0200 Subject: [PATCH 48/52] xsk: Improve documentation for AF_XDP Added sections on all the bind flags, libbpf, all the setsockopts and all the getsockopts. Also updated the document to reflect the latest features and to correct some spelling errors. v1 -> v2: * Updated XDP program with latest BTF map format * Added one more FAQ entry * Some minor edits and corrections v2 -> v3: * Simplified XDP_SHARED_UMEM example XDP program Signed-off-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/1571648224-16889-1-git-send-email-magnus.karlsson@intel.com --- Documentation/networking/af_xdp.rst | 259 +++++++++++++++++++++++++--- 1 file changed, 231 insertions(+), 28 deletions(-) diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst index 83f7ae5fc045..7a4caaaf3a17 100644 --- a/Documentation/networking/af_xdp.rst +++ b/Documentation/networking/af_xdp.rst @@ -40,13 +40,13 @@ allocates memory for this UMEM using whatever means it feels is most appropriate (malloc, mmap, huge pages, etc). This memory area is then registered with the kernel using the new setsockopt XDP_UMEM_REG. The UMEM also has two rings: the FILL ring and the COMPLETION ring. The -fill ring is used by the application to send down addr for the kernel +FILL ring is used by the application to send down addr for the kernel to fill in with RX packet data. References to these frames will then appear in the RX ring once each packet has been received. The -completion ring, on the other hand, contains frame addr that the +COMPLETION ring, on the other hand, contains frame addr that the kernel has transmitted completely and can now be used again by user space, for either TX or RX. Thus, the frame addrs appearing in the -completion ring are addrs that were previously transmitted using the +COMPLETION ring are addrs that were previously transmitted using the TX ring. In summary, the RX and FILL rings are used for the RX path and the TX and COMPLETION rings are used for the TX path. @@ -91,11 +91,16 @@ Concepts ======== In order to use an AF_XDP socket, a number of associated objects need -to be setup. +to be setup. These objects and their options are explained in the +following sections. -Jonathan Corbet has also written an excellent article on LWN, -"Accelerating networking with AF_XDP". It can be found at -https://lwn.net/Articles/750845/. +For an overview on how AF_XDP works, you can also take a look at the +Linux Plumbers paper from 2018 on the subject: +http://vger.kernel.org/lpc_net2018_talks/lpc18_paper_af_xdp_perf-v2.pdf. Do +NOT consult the paper from 2017 on "AF_PACKET v4", the first attempt +at AF_XDP. Nearly everything changed since then. Jonathan Corbet has +also written an excellent article on LWN, "Accelerating networking +with AF_XDP". It can be found at https://lwn.net/Articles/750845/. UMEM ---- @@ -113,22 +118,22 @@ the next socket B can do this by setting the XDP_SHARED_UMEM flag in struct sockaddr_xdp member sxdp_flags, and passing the file descriptor of A to struct sockaddr_xdp member sxdp_shared_umem_fd. -The UMEM has two single-producer/single-consumer rings, that are used +The UMEM has two single-producer/single-consumer rings that are used to transfer ownership of UMEM frames between the kernel and the user-space application. Rings ----- -There are a four different kind of rings: Fill, Completion, RX and +There are a four different kind of rings: FILL, COMPLETION, RX and TX. All rings are single-producer/single-consumer, so the user-space application need explicit synchronization of multiple processes/threads are reading/writing to them. -The UMEM uses two rings: Fill and Completion. Each socket associated +The UMEM uses two rings: FILL and COMPLETION. Each socket associated with the UMEM must have an RX queue, TX queue or both. Say, that there is a setup with four sockets (all doing TX and RX). Then there will be -one Fill ring, one Completion ring, four TX rings and four RX rings. +one FILL ring, one COMPLETION ring, four TX rings and four RX rings. The rings are head(producer)/tail(consumer) based rings. A producer writes the data ring at the index pointed out by struct xdp_ring @@ -146,7 +151,7 @@ The size of the rings need to be of size power of two. UMEM Fill Ring ~~~~~~~~~~~~~~ -The Fill ring is used to transfer ownership of UMEM frames from +The FILL ring is used to transfer ownership of UMEM frames from user-space to kernel-space. The UMEM addrs are passed in the ring. As an example, if the UMEM is 64k and each chunk is 4k, then the UMEM has 16 chunks and can pass addrs between 0 and 64k. @@ -164,8 +169,8 @@ chunks mode, then the incoming addr will be left untouched. UMEM Completion Ring ~~~~~~~~~~~~~~~~~~~~ -The Completion Ring is used transfer ownership of UMEM frames from -kernel-space to user-space. Just like the Fill ring, UMEM indicies are +The COMPLETION Ring is used transfer ownership of UMEM frames from +kernel-space to user-space. Just like the FILL ring, UMEM indices are used. Frames passed from the kernel to user-space are frames that has been @@ -181,7 +186,7 @@ The RX ring is the receiving side of a socket. Each entry in the ring is a struct xdp_desc descriptor. The descriptor contains UMEM offset (addr) and the length of the data (len). -If no frames have been passed to kernel via the Fill ring, no +If no frames have been passed to kernel via the FILL ring, no descriptors will (or can) appear on the RX ring. The user application consumes struct xdp_desc descriptors from this @@ -199,8 +204,24 @@ be relaxed in the future. The user application produces struct xdp_desc descriptors to this ring. +Libbpf +====== + +Libbpf is a helper library for eBPF and XDP that makes using these +technologies a lot simpler. It also contains specific helper functions +in tools/lib/bpf/xsk.h for facilitating the use of AF_XDP. It +contains two types of functions: those that can be used to make the +setup of AF_XDP socket easier and ones that can be used in the data +plane to access the rings safely and quickly. To see an example on how +to use this API, please take a look at the sample application in +samples/bpf/xdpsock_usr.c which uses libbpf for both setup and data +plane operations. + +We recommend that you use this library unless you have become a power +user. It will make your program a lot simpler. + XSKMAP / BPF_MAP_TYPE_XSKMAP ----------------------------- +============================ On XDP side there is a BPF map type BPF_MAP_TYPE_XSKMAP (XSKMAP) that is used in conjunction with bpf_redirect_map() to pass the ingress @@ -216,21 +237,184 @@ queue 17. Only the XDP program executing for eth0 and queue 17 will successfully pass data to the socket. Please refer to the sample application (samples/bpf/) in for an example. +Configuration Flags and Socket Options +====================================== + +These are the various configuration flags that can be used to control +and monitor the behavior of AF_XDP sockets. + +XDP_COPY and XDP_ZERO_COPY bind flags +------------------------------------- + +When you bind to a socket, the kernel will first try to use zero-copy +copy. If zero-copy is not supported, it will fall back on using copy +mode, i.e. copying all packets out to user space. But if you would +like to force a certain mode, you can use the following flags. If you +pass the XDP_COPY flag to the bind call, the kernel will force the +socket into copy mode. If it cannot use copy mode, the bind call will +fail with an error. Conversely, the XDP_ZERO_COPY flag will force the +socket into zero-copy mode or fail. + +XDP_SHARED_UMEM bind flag +------------------------- + +This flag enables you to bind multiple sockets to the same UMEM, but +only if they share the same queue id. In this mode, each socket has +their own RX and TX rings, but the UMEM (tied to the fist socket +created) only has a single FILL ring and a single COMPLETION +ring. To use this mode, create the first socket and bind it in the normal +way. Create a second socket and create an RX and a TX ring, or at +least one of them, but no FILL or COMPLETION rings as the ones from +the first socket will be used. In the bind call, set he +XDP_SHARED_UMEM option and provide the initial socket's fd in the +sxdp_shared_umem_fd field. You can attach an arbitrary number of extra +sockets this way. + +What socket will then a packet arrive on? This is decided by the XDP +program. Put all the sockets in the XSK_MAP and just indicate which +index in the array you would like to send each packet to. A simple +round-robin example of distributing packets is shown below: + +.. code-block:: c + + #include + #include "bpf_helpers.h" + + #define MAX_SOCKS 16 + + struct { + __uint(type, BPF_MAP_TYPE_XSKMAP); + __uint(max_entries, MAX_SOCKS); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + } xsks_map SEC(".maps"); + + static unsigned int rr; + + SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) + { + rr = (rr + 1) & (MAX_SOCKS - 1); + + return bpf_redirect_map(&xsks_map, rr, 0); + } + +Note, that since there is only a single set of FILL and COMPLETION +rings, and they are single producer, single consumer rings, you need +to make sure that multiple processes or threads do not use these rings +concurrently. There are no synchronization primitives in the +libbpf code that protects multiple users at this point in time. + +XDP_USE_NEED_WAKEUP bind flag +----------------------------- + +This option adds support for a new flag called need_wakeup that is +present in the FILL ring and the TX ring, the rings for which user +space is a producer. When this option is set in the bind call, the +need_wakeup flag will be set if the kernel needs to be explicitly +woken up by a syscall to continue processing packets. If the flag is +zero, no syscall is needed. + +If the flag is set on the FILL ring, the application needs to call +poll() to be able to continue to receive packets on the RX ring. This +can happen, for example, when the kernel has detected that there are no +more buffers on the FILL ring and no buffers left on the RX HW ring of +the NIC. In this case, interrupts are turned off as the NIC cannot +receive any packets (as there are no buffers to put them in), and the +need_wakeup flag is set so that user space can put buffers on the +FILL ring and then call poll() so that the kernel driver can put these +buffers on the HW ring and start to receive packets. + +If the flag is set for the TX ring, it means that the application +needs to explicitly notify the kernel to send any packets put on the +TX ring. This can be accomplished either by a poll() call, as in the +RX path, or by calling sendto(). + +An example of how to use this flag can be found in +samples/bpf/xdpsock_user.c. An example with the use of libbpf helpers +would look like this for the TX path: + +.. code-block:: c + + if (xsk_ring_prod__needs_wakeup(&my_tx_ring)) + sendto(xsk_socket__fd(xsk_handle), NULL, 0, MSG_DONTWAIT, NULL, 0); + +I.e., only use the syscall if the flag is set. + +We recommend that you always enable this mode as it usually leads to +better performance especially if you run the application and the +driver on the same core, but also if you use different cores for the +application and the kernel driver, as it reduces the number of +syscalls needed for the TX path. + +XDP_{RX|TX|UMEM_FILL|UMEM_COMPLETION}_RING setsockopts +------------------------------------------------------ + +These setsockopts sets the number of descriptors that the RX, TX, +FILL, and COMPLETION rings respectively should have. It is mandatory +to set the size of at least one of the RX and TX rings. If you set +both, you will be able to both receive and send traffic from your +application, but if you only want to do one of them, you can save +resources by only setting up one of them. Both the FILL ring and the +COMPLETION ring are mandatory if you have a UMEM tied to your socket, +which is the normal case. But if the XDP_SHARED_UMEM flag is used, any +socket after the first one does not have a UMEM and should in that +case not have any FILL or COMPLETION rings created. + +XDP_UMEM_REG setsockopt +----------------------- + +This setsockopt registers a UMEM to a socket. This is the area that +contain all the buffers that packet can recide in. The call takes a +pointer to the beginning of this area and the size of it. Moreover, it +also has parameter called chunk_size that is the size that the UMEM is +divided into. It can only be 2K or 4K at the moment. If you have an +UMEM area that is 128K and a chunk size of 2K, this means that you +will be able to hold a maximum of 128K / 2K = 64 packets in your UMEM +area and that your largest packet size can be 2K. + +There is also an option to set the headroom of each single buffer in +the UMEM. If you set this to N bytes, it means that the packet will +start N bytes into the buffer leaving the first N bytes for the +application to use. The final option is the flags field, but it will +be dealt with in separate sections for each UMEM flag. + +XDP_STATISTICS getsockopt +------------------------- + +Gets drop statistics of a socket that can be useful for debug +purposes. The supported statistics are shown below: + +.. code-block:: c + + struct xdp_statistics { + __u64 rx_dropped; /* Dropped for reasons other than invalid desc */ + __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */ + __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */ + }; + +XDP_OPTIONS getsockopt +---------------------- + +Gets options from an XDP socket. The only one supported so far is +XDP_OPTIONS_ZEROCOPY which tells you if zero-copy is on or not. + Usage ===== -In order to use AF_XDP sockets there are two parts needed. The +In order to use AF_XDP sockets two parts are needed. The user-space application and the XDP program. For a complete setup and usage example, please refer to the sample application. The user-space side is xdpsock_user.c and the XDP side is part of libbpf. -The XDP code sample included in tools/lib/bpf/xsk.c is the following:: +The XDP code sample included in tools/lib/bpf/xsk.c is the following: + +.. code-block:: c SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) { int index = ctx->rx_queue_index; - // A set entry here means that the correspnding queue_id + // A set entry here means that the corresponding queue_id // has an active AF_XDP socket bound to it. if (bpf_map_lookup_elem(&xsks_map, &index)) return bpf_redirect_map(&xsks_map, index, 0); @@ -238,7 +422,10 @@ The XDP code sample included in tools/lib/bpf/xsk.c is the following:: return XDP_PASS; } -Naive ring dequeue and enqueue could look like this:: +A simple but not so performance ring dequeue and enqueue could look +like this: + +.. code-block:: c // struct xdp_rxtx_ring { // __u32 *producer; @@ -287,17 +474,16 @@ Naive ring dequeue and enqueue could look like this:: return 0; } - -For a more optimized version, please refer to the sample application. +But please use the libbpf functions as they are optimized and ready to +use. Will make your life easier. Sample application ================== There is a xdpsock benchmarking/test application included that -demonstrates how to use AF_XDP sockets with both private and shared -UMEMs. Say that you would like your UDP traffic from port 4242 to end -up in queue 16, that we will enable AF_XDP on. Here, we use ethtool -for this:: +demonstrates how to use AF_XDP sockets with private UMEMs. Say that +you would like your UDP traffic from port 4242 to end up in queue 16, +that we will enable AF_XDP on. Here, we use ethtool for this:: ethtool -N p3p2 rx-flow-hash udp4 fn ethtool -N p3p2 flow-type udp4 src-port 4242 dst-port 4242 \ @@ -311,13 +497,18 @@ using:: For XDP_SKB mode, use the switch "-S" instead of "-N" and all options can be displayed with "-h", as usual. +This sample application uses libbpf to make the setup and usage of +AF_XDP simpler. If you want to know how the raw uapi of AF_XDP is +really used to make something more advanced, take a look at the libbpf +code in tools/lib/bpf/xsk.[ch]. + FAQ ======= Q: I am not seeing any traffic on the socket. What am I doing wrong? A: When a netdev of a physical NIC is initialized, Linux usually - allocates one Rx and Tx queue pair per core. So on a 8 core system, + allocates one RX and TX queue pair per core. So on a 8 core system, queue ids 0 to 7 will be allocated, one per core. In the AF_XDP bind call or the xsk_socket__create libbpf function call, you specify a specific queue id to bind to and it is only the traffic @@ -343,9 +534,21 @@ A: When a netdev of a physical NIC is initialized, Linux usually sudo ethtool -N flow-type udp4 src-port 4242 dst-port \ 4242 action 2 - A number of other ways are possible all up to the capabilitites of + A number of other ways are possible all up to the capabilities of the NIC you have. +Q: Can I use the XSKMAP to implement a switch betwen different umems + in copy mode? + +A: The short answer is no, that is not supported at the moment. The + XSKMAP can only be used to switch traffic coming in on queue id X + to sockets bound to the same queue id X. The XSKMAP can contain + sockets bound to different queue ids, for example X and Y, but only + traffic goming in from queue id Y can be directed to sockets bound + to the same queue id Y. In zero-copy mode, you should use the + switch, or other distribution mechanism, in your NIC to direct + traffic to the correct queue id and socket. + Credits ======= From 58eeb2289ab9bd8acad41a589431bbdbf7622595 Mon Sep 17 00:00:00 2001 From: KP Singh Date: Wed, 23 Oct 2019 17:40:38 +0200 Subject: [PATCH 49/52] libbpf: Fix strncat bounds error in libbpf_prog_type_by_name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On compiling samples with this change, one gets an error: error: ‘strncat’ specified bound 118 equals destination size [-Werror=stringop-truncation] strncat(dst, name + section_names[i].len, ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ sizeof(raw_tp_btf_name) - (dst - raw_tp_btf_name)); ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ strncat requires the destination to have enough space for the terminating null byte. Fixes: f75a697e09137 ("libbpf: Auto-detect btf_id of BTF-based raw_tracepoint") Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191023154038.24075-1-kpsingh@chromium.org --- tools/lib/bpf/libbpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 8b4d765c422b..d71631a01926 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -4690,7 +4690,7 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, } /* prepend "btf_trace_" prefix per kernel convention */ strncat(dst, name + section_names[i].len, - sizeof(raw_tp_btf_name) - (dst - raw_tp_btf_name)); + sizeof(raw_tp_btf_name) - sizeof("btf_trace_")); ret = btf__find_by_name(btf, raw_tp_btf_name); btf__free(btf); if (ret <= 0) { From 3820729160440158a014add69cc0d371061a96b2 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 24 Oct 2019 17:18:11 -0700 Subject: [PATCH 50/52] bpf: Prepare btf_ctx_access for non raw_tp use case This patch makes a few changes to btf_ctx_access() to prepare it for non raw_tp use case where the attach_btf_id is not necessary a BTF_KIND_TYPEDEF. It moves the "btf_trace_" prefix check and typedef-follow logic to a new function "check_attach_btf_id()" which is called only once during bpf_check(). btf_ctx_access() only operates on a BTF_KIND_FUNC_PROTO type now. That should also be more efficient since it is done only one instead of every-time check_ctx_access() is called. "check_attach_btf_id()" needs to find the func_proto type from the attach_btf_id. It needs to store the result into the newly added prog->aux->attach_func_proto. func_proto btf type has no name, so a proper name should be stored into "attach_func_name" also. v2: - Move the "btf_trace_" check to an earlier verifier phase (Alexei) Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20191025001811.1718491-1-kafai@fb.com --- include/linux/bpf.h | 5 +++ include/linux/btf.h | 31 +++++++++++++++++ kernel/bpf/btf.c | 73 +++++++--------------------------------- kernel/bpf/syscall.c | 4 +-- kernel/bpf/verifier.c | 52 +++++++++++++++++++++++++++- kernel/trace/bpf_trace.c | 2 ++ 6 files changed, 103 insertions(+), 64 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2c2c29b49845..171be30fe0ae 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -392,6 +392,11 @@ struct bpf_prog_aux { u32 attach_btf_id; /* in-kernel BTF type id to attach to */ bool verifier_zext; /* Zero extensions has been inserted by verifier. */ bool offload_requested; + bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ + /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ + const struct btf_type *attach_func_proto; + /* function name for valid attach_btf_id */ + const char *attach_func_name; struct bpf_prog **func; void *jit_data; /* JIT specific data. arch dependent */ struct latch_tree_node ksym_tnode; diff --git a/include/linux/btf.h b/include/linux/btf.h index 55d43bc856be..9dee00859c5f 100644 --- a/include/linux/btf.h +++ b/include/linux/btf.h @@ -5,6 +5,7 @@ #define _LINUX_BTF_H 1 #include +#include struct btf; struct btf_member; @@ -53,6 +54,36 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s, int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t); bool btf_type_is_void(const struct btf_type *t); +static inline bool btf_type_is_ptr(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_PTR; +} + +static inline bool btf_type_is_int(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_INT; +} + +static inline bool btf_type_is_enum(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM; +} + +static inline bool btf_type_is_typedef(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_TYPEDEF; +} + +static inline bool btf_type_is_func(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC; +} + +static inline bool btf_type_is_func_proto(const struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO; +} + #ifdef CONFIG_BPF_SYSCALL const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id); const char *btf_name_by_offset(const struct btf *btf, u32 offset); diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index f7557af39756..128d89601d73 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -336,16 +336,6 @@ static bool btf_type_is_fwd(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_FWD; } -static bool btf_type_is_func(const struct btf_type *t) -{ - return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC; -} - -static bool btf_type_is_func_proto(const struct btf_type *t) -{ - return BTF_INFO_KIND(t->info) == BTF_KIND_FUNC_PROTO; -} - static bool btf_type_nosize(const struct btf_type *t) { return btf_type_is_void(t) || btf_type_is_fwd(t) || @@ -377,16 +367,6 @@ static bool btf_type_is_array(const struct btf_type *t) return BTF_INFO_KIND(t->info) == BTF_KIND_ARRAY; } -static bool btf_type_is_ptr(const struct btf_type *t) -{ - return BTF_INFO_KIND(t->info) == BTF_KIND_PTR; -} - -static bool btf_type_is_int(const struct btf_type *t) -{ - return BTF_INFO_KIND(t->info) == BTF_KIND_INT; -} - static bool btf_type_is_var(const struct btf_type *t) { return BTF_INFO_KIND(t->info) == BTF_KIND_VAR; @@ -3442,54 +3422,27 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { + const struct btf_type *t = prog->aux->attach_func_proto; + const char *tname = prog->aux->attach_func_name; struct bpf_verifier_log *log = info->log; - u32 btf_id = prog->aux->attach_btf_id; const struct btf_param *args; - const struct btf_type *t; - const char prefix[] = "btf_trace_"; - const char *tname; u32 nr_args, arg; - if (!btf_id) - return true; - - if (IS_ERR(btf_vmlinux)) { - bpf_log(log, "btf_vmlinux is malformed\n"); - return false; - } - - t = btf_type_by_id(btf_vmlinux, btf_id); - if (!t || BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF) { - bpf_log(log, "btf_id is invalid\n"); - return false; - } - - tname = __btf_name_by_offset(btf_vmlinux, t->name_off); - if (strncmp(prefix, tname, sizeof(prefix) - 1)) { - bpf_log(log, "btf_id points to wrong type name %s\n", tname); - return false; - } - tname += sizeof(prefix) - 1; - - t = btf_type_by_id(btf_vmlinux, t->type); - if (!btf_type_is_ptr(t)) - return false; - t = btf_type_by_id(btf_vmlinux, t->type); - if (!btf_type_is_func_proto(t)) - return false; - if (off % 8) { - bpf_log(log, "raw_tp '%s' offset %d is not multiple of 8\n", + bpf_log(log, "func '%s' offset %d is not multiple of 8\n", tname, off); return false; } arg = off / 8; args = (const struct btf_param *)(t + 1); - /* skip first 'void *__data' argument in btf_trace_##name typedef */ - args++; - nr_args = btf_type_vlen(t) - 1; + nr_args = btf_type_vlen(t); + if (prog->aux->attach_btf_trace) { + /* skip first 'void *__data' argument in btf_trace_##name typedef */ + args++; + nr_args--; + } if (arg >= nr_args) { - bpf_log(log, "raw_tp '%s' doesn't have %d-th argument\n", + bpf_log(log, "func '%s' doesn't have %d-th argument\n", tname, arg); return false; } @@ -3503,7 +3456,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, return true; if (!btf_type_is_ptr(t)) { bpf_log(log, - "raw_tp '%s' arg%d '%s' has type %s. Only pointer access is allowed\n", + "func '%s' arg%d '%s' has type %s. Only pointer access is allowed\n", tname, arg, __btf_name_by_offset(btf_vmlinux, t->name_off), btf_kind_str[BTF_INFO_KIND(t->info)]); @@ -3526,11 +3479,11 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, t = btf_type_by_id(btf_vmlinux, t->type); if (!btf_type_is_struct(t)) { bpf_log(log, - "raw_tp '%s' arg%d type %s is not a struct\n", + "func '%s' arg%d type %s is not a struct\n", tname, arg, btf_kind_str[BTF_INFO_KIND(t->info)]); return false; } - bpf_log(log, "raw_tp '%s' arg%d has btf_id %d type %s '%s'\n", + bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n", tname, arg, info->btf_id, btf_kind_str[BTF_INFO_KIND(t->info)], __btf_name_by_offset(btf_vmlinux, t->name_off)); return true; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 16ea3c0db4f6..ff5225759553 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1848,9 +1848,7 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) goto out_put_prog; } /* raw_tp name is taken from type name instead */ - tp_name = kernel_type_name(prog->aux->attach_btf_id); - /* skip the prefix */ - tp_name += sizeof("btf_trace_") - 1; + tp_name = prog->aux->attach_func_name; } else { if (strncpy_from_user(buf, u64_to_user_ptr(attr->raw_tracepoint.name), diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 556e82f8869b..c59778c0fc4d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9372,6 +9372,52 @@ static void print_verification_stats(struct bpf_verifier_env *env) env->peak_states, env->longest_mark_read_walk); } +static int check_attach_btf_id(struct bpf_verifier_env *env) +{ + struct bpf_prog *prog = env->prog; + u32 btf_id = prog->aux->attach_btf_id; + const struct btf_type *t; + const char *tname; + + if (prog->type == BPF_PROG_TYPE_RAW_TRACEPOINT && btf_id) { + const char prefix[] = "btf_trace_"; + + t = btf_type_by_id(btf_vmlinux, btf_id); + if (!t) { + verbose(env, "attach_btf_id %u is invalid\n", btf_id); + return -EINVAL; + } + if (!btf_type_is_typedef(t)) { + verbose(env, "attach_btf_id %u is not a typedef\n", + btf_id); + return -EINVAL; + } + tname = btf_name_by_offset(btf_vmlinux, t->name_off); + if (!tname || strncmp(prefix, tname, sizeof(prefix) - 1)) { + verbose(env, "attach_btf_id %u points to wrong type name %s\n", + btf_id, tname); + return -EINVAL; + } + tname += sizeof(prefix) - 1; + t = btf_type_by_id(btf_vmlinux, t->type); + if (!btf_type_is_ptr(t)) + /* should never happen in valid vmlinux build */ + return -EINVAL; + t = btf_type_by_id(btf_vmlinux, t->type); + if (!btf_type_is_func_proto(t)) + /* should never happen in valid vmlinux build */ + return -EINVAL; + + /* remember two read only pointers that are valid for + * the life time of the kernel + */ + prog->aux->attach_func_name = tname; + prog->aux->attach_func_proto = t; + prog->aux->attach_btf_trace = true; + } + return 0; +} + int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, union bpf_attr __user *uattr) { @@ -9435,9 +9481,13 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, /* Either gcc or pahole or kernel are broken. */ verbose(env, "in-kernel BTF is malformed\n"); ret = PTR_ERR(btf_vmlinux); - goto err_unlock; + goto skip_full_check; } + ret = check_attach_btf_id(env); + if (ret) + goto skip_full_check; + env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT); if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) env->strict_alignment = true; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index c3240898cc44..571c25d60710 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1080,6 +1080,8 @@ static bool raw_tp_prog_is_valid_access(int off, int size, return false; if (off % size != 0) return false; + if (!prog->aux->attach_btf_id) + return true; return btf_ctx_access(off, size, type, prog, info); } From a94364603610f341564351a0e130c4bbcaeddfa0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 24 Oct 2019 15:30:25 +0200 Subject: [PATCH 51/52] bpftool: Allow to read btf as raw data The bpftool interface stays the same, but now it's possible to run it over BTF raw data, like: $ bpftool btf dump file /sys/kernel/btf/vmlinux [1] INT '(anon)' size=4 bits_offset=0 nr_bits=32 encoding=(none) [2] INT 'long unsigned int' size=8 bits_offset=0 nr_bits=64 encoding=(none) [3] CONST '(anon)' type_id=2 Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Andrii Nakryiko Acked-by: Jakub Kicinski Link: https://lore.kernel.org/bpf/20191024133025.10691-1-jolsa@kernel.org --- tools/bpf/bpftool/btf.c | 57 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 9a9376d1d3df..a7b8bf233cf5 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -12,6 +12,9 @@ #include #include #include +#include +#include +#include #include "btf.h" #include "json_writer.h" @@ -388,6 +391,54 @@ done: return err; } +static struct btf *btf__parse_raw(const char *file) +{ + struct btf *btf; + struct stat st; + __u8 *buf; + FILE *f; + + if (stat(file, &st)) + return NULL; + + f = fopen(file, "rb"); + if (!f) + return NULL; + + buf = malloc(st.st_size); + if (!buf) { + btf = ERR_PTR(-ENOMEM); + goto exit_close; + } + + if ((size_t) st.st_size != fread(buf, 1, st.st_size, f)) { + btf = ERR_PTR(-EINVAL); + goto exit_free; + } + + btf = btf__new(buf, st.st_size); + +exit_free: + free(buf); +exit_close: + fclose(f); + return btf; +} + +static bool is_btf_raw(const char *file) +{ + __u16 magic = 0; + int fd; + + fd = open(file, O_RDONLY); + if (fd < 0) + return false; + + read(fd, &magic, sizeof(magic)); + close(fd); + return magic == BTF_MAGIC; +} + static int do_dump(int argc, char **argv) { struct btf *btf = NULL; @@ -465,7 +516,11 @@ static int do_dump(int argc, char **argv) } NEXT_ARG(); } else if (is_prefix(src, "file")) { - btf = btf__parse_elf(*argv, NULL); + if (is_btf_raw(*argv)) + btf = btf__parse_raw(*argv); + else + btf = btf__parse_elf(*argv, NULL); + if (IS_ERR(btf)) { err = PTR_ERR(btf); btf = NULL; From 027cbaaf61983351622c29f5a2adc7340340cb7f Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Thu, 24 Oct 2019 21:55:03 -0700 Subject: [PATCH 52/52] selftests/bpf: Fix .gitignore to ignore no_alu32/ When switching to alu32 by default, no_alu32/ subdirectory wasn't added to .gitignore. Fix it. Fixes: e13a2fe642bd ("tools/bpf: Turn on llvm alu32 attribute by default") Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20191025045503.3043427-1-andriin@fb.com --- tools/testing/selftests/bpf/.gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 6f46170e09c1..4865116b96c7 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -37,5 +37,5 @@ libbpf.so.* test_hashmap test_btf_dump xdping -/alu32 +/no_alu32 /bpf_gcc