From 533420a4151e28e6a38a830cb876afddb5587d7d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 12 Jan 2020 23:31:38 -0800 Subject: [PATCH 1/6] tools: Sync uapi/linux/if_link.h Sync uapi/linux/if_link.h into tools to avoid out of sync warnings during libbpf build. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200113073143.1779940-2-andriin@fb.com --- tools/include/uapi/linux/if_link.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index 8aec8769d944..1d69f637c5d6 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -169,6 +169,7 @@ enum { IFLA_MAX_MTU, IFLA_PROP_LIST, IFLA_ALT_IFNAME, /* Alternative ifname */ + IFLA_PERM_ADDRESS, __IFLA_MAX }; From 292e1d73b125d7a3fd7ff382557e003ece3c0d65 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 12 Jan 2020 23:31:39 -0800 Subject: [PATCH 2/6] libbpf: Clean up bpf_helper_defs.h generation output bpf_helpers_doc.py script, used to generate bpf_helper_defs.h, unconditionally emits one informational message to stderr. Remove it and preserve stderr to contain only relevant errors. Also make sure the script invocation command is muted by default in libbpf's Makefile. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200113073143.1779940-3-andriin@fb.com --- scripts/bpf_helpers_doc.py | 2 -- tools/lib/bpf/Makefile | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 7548569e8076..90baf7d70911 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -158,8 +158,6 @@ class HeaderParser(object): break self.reader.close() - print('Parsed description of %d helper function(s)' % len(self.helpers), - file=sys.stderr) ############################################################################### diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index db2afccde757..aee7f1a83c77 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -183,7 +183,7 @@ $(BPF_IN_STATIC): force elfdep zdep bpfdep $(BPF_HELPER_DEFS) $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR) $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h - $(Q)$(srctree)/scripts/bpf_helpers_doc.py --header \ + $(QUIET_GEN)$(srctree)/scripts/bpf_helpers_doc.py --header \ --file $(srctree)/tools/include/uapi/linux/bpf.h > $(BPF_HELPER_DEFS) $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) From 2cc51d34d93c9cf76128da6535a2a0a38c9faddb Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 12 Jan 2020 23:31:40 -0800 Subject: [PATCH 3/6] selftests/bpf: Conform selftests/bpf Makefile output to libbpf and bpftool Bring selftest/bpf's Makefile output to the same format used by libbpf and bpftool: 2 spaces of padding on the left + 8-character left-aligned build step identifier. Also, hide feature detection output by default. Can be enabled back by setting V=1. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200113073143.1779940-4-andriin@fb.com --- tools/testing/selftests/bpf/Makefile | 47 +++++++++++++++------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index c28e67548f45..bf9f7e415e95 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -87,14 +87,15 @@ Q = msg = else Q = @ -msg = @$(info $(1)$(if $(2), [$(2)]) $(notdir $(3)))$(if $(4), $(4)) +msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))"; MAKEFLAGS += --no-print-directory +submake_extras := feature_display=0 endif # override lib.mk's default rules OVERRIDE_TARGETS := 1 override define CLEAN - $(call msg, CLEAN) + $(call msg,CLEAN) $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ clean endef @@ -112,15 +113,15 @@ $(notdir $(TEST_GEN_PROGS) \ $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ; $(OUTPUT)/%:%.c - $(call msg, BINARY,,$@) + $(call msg,BINARY,,$@) $(LINK.c) $^ $(LDLIBS) -o $@ $(OUTPUT)/urandom_read: urandom_read.c - $(call msg, BINARY,,$@) + $(call msg,BINARY,,$@) $(CC) -o $@ $< -Wl,--build-id $(OUTPUT)/test_stub.o: test_stub.c - $(call msg, CC,,$@) + $(call msg,CC,,$@) $(CC) -c $(CFLAGS) -o $@ $< BPFOBJ := $(OUTPUT)/libbpf.a @@ -146,18 +147,20 @@ $(OUTPUT)/test_sysctl: cgroup_helpers.c # force a rebuild of BPFOBJ when its dependencies are updated force: -DEFAULT_BPFTOOL := $(OUTPUT)/tools/usr/local/sbin/bpftool +DEFAULT_BPFTOOL := $(OUTPUT)/tools/sbin/bpftool BPFTOOL ?= $(DEFAULT_BPFTOOL) $(DEFAULT_BPFTOOL): force - $(Q)$(MAKE) -C $(BPFTOOLDIR) DESTDIR=$(OUTPUT)/tools install + $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ + prefix= DESTDIR=$(OUTPUT)/tools/ install $(BPFOBJ): force - $(Q)$(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ + $(Q)$(MAKE) 
$(submake_extras) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ BPF_HELPERS := $(OUTPUT)/bpf_helper_defs.h $(wildcard $(BPFDIR)/bpf_*.h) $(OUTPUT)/bpf_helper_defs.h: $(BPFOBJ) - $(Q)$(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ $(OUTPUT)/bpf_helper_defs.h + $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \ + OUTPUT=$(OUTPUT)/ $(OUTPUT)/bpf_helper_defs.h # Get Clang's default includes on this system, as opposed to those seen by # '-target bpf'. This fixes "missing" files on some architectures/distros, @@ -194,28 +197,28 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h # $3 - CFLAGS # $4 - LDFLAGS define CLANG_BPF_BUILD_RULE - $(call msg, CLANG-LLC,$(TRUNNER_BINARY),$2) + $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) ($(CLANG) $3 -O2 -target bpf -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -mattr=dwarfris -march=bpf -mcpu=probe $4 -filetype=obj -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 define CLANG_NOALU32_BPF_BUILD_RULE - $(call msg, CLANG-LLC,$(TRUNNER_BINARY),$2) + $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2) ($(CLANG) $3 -O2 -target bpf -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC define CLANG_NATIVE_BPF_BUILD_RULE - $(call msg, CLANG-BPF,$(TRUNNER_BINARY),$2) + $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) ($(CLANG) $3 -O2 -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -march=bpf -mcpu=probe $4 -filetype=obj -o $2 endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE - $(call msg, GCC-BPF,$(TRUNNER_BINARY),$2) + $(call msg,GCC-BPF,$(TRUNNER_BINARY),$2) $(BPF_GCC) $3 $4 -O2 -c $1 -o $2 endef @@ -256,7 +259,7 @@ define DEFINE_TEST_RUNNER_RULES ifeq ($($(TRUNNER_OUTPUT)-dir),) $(TRUNNER_OUTPUT)-dir := y $(TRUNNER_OUTPUT): - $$(call msg, MKDIR,,$$@) + $$(call msg,MKDIR,,$$@) mkdir -p $$@ endif @@ -275,7 +278,7 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ 
$(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \ $(TRUNNER_OUTPUT)/%.o \ | $(BPFTOOL) $(TRUNNER_OUTPUT) - $$(call msg, GEN-SKEL,$(TRUNNER_BINARY),$$@) + $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@) $$(BPFTOOL) gen skeleton $$< > $$@ endif @@ -283,7 +286,7 @@ endif ifeq ($($(TRUNNER_TESTS_DIR)-tests-hdr),) $(TRUNNER_TESTS_DIR)-tests-hdr := y $(TRUNNER_TESTS_HDR): $(TRUNNER_TESTS_DIR)/*.c - $$(call msg, TEST-HDR,$(TRUNNER_BINARY),$$@) + $$(call msg,TEST-HDR,$(TRUNNER_BINARY),$$@) $$(shell ( cd $(TRUNNER_TESTS_DIR); \ echo '/* Generated header, do not edit */'; \ ls *.c 2> /dev/null | \ @@ -299,7 +302,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \ $(TRUNNER_BPF_OBJS) \ $(TRUNNER_BPF_SKELS) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) - $$(call msg, TEST-OBJ,$(TRUNNER_BINARY),$$@) + $$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@) cd $$(@D) && $$(CC) $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ @@ -307,20 +310,20 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $(TRUNNER_EXTRA_HDRS) \ $(TRUNNER_TESTS_HDR) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) - $$(call msg, EXTRA-OBJ,$(TRUNNER_BINARY),$$@) + $$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@) $$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@ # only copy extra resources if in flavored build $(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT) ifneq ($2,) - $$(call msg, EXTRAS-CP,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES)) + $$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES)) cp -a $$^ $(TRUNNER_OUTPUT)/ endif $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ $(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \ | $(TRUNNER_BINARY)-extras - $$(call msg, BINARY,,$$@) + $$(call msg,BINARY,,$$@) $$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ endef @@ -372,12 +375,12 @@ verifier/tests.h: verifier/*.c echo '#endif' \ ) > verifier/tests.h) $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT) - $(call msg, BINARY,,$@) + $(call msg,BINARY,,$@) $(CC) 
$(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ # Make sure we are able to include and link libbpf against c++. $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) - $(call msg, CXX,,$@) + $(call msg,CXX,,$@) $(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) \ From 1cf5b23988ea0086a252a5c8b005b075f1e9b030 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 12 Jan 2020 23:31:41 -0800 Subject: [PATCH 4/6] bpftool: Apply preserve_access_index attribute to all types in BTF dump This patch makes structs and unions, emitted through BTF dump, automatically CO-RE-relocatable (unless disabled with `#define BPF_NO_PRESERVE_ACCESS_INDEX`, specified before including generated header file). This effectively turns usual bpf_probe_read() call into equivalent of bpf_core_read(), by automatically applying builtin_preserve_access_index to any field accesses of types in generated C types header. This is especially useful for tp_btf/fentry/fexit BPF program types. They allow direct memory access, so BPF C code just uses straightforward a->b->c access pattern to read data from kernel. But without kernel structs marked as CO-RE relocatable through preserve_access_index attribute, one has to enclose all the data reads into a special __builtin_preserve_access_index code block, like so: __builtin_preserve_access_index(({ x = p->pid; /* where p is struct task_struct *, for example */ })); This is very inconvenient and obscures the logic quite a bit. By marking all auto-generated types with preserve_access_index attribute the above code is reduced to just a clean and natural `x = p->pid;`. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200113073143.1779940-5-andriin@fb.com --- tools/bpf/bpftool/btf.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index e5bc97b71ceb..60c75be0666d 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -370,6 +370,10 @@ static int dump_btf_c(const struct btf *btf, if (IS_ERR(d)) return PTR_ERR(d); + printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n"); + printf("#pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record)\n"); + printf("#endif\n\n"); + if (root_type_cnt) { for (i = 0; i < root_type_cnt; i++) { err = btf_dump__dump_type(d, root_type_ids[i]); @@ -386,6 +390,10 @@ static int dump_btf_c(const struct btf *btf, } } + printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n"); + printf("#pragma clang attribute pop\n"); + printf("#endif\n"); + done: btf_dump__free(d); return err; From 9c01546d26d28cf57341d8380ba900bf68e26b18 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 12 Jan 2020 23:31:42 -0800 Subject: [PATCH 5/6] tools/bpf: Add runqslower tool to tools/bpf Convert one of BCC tools (runqslower [0]) to BPF CO-RE + libbpf. It matches its BCC-based counterpart 1-to-1, supporting all the same parameters and functionality. runqslower tool utilizes BPF skeleton, auto-generated from BPF object file, as well as memory-mapped interface to global (read-only, in this case) data. Its Makefile also ensures auto-generation of "relocatable" vmlinux.h, which is necessary for BTF-typed raw tracepoints with direct memory access. 
[0] https://github.com/iovisor/bcc/blob/11bf5d02c895df9646c117c713082eb192825293/tools/runqslower.py Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200113073143.1779940-6-andriin@fb.com --- tools/bpf/Makefile | 20 ++- tools/bpf/runqslower/.gitignore | 1 + tools/bpf/runqslower/Makefile | 80 +++++++++++ tools/bpf/runqslower/runqslower.bpf.c | 100 ++++++++++++++ tools/bpf/runqslower/runqslower.c | 187 ++++++++++++++++++++++++++ tools/bpf/runqslower/runqslower.h | 13 ++ 6 files changed, 396 insertions(+), 5 deletions(-) create mode 100644 tools/bpf/runqslower/.gitignore create mode 100644 tools/bpf/runqslower/Makefile create mode 100644 tools/bpf/runqslower/runqslower.bpf.c create mode 100644 tools/bpf/runqslower/runqslower.c create mode 100644 tools/bpf/runqslower/runqslower.h diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile index 5535650800ab..f897eeeb0b4f 100644 --- a/tools/bpf/Makefile +++ b/tools/bpf/Makefile @@ -38,7 +38,7 @@ FEATURE_TESTS = libbfd disassembler-four-args FEATURE_DISPLAY = libbfd disassembler-four-args check_feat := 1 -NON_CHECK_FEAT_TARGETS := clean bpftool_clean +NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean ifdef MAKECMDGOALS ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),) check_feat := 0 @@ -73,7 +73,7 @@ $(OUTPUT)%.lex.o: $(OUTPUT)%.lex.c PROGS = $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg $(OUTPUT)bpf_asm -all: $(PROGS) bpftool +all: $(PROGS) bpftool runqslower $(OUTPUT)bpf_jit_disasm: CFLAGS += -DPACKAGE='bpf_jit_disasm' $(OUTPUT)bpf_jit_disasm: $(OUTPUT)bpf_jit_disasm.o @@ -89,7 +89,7 @@ $(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c $(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c $(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c -clean: bpftool_clean +clean: bpftool_clean runqslower_clean $(call QUIET_CLEAN, bpf-progs) $(Q)$(RM) -r -- $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \ $(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.* 
@@ -97,7 +97,7 @@ clean: bpftool_clean $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpf $(Q)$(RM) -r -- $(OUTPUT)feature -install: $(PROGS) bpftool_install +install: $(PROGS) bpftool_install runqslower_install $(call QUIET_INSTALL, bpf_jit_disasm) $(Q)$(INSTALL) -m 0755 -d $(DESTDIR)$(prefix)/bin $(Q)$(INSTALL) $(OUTPUT)bpf_jit_disasm $(DESTDIR)$(prefix)/bin/bpf_jit_disasm @@ -115,4 +115,14 @@ bpftool_install: bpftool_clean: $(call descend,bpftool,clean) -.PHONY: all install clean bpftool bpftool_install bpftool_clean +runqslower: + $(call descend,runqslower) + +runqslower_install: + $(call descend,runqslower,install) + +runqslower_clean: + $(call descend,runqslower,clean) + +.PHONY: all install clean bpftool bpftool_install bpftool_clean \ + runqslower runqslower_install runqslower_clean diff --git a/tools/bpf/runqslower/.gitignore b/tools/bpf/runqslower/.gitignore new file mode 100644 index 000000000000..90a456a2a72f --- /dev/null +++ b/tools/bpf/runqslower/.gitignore @@ -0,0 +1 @@ +/.output diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile new file mode 100644 index 000000000000..f1363ae8e473 --- /dev/null +++ b/tools/bpf/runqslower/Makefile @@ -0,0 +1,80 @@ +# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +OUTPUT := .output +CLANG := clang +LLC := llc +LLVM_STRIP := llvm-strip +DEFAULT_BPFTOOL := $(OUTPUT)/sbin/bpftool +BPFTOOL ?= $(DEFAULT_BPFTOOL) +LIBBPF_SRC := $(abspath ../../lib/bpf) +CFLAGS := -g -Wall + +# Try to detect best kernel BTF source +KERNEL_REL := $(shell uname -r) +ifneq ("$(wildcard /sys/kenerl/btf/vmlinux)","") +VMLINUX_BTF := /sys/kernel/btf/vmlinux +else ifneq ("$(wildcard /boot/vmlinux-$(KERNEL_REL))","") +VMLINUX_BTF := /boot/vmlinux-$(KERNEL_REL) +else +$(error "Can't detect kernel BTF, use VMLINUX_BTF to specify it explicitly") +endif + +abs_out := $(abspath $(OUTPUT)) +ifeq ($(V),1) +Q = +msg = +else +Q = @ +msg = @printf ' %-8s %s%s\n' "$(1)" "$(notdir $(2))" "$(if $(3), $(3))"; +MAKEFLAGS += --no-print-directory 
+submake_extras := feature_display=0 +endif + +.DELETE_ON_ERROR: + +.PHONY: all clean runqslower +all: runqslower + +runqslower: $(OUTPUT)/runqslower + +clean: + $(call msg,CLEAN) + $(Q)rm -rf $(OUTPUT) runqslower + +$(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(OUTPUT)/libbpf.a + $(call msg,BINARY,$@) + $(Q)$(CC) $(CFLAGS) -lelf -lz $^ -o $@ + +$(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h \ + $(OUTPUT)/runqslower.bpf.o + +$(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h + +$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL) + $(call msg,GEN-SKEL,$@) + $(Q)$(BPFTOOL) gen skeleton $< > $@ + +$(OUTPUT)/%.bpf.o: %.bpf.c $(OUTPUT)/libbpf.a | $(OUTPUT) + $(call msg,BPF,$@) + $(Q)$(CLANG) -g -O2 -target bpf -I$(OUTPUT) -I$(LIBBPF_SRC) \ + -c $(filter %.c,$^) -o $@ && \ + $(LLVM_STRIP) -g $@ + +$(OUTPUT)/%.o: %.c | $(OUTPUT) + $(call msg,CC,$@) + $(Q)$(CC) $(CFLAGS) -I$(LIBBPF_SRC) -I$(OUTPUT) -c $(filter %.c,$^) -o $@ + +$(OUTPUT): + $(call msg,MKDIR,$@) + $(Q)mkdir -p $(OUTPUT) + +$(OUTPUT)/vmlinux.h: $(VMLINUX_BTF) | $(OUTPUT) $(BPFTOOL) + $(call msg,GEN,$@) + $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ + +$(OUTPUT)/libbpf.a: | $(OUTPUT) + $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) \ + OUTPUT=$(abs_out)/ $(abs_out)/libbpf.a + +$(DEFAULT_BPFTOOL): + $(Q)$(MAKE) $(submake_extras) -C ../bpftool \ + prefix= OUTPUT=$(abs_out)/ DESTDIR=$(abs_out) install diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c new file mode 100644 index 000000000000..623cce4d37f5 --- /dev/null +++ b/tools/bpf/runqslower/runqslower.bpf.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include "vmlinux.h" +#include +#include "runqslower.h" + +#define TASK_RUNNING 0 + +#define BPF_F_INDEX_MASK 0xffffffffULL +#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK + +const volatile __u64 min_us = 0; +const volatile pid_t targ_pid = 0; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + 
__uint(max_entries, 10240); + __type(key, u32); + __type(value, u64); +} start SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(u32)); + __uint(value_size, sizeof(u32)); +} events SEC(".maps"); + +/* record enqueue timestamp */ +__always_inline +static int trace_enqueue(u32 tgid, u32 pid) +{ + u64 ts; + + if (!pid || (targ_pid && targ_pid != pid)) + return 0; + + ts = bpf_ktime_get_ns(); + bpf_map_update_elem(&start, &pid, &ts, 0); + return 0; +} + +SEC("tp_btf/sched_wakeup") +int handle__sched_wakeup(u64 *ctx) +{ + /* TP_PROTO(struct task_struct *p) */ + struct task_struct *p = (void *)ctx[0]; + + return trace_enqueue(p->tgid, p->pid); +} + +SEC("tp_btf/sched_wakeup_new") +int handle__sched_wakeup_new(u64 *ctx) +{ + /* TP_PROTO(struct task_struct *p) */ + struct task_struct *p = (void *)ctx[0]; + + return trace_enqueue(p->tgid, p->pid); +} + +SEC("tp_btf/sched_switch") +int handle__sched_switch(u64 *ctx) +{ + /* TP_PROTO(bool preempt, struct task_struct *prev, + * struct task_struct *next) + */ + struct task_struct *prev = (struct task_struct *)ctx[1]; + struct task_struct *next = (struct task_struct *)ctx[2]; + struct event event = {}; + u64 *tsp, delta_us; + long state; + u32 pid; + + /* ivcsw: treat like an enqueue event and store timestamp */ + if (prev->state == TASK_RUNNING) + trace_enqueue(prev->tgid, prev->pid); + + pid = next->pid; + + /* fetch timestamp and calculate delta */ + tsp = bpf_map_lookup_elem(&start, &pid); + if (!tsp) + return 0; /* missed enqueue */ + + delta_us = (bpf_ktime_get_ns() - *tsp) / 1000; + if (min_us && delta_us <= min_us) + return 0; + + event.pid = pid; + event.delta_us = delta_us; + bpf_get_current_comm(&event.task, sizeof(event.task)); + + /* output */ + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, + &event, sizeof(event)); + + bpf_map_delete_elem(&start, &pid); + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/bpf/runqslower/runqslower.c 
b/tools/bpf/runqslower/runqslower.c new file mode 100644 index 000000000000..996f0e2c560e --- /dev/null +++ b/tools/bpf/runqslower/runqslower.c @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +// Copyright (c) 2019 Facebook +#include +#include +#include +#include +#include +#include +#include +#include +#include "runqslower.h" +#include "runqslower.skel.h" + +struct env { + pid_t pid; + __u64 min_us; + bool verbose; +} env = { + .min_us = 10000, +}; + +const char *argp_program_version = "runqslower 0.1"; +const char *argp_program_bug_address = ""; +const char argp_program_doc[] = +"runqslower Trace long process scheduling delays.\n" +" For Linux, uses eBPF, BPF CO-RE, libbpf, BTF.\n" +"\n" +"This script traces high scheduling delays between tasks being\n" +"ready to run and them running on CPU after that.\n" +"\n" +"USAGE: runqslower [-p PID] [min_us]\n" +"\n" +"EXAMPLES:\n" +" runqslower # trace run queue latency higher than 10000 us (default)\n" +" runqslower 1000 # trace run queue latency higher than 1000 us\n" +" runqslower -p 123 # trace pid 123 only\n"; + +static const struct argp_option opts[] = { + { "pid", 'p', "PID", 0, "Process PID to trace"}, + { "verbose", 'v', NULL, 0, "Verbose debug output" }, + {}, +}; + +static error_t parse_arg(int key, char *arg, struct argp_state *state) +{ + static int pos_args; + int pid; + long long min_us; + + switch (key) { + case 'v': + env.verbose = true; + break; + case 'p': + errno = 0; + pid = strtol(arg, NULL, 10); + if (errno || pid <= 0) { + fprintf(stderr, "Invalid PID: %s\n", arg); + argp_usage(state); + } + env.pid = pid; + break; + case ARGP_KEY_ARG: + if (pos_args++) { + fprintf(stderr, + "Unrecognized positional argument: %s\n", arg); + argp_usage(state); + } + errno = 0; + min_us = strtoll(arg, NULL, 10); + if (errno || min_us <= 0) { + fprintf(stderr, "Invalid delay (in us): %s\n", arg); + argp_usage(state); + } + env.min_us = min_us; + break; + default: + return ARGP_ERR_UNKNOWN; + 
} + return 0; +} + +int libbpf_print_fn(enum libbpf_print_level level, + const char *format, va_list args) +{ + if (level == LIBBPF_DEBUG && !env.verbose) + return 0; + return vfprintf(stderr, format, args); +} + +static int bump_memlock_rlimit(void) +{ + struct rlimit rlim_new = { + .rlim_cur = RLIM_INFINITY, + .rlim_max = RLIM_INFINITY, + }; + + return setrlimit(RLIMIT_MEMLOCK, &rlim_new); +} + +void handle_event(void *ctx, int cpu, void *data, __u32 data_sz) +{ + const struct event *e = data; + struct tm *tm; + char ts[32]; + time_t t; + + time(&t); + tm = localtime(&t); + strftime(ts, sizeof(ts), "%H:%M:%S", tm); + printf("%-8s %-16s %-6d %14llu\n", ts, e->task, e->pid, e->delta_us); +} + +void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt) +{ + printf("Lost %llu events on CPU #%d!\n", lost_cnt, cpu); +} + +int main(int argc, char **argv) +{ + static const struct argp argp = { + .options = opts, + .parser = parse_arg, + .doc = argp_program_doc, + }; + struct perf_buffer_opts pb_opts; + struct perf_buffer *pb = NULL; + struct runqslower_bpf *obj; + int err; + + err = argp_parse(&argp, argc, argv, 0, NULL, NULL); + if (err) + return err; + + libbpf_set_print(libbpf_print_fn); + + err = bump_memlock_rlimit(); + if (err) { + fprintf(stderr, "failed to increase rlimit: %d", err); + return 1; + } + + obj = runqslower_bpf__open(); + if (!obj) { + fprintf(stderr, "failed to open and/or load BPF object\n"); + return 1; + } + + /* initialize global data (filtering options) */ + obj->rodata->targ_pid = env.pid; + obj->rodata->min_us = env.min_us; + + err = runqslower_bpf__load(obj); + if (err) { + fprintf(stderr, "failed to load BPF object: %d\n", err); + goto cleanup; + } + + err = runqslower_bpf__attach(obj); + if (err) { + fprintf(stderr, "failed to attach BPF programs\n"); + goto cleanup; + } + + printf("Tracing run queue latency higher than %llu us\n", env.min_us); + printf("%-8s %-16s %-6s %14s\n", "TIME", "COMM", "PID", "LAT(us)"); + + pb_opts.sample_cb = 
handle_event; + pb_opts.lost_cb = handle_lost_events; + pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64, &pb_opts); + err = libbpf_get_error(pb); + if (err) { + pb = NULL; + fprintf(stderr, "failed to open perf buffer: %d\n", err); + goto cleanup; + } + + while ((err = perf_buffer__poll(pb, 100)) >= 0) + ; + printf("Error polling perf buffer: %d\n", err); + +cleanup: + perf_buffer__free(pb); + runqslower_bpf__destroy(obj); + + return err != 0; +} diff --git a/tools/bpf/runqslower/runqslower.h b/tools/bpf/runqslower/runqslower.h new file mode 100644 index 000000000000..9db225425e5f --- /dev/null +++ b/tools/bpf/runqslower/runqslower.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __RUNQSLOWER_H +#define __RUNQSLOWER_H + +#define TASK_COMM_LEN 16 + +struct event { + char task[TASK_COMM_LEN]; + __u64 delta_us; + pid_t pid; +}; + +#endif /* __RUNQSLOWER_H */ From 3a0d3092a4edbbcd62360c44a931c8feaf4b4ff1 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 12 Jan 2020 23:31:43 -0800 Subject: [PATCH 6/6] selftests/bpf: Build runqslower from selftests Ensure runqslower tool is built as part of selftests to prevent it from bit rotting. 
Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200113073143.1779940-7-andriin@fb.com --- tools/bpf/runqslower/Makefile | 2 +- tools/testing/selftests/bpf/Makefile | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile index f1363ae8e473..cff2fbcd29a8 100644 --- a/tools/bpf/runqslower/Makefile +++ b/tools/bpf/runqslower/Makefile @@ -10,7 +10,7 @@ CFLAGS := -g -Wall # Try to detect best kernel BTF source KERNEL_REL := $(shell uname -r) -ifneq ("$(wildcard /sys/kenerl/btf/vmlinux)","") +ifneq ("$(wildcard /sys/kernel/btf/vmlinux)","") VMLINUX_BTF := /sys/kernel/btf/vmlinux else ifneq ("$(wildcard /boot/vmlinux-$(KERNEL_REL))","") VMLINUX_BTF := /boot/vmlinux-$(KERNEL_REL) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index bf9f7e415e95..246d09ffb296 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -73,7 +73,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \ # Compile but not part of 'make run_tests' TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ - test_lirc_mode2_user xdping test_cpp + test_lirc_mode2_user xdping test_cpp runqslower TEST_CUSTOM_PROGS = urandom_read @@ -124,6 +124,11 @@ $(OUTPUT)/test_stub.o: test_stub.c $(call msg,CC,,$@) $(CC) -c $(CFLAGS) -o $@ $< +.PHONY: $(OUTPUT)/runqslower +$(OUTPUT)/runqslower: force + $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ + OUTPUT=$(CURDIR)/tools/ + BPFOBJ := $(OUTPUT)/libbpf.a $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)