From b238290b965f23ac5f8aeb64cd731aef18aa796c Mon Sep 17 00:00:00 2001
From: Neil Spring <ntspring@fb.com>
Date: Mon, 30 Aug 2021 20:33:56 -0700
Subject: [PATCH 01/63] bpf: Permit ingress_ifindex in bpf_prog_test_run_xattr

bpf_prog_test_run_xattr takes a struct __sk_buff, but did not permit
that __skbuff to include an nonzero ingress_ifindex.

This patch updates to allow ingress_ifindex, convert the __sk_buff field to
sk_buff (skb_iif) and back, and tests that the value is present from on BPF
program side. The test sets an unlikely distinct value for ingress_ifindex
(11) from ifindex (1), which is in line with the rest of the synthetic field
tests.

Adding this support allows testing BPF that operates differently on
incoming and outgoing skbs by discriminating on this field.

Signed-off-by: Neil Spring <ntspring@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210831033356.1459316-1-ntspring@fb.com
---
 net/bpf/test_run.c                               | 8 +++-----
 tools/testing/selftests/bpf/prog_tests/skb_ctx.c | 5 +++++
 tools/testing/selftests/bpf/progs/test_skb_ctx.c | 4 ++++
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 2eb0e55ef54d..1153b89c9d93 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -483,11 +483,7 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
 		return -EINVAL;
 
 	/* priority is allowed */
-
-	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, priority),
-			   offsetof(struct __sk_buff, ifindex)))
-		return -EINVAL;
-
+	/* ingress_ifindex is allowed */
 	/* ifindex is allowed */
 
 	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, ifindex),
@@ -516,6 +512,7 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
 
 	skb->mark = __skb->mark;
 	skb->priority = __skb->priority;
+	skb->skb_iif = __skb->ingress_ifindex;
 	skb->tstamp = __skb->tstamp;
 	memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN);
 
@@ -545,6 +542,7 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
 
 	__skb->mark = skb->mark;
 	__skb->priority = skb->priority;
+	__skb->ingress_ifindex = skb->skb_iif;
 	__skb->ifindex = skb->dev->ifindex;
 	__skb->tstamp = skb->tstamp;
 	memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN);
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index fafeddaad6a9..2bf8c687348b 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -11,6 +11,7 @@ void test_skb_ctx(void)
 		.cb[3] = 4,
 		.cb[4] = 5,
 		.priority = 6,
+		.ingress_ifindex = 11,
 		.ifindex = 1,
 		.tstamp = 7,
 		.wire_len = 100,
@@ -97,6 +98,10 @@ void test_skb_ctx(void)
 		   "ctx_out_ifindex",
 		   "skb->ifindex == %d, expected %d\n",
 		   skb.ifindex, 1);
+	CHECK_ATTR(skb.ingress_ifindex != 11,
+		   "ctx_out_ingress_ifindex",
+		   "skb->ingress_ifindex == %d, expected %d\n",
+		   skb.ingress_ifindex, 11);
 	CHECK_ATTR(skb.tstamp != 8,
 		   "ctx_out_tstamp",
 		   "skb->tstamp == %lld, expected %d\n",
diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
index b02ea589ce7e..bbd5a9c1c4df 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
@@ -25,6 +25,10 @@ int process(struct __sk_buff *skb)
 		return 1;
 	if (skb->gso_size != 10)
 		return 1;
+	if (skb->ingress_ifindex != 11)
+		return 1;
+	if (skb->ifindex != 1)
+		return 1;
 
 	return 0;
 }

From 03e601f48b2da6fb44d0f7b86957a8f6bacfb347 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Wed, 1 Sep 2021 13:48:12 +0200
Subject: [PATCH 02/63] libbpf: Don't crash on object files with no symbol
 tables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If libbpf encounters an ELF file that has been stripped of its symbol
table, it will crash in bpf_object__add_programs() when trying to
dereference the obj->efile.symbols pointer.

Fix this by erroring out of bpf_object__elf_collect() if it is not able
able to find the symbol table.

v2:
  - Move check into bpf_object__elf_collect() and add nice error message

Fixes: 6245947c1b3c ("libbpf: Allow gaps in BPF program sections to support overriden weak functions")
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210901114812.204720-1-toke@redhat.com
---
 tools/lib/bpf/libbpf.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 88d8825fc6f6..8f579c6666b2 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2993,6 +2993,12 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 		}
 	}
 
+	if (!obj->efile.symbols) {
+		pr_warn("elf: couldn't find symbol table in %s, stripped object file?\n",
+			obj->path);
+		return -ENOENT;
+	}
+
 	scn = NULL;
 	while ((scn = elf_nextscn(elf, scn)) != NULL) {
 		idx++;

From 08a6f22ef6f843d0ea7252087787b5ab04610bec Mon Sep 17 00:00:00 2001
From: Matt Smith <alastorze@fb.com>
Date: Wed, 1 Sep 2021 12:44:37 -0700
Subject: [PATCH 03/63] libbpf: Change bpf_object_skeleton data field to const
 pointer

This change was necessary to enforce the implied contract
that bpf_object_skeleton->data should not be mutated.  The data
will be cast to `void *` during assignment to handle the case
where a user is compiling with older libbpf headers to avoid
a compiler warning of `const void *` data being cast to `void *`

Signed-off-by: Matt Smith <alastorze@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210901194439.3853238-2-alastorze@fb.com
---
 tools/lib/bpf/libbpf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index f177d897c5f7..2f6f0e15d1e7 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -854,7 +854,7 @@ struct bpf_object_skeleton {
 	size_t sz; /* size of this struct, for forward/backward compatibility */
 
 	const char *name;
-	void *data;
+	const void *data;
 	size_t data_sz;
 
 	struct bpf_object **obj;

From a6cc6b34b93e3660149a7cb947be98a9b239ffce Mon Sep 17 00:00:00 2001
From: Matt Smith <alastorze@fb.com>
Date: Wed, 1 Sep 2021 12:44:38 -0700
Subject: [PATCH 04/63] bpftool: Provide a helper method for accessing
 skeleton's embedded ELF data

This adds a skeleton method X__elf_bytes() which returns the binary data of
the compiled and embedded BPF object file. It additionally sets the size of
the return data to the provided size_t pointer argument.

The assignment to s->data is cast to void * to ensure no warning is issued if
compiled with a previous version of libbpf where the bpf_object_skeleton field
is void * instead of const void *

Signed-off-by: Matt Smith <alastorze@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210901194439.3853238-3-alastorze@fb.com
---
 tools/bpf/bpftool/gen.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index d40d92bbf0e4..e3ec47a6a612 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -238,8 +238,8 @@ static void codegen(const char *template, ...)
 		} else if (c == '\n') {
 			break;
 		} else {
-			p_err("unrecognized character at pos %td in template '%s'",
-			      src - template - 1, template);
+			p_err("unrecognized character at pos %td in template '%s': '%c'",
+			      src - template - 1, template, c);
 			free(s);
 			exit(-1);
 		}
@@ -406,7 +406,7 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name)
 	}
 
 	bpf_object__for_each_map(map, obj) {
-		const char * ident;
+		const char *ident;
 
 		ident = get_map_ident(map);
 		if (!ident)
@@ -862,6 +862,8 @@ static int do_skeleton(int argc, char **argv)
 	codegen("\
 		\n\
 									    \n\
+		static inline const void *%1$s__elf_bytes(size_t *sz);	    \n\
+									    \n\
 		static inline int					    \n\
 		%1$s__create_skeleton(struct %1$s *obj)			    \n\
 		{							    \n\
@@ -943,10 +945,20 @@ static int do_skeleton(int argc, char **argv)
 	codegen("\
 		\n\
 									    \n\
-			s->data_sz = %d;				    \n\
-			s->data = (void *)\"\\				    \n\
-		",
-		file_sz);
+			s->data = (void *)%2$s__elf_bytes(&s->data_sz);	    \n\
+									    \n\
+			return 0;					    \n\
+		err:							    \n\
+			bpf_object__destroy_skeleton(s);		    \n\
+			return -ENOMEM;					    \n\
+		}							    \n\
+									    \n\
+		static inline const void *%2$s__elf_bytes(size_t *sz)	    \n\
+		{							    \n\
+			*sz = %1$d;					    \n\
+			return (const void *)\"\\			    \n\
+		"
+		, file_sz, obj_name);
 
 	/* embed contents of BPF object file */
 	print_hex(obj_data, file_sz);
@@ -954,11 +966,6 @@ static int do_skeleton(int argc, char **argv)
 	codegen("\
 		\n\
 		\";							    \n\
-									    \n\
-			return 0;					    \n\
-		err:							    \n\
-			bpf_object__destroy_skeleton(s);		    \n\
-			return -ENOMEM;					    \n\
 		}							    \n\
 									    \n\
 		#endif /* %s */						    \n\

From 980a1a4c342f353a62d64174d0a6a9466a741273 Mon Sep 17 00:00:00 2001
From: Matt Smith <alastorze@fb.com>
Date: Wed, 1 Sep 2021 12:44:39 -0700
Subject: [PATCH 05/63] selftests/bpf: Add checks for X__elf_bytes() skeleton
 helper

This patch adds two checks for the X__elf_bytes BPF skeleton helper
method. The first asserts that the pointer returned from the helper
method is valid, the second asserts that the provided size pointer is
set.

Signed-off-by: Matt Smith <alastorze@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210901194439.3853238-4-alastorze@fb.com
---
 tools/testing/selftests/bpf/prog_tests/skeleton.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c
index f6f130c99b8c..fe1e204a65c6 100644
--- a/tools/testing/selftests/bpf/prog_tests/skeleton.c
+++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c
@@ -18,6 +18,8 @@ void test_skeleton(void)
 	struct test_skeleton__data *data;
 	struct test_skeleton__rodata *rodata;
 	struct test_skeleton__kconfig *kcfg;
+	const void *elf_bytes;
+	size_t elf_bytes_sz = 0;
 
 	skel = test_skeleton__open();
 	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
@@ -91,6 +93,10 @@ void test_skeleton(void)
 	CHECK(bss->kern_ver != kcfg->LINUX_KERNEL_VERSION, "ext2",
 	      "got %d != exp %d\n", bss->kern_ver, kcfg->LINUX_KERNEL_VERSION);
 
+	elf_bytes = test_skeleton__elf_bytes(&elf_bytes_sz);
+	ASSERT_OK_PTR(elf_bytes, "elf_bytes");
+	ASSERT_GE(elf_bytes_sz, 0, "elf_bytes_sz");
+
 cleanup:
 	test_skeleton__destroy(skel);
 }

From 006a5099fc18a43792b01d002e1e45c5541ea666 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Tue, 7 Sep 2021 15:10:23 -0700
Subject: [PATCH 06/63] libbpf: Fix build with latest gcc/binutils with LTO

After updating to binutils 2.35, the build began to fail with an
assembler error. A bug was opened on the Red Hat Bugzilla a few days
later for the same issue.

Work around the problem by using the new `symver` attribute (introduced
in GCC 10) as needed instead of assembler directives.

This addresses Red Hat ([0]) and OpenSUSE ([1]) bug reports, as well as libbpf
issue ([2]).

  [0]: https://bugzilla.redhat.com/show_bug.cgi?id=1863059
  [1]: https://bugzilla.opensuse.org/show_bug.cgi?id=1188749
  [2]: Closes: https://github.com/libbpf/libbpf/issues/338

Co-developed-by: Patrick McCarty <patrick.mccarty@intel.com>
Co-developed-by: Michal Suchanek <msuchanek@suse.de>
Signed-off-by: Patrick McCarty <patrick.mccarty@intel.com>
Signed-off-by: Michal Suchanek <msuchanek@suse.de>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210907221023.2660953-1-andrii@kernel.org
---
 tools/lib/bpf/libbpf_internal.h | 25 +++++++++++++++++++------
 tools/lib/bpf/xsk.c             |  4 ++--
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 533b0211f40a..4f6ff5c23695 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -90,17 +90,30 @@
 /* Symbol versioning is different between static and shared library.
  * Properly versioned symbols are needed for shared library, but
  * only the symbol of the new version is needed for static library.
+ * Starting with GNU C 10, use symver attribute instead of .symver assembler
+ * directive, which works better with GCC LTO builds.
  */
-#ifdef SHARED
-# define COMPAT_VERSION(internal_name, api_name, version) \
+#if defined(SHARED) && defined(__GNUC__) && __GNUC__ >= 10
+
+#define DEFAULT_VERSION(internal_name, api_name, version) \
+	__attribute__((symver(#api_name "@@" #version)))
+#define COMPAT_VERSION(internal_name, api_name, version) \
+	__attribute__((symver(#api_name "@" #version)))
+
+#elif defined(SHARED)
+
+#define COMPAT_VERSION(internal_name, api_name, version) \
 	asm(".symver " #internal_name "," #api_name "@" #version);
-# define DEFAULT_VERSION(internal_name, api_name, version) \
+#define DEFAULT_VERSION(internal_name, api_name, version) \
 	asm(".symver " #internal_name "," #api_name "@@" #version);
-#else
-# define COMPAT_VERSION(internal_name, api_name, version)
-# define DEFAULT_VERSION(internal_name, api_name, version) \
+
+#else /* !SHARED */
+
+#define COMPAT_VERSION(internal_name, api_name, version)
+#define DEFAULT_VERSION(internal_name, api_name, version) \
 	extern typeof(internal_name) api_name \
 	__attribute__((alias(#internal_name)));
+
 #endif
 
 extern void libbpf_print(enum libbpf_print_level level,
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index e9b619aa0cdf..a2111696ba91 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -281,6 +281,7 @@ out_mmap:
 	return err;
 }
 
+DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4)
 int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
 			    __u64 size, struct xsk_ring_prod *fill,
 			    struct xsk_ring_cons *comp,
@@ -345,6 +346,7 @@ struct xsk_umem_config_v1 {
 	__u32 frame_headroom;
 };
 
+COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2)
 int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area,
 			    __u64 size, struct xsk_ring_prod *fill,
 			    struct xsk_ring_cons *comp,
@@ -358,8 +360,6 @@ int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area,
 	return xsk_umem__create_v0_0_4(umem_ptr, umem_area, size, fill, comp,
 					&config);
 }
-COMPAT_VERSION(xsk_umem__create_v0_0_2, xsk_umem__create, LIBBPF_0.0.2)
-DEFAULT_VERSION(xsk_umem__create_v0_0_4, xsk_umem__create, LIBBPF_0.0.4)
 
 static enum xsk_prog get_xsk_prog(void)
 {

From 0b46b755056043dccfe078b96b256502b88f2464 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Wed, 8 Sep 2021 14:32:26 -0700
Subject: [PATCH 07/63] libbpf: Add LIBBPF_DEPRECATED_SINCE macro for
 scheduling API deprecations

Introduce a macro LIBBPF_DEPRECATED_SINCE(major, minor, message) to prepare
the deprecation of two API functions. This macro marks functions as deprecated
when libbpf's version reaches the values passed as an argument.

As part of this change libbpf_version.h header is added with recorded major
(LIBBPF_MAJOR_VERSION) and minor (LIBBPF_MINOR_VERSION) libbpf version macros.
They are now part of libbpf public API and can be relied upon by user code.
libbpf_version.h is installed system-wide along other libbpf public headers.

Due to this new build-time auto-generated header, in-kernel applications
relying on libbpf (resolve_btfids, bpftool, bpf_preload) are updated to
include libbpf's output directory as part of a list of include search paths.
Better fix would be to use libbpf's make_install target to install public API
headers, but that clean up is left out as a future improvement. The build
changes were tested by building kernel (with KBUILD_OUTPUT and O= specified
explicitly), bpftool, libbpf, selftests/bpf, and resolve_btfids builds. No
problems were detected.

Note that because of the constraints of the C preprocessor we have to write
a few lines of macro magic for each version used to prepare deprecation (0.6
for now).

Also, use LIBBPF_DEPRECATED_SINCE() to schedule deprecation of
btf__get_from_id() and btf__load(), which are replaced by
btf__load_from_kernel_by_id() and btf__load_into_kernel(), respectively,
starting from future libbpf v0.6. This is part of libbpf 1.0 effort ([0]).

  [0] Closes: https://github.com/libbpf/libbpf/issues/278

Co-developed-by: Quentin Monnet <quentin@isovalent.com>
Co-developed-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20210908213226.1871016-1-andrii@kernel.org
---
 kernel/bpf/preload/Makefile          |  7 +++++--
 tools/bpf/bpftool/Makefile           |  4 ++++
 tools/bpf/resolve_btfids/Makefile    |  6 ++++--
 tools/lib/bpf/Makefile               | 24 +++++++++++++++++-------
 tools/lib/bpf/btf.h                  |  2 ++
 tools/lib/bpf/libbpf_common.h        | 19 +++++++++++++++++++
 tools/testing/selftests/bpf/Makefile |  4 ++--
 7 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/kernel/bpf/preload/Makefile b/kernel/bpf/preload/Makefile
index 1951332dd15f..ac29d4e9a384 100644
--- a/kernel/bpf/preload/Makefile
+++ b/kernel/bpf/preload/Makefile
@@ -10,12 +10,15 @@ LIBBPF_OUT = $(abspath $(obj))
 $(LIBBPF_A):
 	$(Q)$(MAKE) -C $(LIBBPF_SRCS) O=$(LIBBPF_OUT)/ OUTPUT=$(LIBBPF_OUT)/ $(LIBBPF_OUT)/libbpf.a
 
-userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi \
+userccflags += -I$(LIBBPF_OUT) -I $(srctree)/tools/include/ \
+	-I $(srctree)/tools/include/uapi \
 	-I $(srctree)/tools/lib/ -Wno-unused-result
 
 userprogs := bpf_preload_umd
 
-clean-files := $(userprogs) bpf_helper_defs.h FEATURE-DUMP.libbpf staticobjs/ feature/
+clean-files := $(userprogs) libbpf_version.h bpf_helper_defs.h FEATURE-DUMP.libbpf staticobjs/ feature/
+
+$(obj)/iterators/iterators.o: $(LIBBPF_A)
 
 bpf_preload_umd-objs := iterators/iterators.o
 bpf_preload_umd-userldlibs := $(LIBBPF_A) -lelf -lz
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index d73232be1e99..06aa1616dabe 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -60,6 +60,7 @@ CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers
 CFLAGS += $(filter-out -Wswitch-enum -Wnested-externs,$(EXTRA_WARNINGS))
 CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
 	-I$(if $(OUTPUT),$(OUTPUT),.) \
+	$(if $(LIBBPF_OUTPUT),-I$(LIBBPF_OUTPUT)) \
 	-I$(srctree)/kernel/bpf/ \
 	-I$(srctree)/tools/include \
 	-I$(srctree)/tools/include/uapi \
@@ -137,7 +138,10 @@ endif
 BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool
 
 BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o xlated_dumper.o btf_dumper.o disasm.o)
+$(BOOTSTRAP_OBJS): $(LIBBPF_BOOTSTRAP)
+
 OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
+$(OBJS): $(LIBBPF)
 
 VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)				\
 		     $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)	\
diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
index bb9fa8de7e62..edc0c329cf74 100644
--- a/tools/bpf/resolve_btfids/Makefile
+++ b/tools/bpf/resolve_btfids/Makefile
@@ -26,6 +26,7 @@ LIBBPF_SRC := $(srctree)/tools/lib/bpf/
 SUBCMD_SRC := $(srctree)/tools/lib/subcmd/
 
 BPFOBJ     := $(OUTPUT)/libbpf/libbpf.a
+LIBBPF_OUT := $(abspath $(dir $(BPFOBJ)))/
 SUBCMDOBJ  := $(OUTPUT)/libsubcmd/libsubcmd.a
 
 BINARY     := $(OUTPUT)/resolve_btfids
@@ -41,11 +42,12 @@ $(SUBCMDOBJ): fixdep FORCE | $(OUTPUT)/libsubcmd
 	$(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(abspath $(dir $@))/ $(abspath $@)
 
 $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/libbpf
-	$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC)  OUTPUT=$(abspath $(dir $@))/ $(abspath $@)
+	$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC)  OUTPUT=$(LIBBPF_OUT) $(abspath $@)
 
 CFLAGS := -g \
           -I$(srctree)/tools/include \
           -I$(srctree)/tools/include/uapi \
+          -I$(LIBBPF_OUT) \
           -I$(LIBBPF_SRC) \
           -I$(SUBCMD_SRC)
 
@@ -54,7 +56,7 @@ LIBS = -lelf -lz
 export srctree OUTPUT CFLAGS Q
 include $(srctree)/tools/build/Makefile.include
 
-$(BINARY_IN): fixdep FORCE | $(OUTPUT)
+$(BINARY_IN): $(BPFOBJ) fixdep FORCE | $(OUTPUT)
 	$(Q)$(MAKE) $(build)=resolve_btfids
 
 $(BINARY): $(BPFOBJ) $(SUBCMDOBJ) $(BINARY_IN)
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 74c3b73a5fbe..dab21e0c7cc2 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -8,7 +8,8 @@ VERSION_SCRIPT := libbpf.map
 LIBBPF_VERSION := $(shell \
 	grep -oE '^LIBBPF_([0-9.]+)' $(VERSION_SCRIPT) | \
 	sort -rV | head -n1 | cut -d'_' -f2)
-LIBBPF_MAJOR_VERSION := $(firstword $(subst ., ,$(LIBBPF_VERSION)))
+LIBBPF_MAJOR_VERSION := $(word 1,$(subst ., ,$(LIBBPF_VERSION)))
+LIBBPF_MINOR_VERSION := $(word 2,$(subst ., ,$(LIBBPF_VERSION)))
 
 MAKEFLAGS += --no-print-directory
 
@@ -59,7 +60,8 @@ ifndef VERBOSE
   VERBOSE = 0
 endif
 
-INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi
+INCLUDES = -I$(if $(OUTPUT),$(OUTPUT),.)				\
+	   -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi
 
 export prefix libdir src obj
 
@@ -111,7 +113,9 @@ SHARED_OBJDIR	:= $(OUTPUT)sharedobjs/
 STATIC_OBJDIR	:= $(OUTPUT)staticobjs/
 BPF_IN_SHARED	:= $(SHARED_OBJDIR)libbpf-in.o
 BPF_IN_STATIC	:= $(STATIC_OBJDIR)libbpf-in.o
+VERSION_HDR	:= $(OUTPUT)libbpf_version.h
 BPF_HELPER_DEFS	:= $(OUTPUT)bpf_helper_defs.h
+BPF_GENERATED	:= $(BPF_HELPER_DEFS) $(VERSION_HDR)
 
 LIB_TARGET	:= $(addprefix $(OUTPUT),$(LIB_TARGET))
 LIB_FILE	:= $(addprefix $(OUTPUT),$(LIB_FILE))
@@ -136,7 +140,7 @@ all: fixdep
 
 all_cmd: $(CMD_TARGETS) check
 
-$(BPF_IN_SHARED): force $(BPF_HELPER_DEFS)
+$(BPF_IN_SHARED): force $(BPF_GENERATED)
 	@(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \
 	(diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \
 	echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true
@@ -154,13 +158,19 @@ $(BPF_IN_SHARED): force $(BPF_HELPER_DEFS)
 	echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true
 	$(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)"
 
-$(BPF_IN_STATIC): force $(BPF_HELPER_DEFS)
+$(BPF_IN_STATIC): force $(BPF_GENERATED)
 	$(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR)
 
 $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h
 	$(QUIET_GEN)$(srctree)/scripts/bpf_doc.py --header \
 		--file $(srctree)/tools/include/uapi/linux/bpf.h > $(BPF_HELPER_DEFS)
 
+$(VERSION_HDR): force
+	$(QUIET_GEN)echo "/* This file was auto-generated. */" > $@
+	@echo "" >> $@
+	@echo "#define LIBBPF_MAJOR_VERSION $(LIBBPF_MAJOR_VERSION)" >> $@
+	@echo "#define LIBBPF_MINOR_VERSION $(LIBBPF_MINOR_VERSION)" >> $@
+
 $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
 
 $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) $(VERSION_SCRIPT)
@@ -224,10 +234,10 @@ install_lib: all_cmd
 		cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ)
 
 INSTALL_HEADERS = bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \
-		  bpf_helpers.h $(BPF_HELPER_DEFS) bpf_tracing.h	     \
+		  bpf_helpers.h $(BPF_GENERATED) bpf_tracing.h	     \
 		  bpf_endian.h bpf_core_read.h skel_internal.h
 
-install_headers: $(BPF_HELPER_DEFS)
+install_headers: $(BPF_GENERATED)
 	$(call QUIET_INSTALL, headers)					     \
 		$(foreach hdr,$(INSTALL_HEADERS),			     \
 			$(call do_install,$(hdr),$(prefix)/include/bpf,644);)
@@ -240,7 +250,7 @@ install: install_lib install_pkgconfig install_headers
 
 clean:
 	$(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS)		     \
-		*~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_HELPER_DEFS)		     \
+		*~ .*.d .*.cmd LIBBPF-CFLAGS $(BPF_GENERATED)		     \
 		$(SHARED_OBJDIR) $(STATIC_OBJDIR)			     \
 		$(addprefix $(OUTPUT),					     \
 			    *.o *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) *.pc)
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 4a711f990904..f2e2fab950b7 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -50,9 +50,11 @@ LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
 
 LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id);
 LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf);
+LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_from_kernel_by_id instead")
 LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf);
 
 LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);
+LIBBPF_DEPRECATED_SINCE(0, 6, "use btf__load_into_kernel instead")
 LIBBPF_API int btf__load(struct btf *btf);
 LIBBPF_API int btf__load_into_kernel(struct btf *btf);
 LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,
diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h
index 947d8bd8a7bb..36ac77f2bea2 100644
--- a/tools/lib/bpf/libbpf_common.h
+++ b/tools/lib/bpf/libbpf_common.h
@@ -10,6 +10,7 @@
 #define __LIBBPF_LIBBPF_COMMON_H
 
 #include <string.h>
+#include "libbpf_version.h"
 
 #ifndef LIBBPF_API
 #define LIBBPF_API __attribute__((visibility("default")))
@@ -17,6 +18,24 @@
 
 #define LIBBPF_DEPRECATED(msg) __attribute__((deprecated(msg)))
 
+/* Mark a symbol as deprecated when libbpf version is >= {major}.{minor} */
+#define LIBBPF_DEPRECATED_SINCE(major, minor, msg)			    \
+	__LIBBPF_MARK_DEPRECATED_ ## major ## _ ## minor		    \
+		(LIBBPF_DEPRECATED("libbpf v" # major "." # minor "+: " msg))
+
+#define __LIBBPF_CURRENT_VERSION_GEQ(major, minor)			    \
+	(LIBBPF_MAJOR_VERSION > (major) ||				    \
+	 (LIBBPF_MAJOR_VERSION == (major) && LIBBPF_MINOR_VERSION >= (minor)))
+
+/* Add checks for other versions below when planning deprecation of API symbols
+ * with the LIBBPF_DEPRECATED_SINCE macro.
+ */
+#if __LIBBPF_CURRENT_VERSION_GEQ(0, 6)
+#define __LIBBPF_MARK_DEPRECATED_0_6(X) X
+#else
+#define __LIBBPF_MARK_DEPRECATED_0_6(X)
+#endif
+
 /* Helper macro to declare and initialize libbpf options struct
  *
  * This dance with uninitialized declaration, followed by memset to zero,
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 866531c08e4f..1a4d30ff3275 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -512,14 +512,14 @@ $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
 	$(Q)$(CXX) $(CFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@
 
 # Benchmark runner
-$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h
+$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ)
 	$(call msg,CC,,$@)
 	$(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
 $(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h
 $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h
 $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \
 			    $(OUTPUT)/perfbuf_bench.skel.h
-$(OUTPUT)/bench.o: bench.h testing_helpers.h
+$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
 $(OUTPUT)/bench: LDLIBS += -lm
 $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
 		 $(OUTPUT)/bench_count.o \

From ed7b74dc777777254a97de3e18e5dcbcebf3f015 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 7 Sep 2021 09:19:09 +0200
Subject: [PATCH 08/63] selftests: xsk: Simplify xsk and umem arrays

Simplify the xsk_info and umem_info allocation by allocating them
upfront in an array, instead of allocating an array of pointers to
future creations of these. Allocating them upfront also has the
advantage that configuration information can be stored in these
structures instead of relying on global variables. With the previous
structure, xsk_info and umem_info were created too late to be able to
store most configuration information. This will be used to eliminate
most global variables in later patches in this series.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210907071928.9750-2-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 84 ++++++++++--------------
 tools/testing/selftests/bpf/xdpxceiver.h |  5 +-
 2 files changed, 37 insertions(+), 52 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index f53ce2683f8d..9639d8da516d 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -235,7 +235,7 @@ static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
 	    udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr);
 }
 
-static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size, int idx)
+static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size, int idx)
 {
 	struct xsk_umem_config cfg = {
 		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
@@ -244,21 +244,15 @@ static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size, in
 		.frame_headroom = frame_headroom,
 		.flags = XSK_UMEM__DEFAULT_FLAGS
 	};
-	struct xsk_umem_info *umem;
 	int ret;
 
-	umem = calloc(1, sizeof(struct xsk_umem_info));
-	if (!umem)
-		exit_with_error(errno);
-
 	ret = xsk_umem__create(&umem->umem, buffer, size,
 			       &umem->fq, &umem->cq, &cfg);
 	if (ret)
-		exit_with_error(-ret);
+		return ret;
 
 	umem->buffer = buffer;
-
-	data->umem_arr[idx] = umem;
+	return 0;
 }
 
 static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
@@ -274,19 +268,14 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
 	xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
 }
 
-static int xsk_configure_socket(struct ifobject *ifobject, int idx)
+static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
+				struct ifobject *ifobject, u32 qid)
 {
 	struct xsk_socket_config cfg;
-	struct xsk_socket_info *xsk;
 	struct xsk_ring_cons *rxr;
 	struct xsk_ring_prod *txr;
-	int ret;
 
-	xsk = calloc(1, sizeof(struct xsk_socket_info));
-	if (!xsk)
-		exit_with_error(errno);
-
-	xsk->umem = ifobject->umem;
+	xsk->umem = umem;
 	cfg.rx_size = rxqsize;
 	cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
 	cfg.libbpf_flags = 0;
@@ -301,14 +290,7 @@ static int xsk_configure_socket(struct ifobject *ifobject, int idx)
 		txr = &xsk->tx;
 	}
 
-	ret = xsk_socket__create(&xsk->xsk, ifobject->ifname, idx,
-				 ifobject->umem->umem, rxr, txr, &cfg);
-	if (ret)
-		return 1;
-
-	ifobject->xsk_arr[idx] = xsk;
-
-	return 0;
+	return xsk_socket__create(&xsk->xsk, ifobject->ifname, qid, umem->umem, rxr, txr, &cfg);
 }
 
 static struct option long_options[] = {
@@ -756,8 +738,7 @@ static void thread_common_ops(struct ifobject *ifobject, void *bufs)
 	u64 umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
 	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
 	size_t mmap_sz = umem_sz;
-	int ctr = 0;
-	int ret;
+	int ctr = 0, ret;
 
 	ifobject->ns_fd = switch_namespace(ifobject->nsname);
 
@@ -769,31 +750,34 @@ static void thread_common_ops(struct ifobject *ifobject, void *bufs)
 		exit_with_error(errno);
 
 	while (ctr++ < SOCK_RECONF_CTR) {
-		xsk_configure_umem(ifobject, bufs, umem_sz, 0);
-		ifobject->umem = ifobject->umem_arr[0];
-		ret = xsk_configure_socket(ifobject, 0);
+		ret = xsk_configure_umem(&ifobject->umem_arr[0], bufs, umem_sz, 0);
+		if (ret)
+			exit_with_error(-ret);
+
+		ret = xsk_configure_socket(&ifobject->xsk_arr[0], &ifobject->umem_arr[0],
+					   ifobject, 0);
 		if (!ret)
 			break;
 
 		/* Retry Create Socket if it fails as xsk_socket__create() is asynchronous */
-		usleep(USLEEP_MAX);
 		if (ctr >= SOCK_RECONF_CTR)
 			exit_with_error(-ret);
+		usleep(USLEEP_MAX);
+	}
+
+	if (test_type == TEST_TYPE_BPF_RES) {
+		ret = xsk_configure_umem(&ifobject->umem_arr[1], (u8 *)bufs + umem_sz, umem_sz, 1);
+		if (ret)
+			exit_with_error(-ret);
+
+		ret = xsk_configure_socket(&ifobject->xsk_arr[1], &ifobject->umem_arr[1],
+					   ifobject, 1);
+		if (ret)
+			exit_with_error(-ret);
 	}
 
-	ifobject->umem = ifobject->umem_arr[0];
-	ifobject->xsk = ifobject->xsk_arr[0];
-
-	if (test_type == TEST_TYPE_BPF_RES) {
-		xsk_configure_umem(ifobject, (u8 *)bufs + umem_sz, umem_sz, 1);
-		ifobject->umem = ifobject->umem_arr[1];
-		ret = xsk_configure_socket(ifobject, 1);
-	}
-
-	ifobject->umem = ifobject->umem_arr[0];
-	ifobject->xsk = ifobject->xsk_arr[0];
-	print_verbose("Interface [%s] vector [%s]\n",
-		      ifobject->ifname, ifobject->fv.vector == tx ? "Tx" : "Rx");
+	ifobject->umem = &ifobject->umem_arr[0];
+	ifobject->xsk = &ifobject->xsk_arr[0];
 }
 
 static bool testapp_is_test_two_stepped(void)
@@ -941,10 +925,10 @@ static void swap_xsk_res(void)
 	xsk_umem__delete(ifdict_tx->umem->umem);
 	xsk_socket__delete(ifdict_rx->xsk->xsk);
 	xsk_umem__delete(ifdict_rx->umem->umem);
-	ifdict_tx->umem = ifdict_tx->umem_arr[1];
-	ifdict_tx->xsk = ifdict_tx->xsk_arr[1];
-	ifdict_rx->umem = ifdict_rx->umem_arr[1];
-	ifdict_rx->xsk = ifdict_rx->xsk_arr[1];
+	ifdict_tx->umem = &ifdict_tx->umem_arr[1];
+	ifdict_tx->xsk = &ifdict_tx->xsk_arr[1];
+	ifdict_rx->umem = &ifdict_rx->umem_arr[1];
+	ifdict_rx->xsk = &ifdict_rx->xsk_arr[1];
 }
 
 static void testapp_bpf_res(void)
@@ -1071,11 +1055,11 @@ static struct ifobject *ifobject_create(void)
 	if (!ifobj)
 		return NULL;
 
-	ifobj->xsk_arr = calloc(2, sizeof(struct xsk_socket_info *));
+	ifobj->xsk_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->xsk_arr));
 	if (!ifobj->xsk_arr)
 		goto out_xsk_arr;
 
-	ifobj->umem_arr = calloc(2, sizeof(struct xsk_umem_info *));
+	ifobj->umem_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->umem_arr));
 	if (!ifobj->umem_arr)
 		goto out_umem_arr;
 
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 7e49b9fbe25e..de80516ac6c2 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -21,6 +21,7 @@
 #define MAX_INTERFACE_NAME_CHARS 7
 #define MAX_INTERFACES_NAMESPACE_CHARS 10
 #define MAX_SOCKS 1
+#define MAX_SOCKETS 2
 #define MAX_TEARDOWN_ITER 10
 #define MAX_BIDI_ITER 2
 #define MAX_BPF_ITER 2
@@ -119,9 +120,9 @@ struct ifobject {
 	char ifname[MAX_INTERFACE_NAME_CHARS];
 	char nsname[MAX_INTERFACES_NAMESPACE_CHARS];
 	struct xsk_socket_info *xsk;
-	struct xsk_socket_info **xsk_arr;
-	struct xsk_umem_info **umem_arr;
+	struct xsk_socket_info *xsk_arr;
 	struct xsk_umem_info *umem;
+	struct xsk_umem_info *umem_arr;
 	void *(*func_ptr)(void *arg);
 	struct flow_vector fv;
 	struct pkt_stream *pkt_stream;

From 744eb5c882e8133d97f656cb6f9c49817889fa64 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 7 Sep 2021 09:19:10 +0200
Subject: [PATCH 09/63] selftests: xsk: Introduce type for thread function

Introduce a typedef of the thread function so this can be passed to
init_iface() in order to simplify that function.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210907071928.9750-3-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 25 ++++++++++++------------
 tools/testing/selftests/bpf/xdpxceiver.h |  4 +++-
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 9639d8da516d..edf5b6cc6998 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -974,10 +974,9 @@ static void testapp_stats(void)
 	print_ksft_result();
 }
 
-static void init_iface(struct ifobject *ifobj, const char *dst_mac,
-		       const char *src_mac, const char *dst_ip,
-		       const char *src_ip, const u16 dst_port,
-		       const u16 src_port, enum fvector vector)
+static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac,
+		       const char *dst_ip, const char *src_ip, const u16 dst_port,
+		       const u16 src_port, enum fvector vector, thread_func_t func_ptr)
 {
 	struct in_addr ip;
 
@@ -993,15 +992,13 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac,
 	ifobj->dst_port = dst_port;
 	ifobj->src_port = src_port;
 
-	if (vector == tx) {
-		ifobj->fv.vector = tx;
-		ifobj->func_ptr = worker_testapp_validate_tx;
+	if (vector == tx)
 		ifdict_tx = ifobj;
-	} else {
-		ifobj->fv.vector = rx;
-		ifobj->func_ptr = worker_testapp_validate_rx;
+	else
 		ifdict_rx = ifobj;
-	}
+
+	ifobj->fv.vector = vector;
+	ifobj->func_ptr = func_ptr;
 }
 
 static void run_pkt_test(int mode, int type)
@@ -1097,8 +1094,10 @@ int main(int argc, char **argv)
 
 	parse_command_line(argc, argv);
 
-	init_iface(ifdict[tx], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx);
-	init_iface(ifdict[rx], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx);
+	init_iface(ifdict[tx], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx,
+		   worker_testapp_validate_tx);
+	init_iface(ifdict[rx], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx,
+		   worker_testapp_validate_rx);
 
 	ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
 
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index de80516ac6c2..799d524eb425 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -116,6 +116,8 @@ struct pkt_stream {
 	struct pkt *pkts;
 };
 
+typedef void *(*thread_func_t)(void *arg);
+
 struct ifobject {
 	char ifname[MAX_INTERFACE_NAME_CHARS];
 	char nsname[MAX_INTERFACES_NAMESPACE_CHARS];
@@ -123,8 +125,8 @@ struct ifobject {
 	struct xsk_socket_info *xsk_arr;
 	struct xsk_umem_info *umem;
 	struct xsk_umem_info *umem_arr;
-	void *(*func_ptr)(void *arg);
 	struct flow_vector fv;
+	thread_func_t func_ptr;
 	struct pkt_stream *pkt_stream;
 	int ns_fd;
 	u32 dst_ip;

From ce74acaf015c5ac9365abc017af051560950e46b Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 7 Sep 2021 09:19:11 +0200
Subject: [PATCH 10/63] selftests: xsk: Introduce test specifications

Introduce a test specification to be able to concisely describe a
test. Currently, a test is implemented by sprinkling test specific if
statements here and there, which is not scalable or easy to
understand. The end goal with this patch set is to come to the point
in which a test is completely specified by a test specification that
can easily be constructed in a single function so that new tests can
be added without too much trouble. This test specification will be run
by a test runner that has no idea about tests. It just executes the
what test specification states.

This patch introduces the test specification and, as a start, puts the
two interface objects in there, one containing the packet stream to be
sent and the other one the packet stream that is supposed to be
received for a test to pass. The global variables containing these can
then be eliminated. The following patches will convert each existing
test into a test specification and add the needed fields into it and
the functionality in the test runner that act on the test
specification. At the end, the test runner should contain no test
specific code and each test should be described in a single simple
function.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210907071928.9750-4-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 203 ++++++++++++++---------
 tools/testing/selftests/bpf/xdpxceiver.h |   7 +-
 2 files changed, 126 insertions(+), 84 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index edf5b6cc6998..a6bcc7453860 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -336,45 +336,43 @@ static int switch_namespace(const char *nsname)
 	return nsfd;
 }
 
-static int validate_interfaces(void)
+static bool validate_interface(struct ifobject *ifobj)
 {
-	bool ret = true;
-
-	for (int i = 0; i < MAX_INTERFACES; i++) {
-		if (!strcmp(ifdict[i]->ifname, "")) {
-			ret = false;
-			ksft_test_result_fail("ERROR: interfaces: -i <int>,<ns> -i <int>,<ns>.");
-		}
-	}
-	return ret;
+	if (!strcmp(ifobj->ifname, ""))
+		return false;
+	return true;
 }
 
-static void parse_command_line(int argc, char **argv)
+static void parse_command_line(struct test_spec *test, int argc, char **argv)
 {
-	int option_index, interface_index = 0, c;
+	struct ifobject *ifobj;
+	u32 interface_nb = 0;
+	int option_index, c;
 
 	opterr = 0;
 
 	for (;;) {
-		c = getopt_long(argc, argv, "i:Dv", long_options, &option_index);
+		char *sptr, *token;
 
+		c = getopt_long(argc, argv, "i:Dv", long_options, &option_index);
 		if (c == -1)
 			break;
 
 		switch (c) {
 		case 'i':
-			if (interface_index == MAX_INTERFACES)
+			if (interface_nb == 0)
+				ifobj = test->ifobj_tx;
+			else if (interface_nb == 1)
+				ifobj = test->ifobj_rx;
+			else
 				break;
-			char *sptr, *token;
 
 			sptr = strndupa(optarg, strlen(optarg));
-			memcpy(ifdict[interface_index]->ifname,
-			       strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS);
+			memcpy(ifobj->ifname, strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS);
 			token = strsep(&sptr, ",");
 			if (token)
-				memcpy(ifdict[interface_index]->nsname, token,
-				       MAX_INTERFACES_NAMESPACE_CHARS);
-			interface_index++;
+				memcpy(ifobj->nsname, token, MAX_INTERFACES_NAMESPACE_CHARS);
+			interface_nb++;
 			break;
 		case 'D':
 			opt_pkt_dump = true;
@@ -387,11 +385,44 @@ static void parse_command_line(int argc, char **argv)
 			ksft_exit_xfail();
 		}
 	}
+}
 
-	if (!validate_interfaces()) {
-		usage(basename(argv[0]));
-		ksft_exit_xfail();
+static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
+			     struct ifobject *ifobj_rx)
+{
+	u32 i, j;
+
+	for (i = 0; i < MAX_INTERFACES; i++) {
+		struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
+
+		ifobj->umem = &ifobj->umem_arr[0];
+		ifobj->xsk = &ifobj->xsk_arr[0];
+
+		if (i == tx)
+			ifobj->fv.vector = tx;
+		else
+			ifobj->fv.vector = rx;
+
+		for (j = 0; j < MAX_SOCKETS; j++) {
+			memset(&ifobj->umem_arr[j], 0, sizeof(ifobj->umem_arr[j]));
+			memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
+		}
 	}
+
+	test->ifobj_tx = ifobj_tx;
+	test->ifobj_rx = ifobj_rx;
+}
+
+static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
+			   struct ifobject *ifobj_rx)
+{
+	memset(test, 0, sizeof(*test));
+	__test_spec_init(test, ifobj_tx, ifobj_rx);
+}
+
+static void test_spec_reset(struct test_spec *test)
+{
+	__test_spec_init(test, test->ifobj_tx, test->ifobj_rx);
 }
 
 static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb)
@@ -842,8 +873,10 @@ static void *worker_testapp_validate_rx(void *arg)
 	pthread_exit(NULL);
 }
 
-static void testapp_validate(void)
+static void testapp_validate_traffic(struct test_spec *test)
 {
+	struct ifobject *ifobj_tx = test->ifobj_tx;
+	struct ifobject *ifobj_rx = test->ifobj_rx;
 	bool bidi = test_type == TEST_TYPE_BIDI;
 	bool bpf = test_type == TEST_TYPE_BPF_RES;
 	struct pkt_stream *pkt_stream;
@@ -855,18 +888,18 @@ static void testapp_validate(void)
 		pkt_stream = pkt_stream_generate(DEFAULT_PKT_CNT, XSK_UMEM__INVALID_FRAME_SIZE);
 	else
 		pkt_stream = pkt_stream_generate(DEFAULT_PKT_CNT, PKT_SIZE);
-	ifdict_tx->pkt_stream = pkt_stream;
-	ifdict_rx->pkt_stream = pkt_stream;
+	ifobj_tx->pkt_stream = pkt_stream;
+	ifobj_rx->pkt_stream = pkt_stream;
 
 	/*Spawn RX thread */
-	pthread_create(&t0, NULL, ifdict_rx->func_ptr, ifdict_rx);
+	pthread_create(&t0, NULL, ifobj_rx->func_ptr, ifobj_rx);
 
 	pthread_barrier_wait(&barr);
 	if (pthread_barrier_destroy(&barr))
 		exit_with_error(errno);
 
 	/*Spawn TX thread */
-	pthread_create(&t1, NULL, ifdict_tx->func_ptr, ifdict_tx);
+	pthread_create(&t1, NULL, ifobj_tx->func_ptr, ifobj_tx);
 
 	pthread_join(t1, NULL);
 	pthread_join(t0, NULL);
@@ -875,80 +908,82 @@ static void testapp_validate(void)
 		print_ksft_result();
 }
 
-static void testapp_teardown(void)
+static void testapp_teardown(struct test_spec *test)
 {
 	int i;
 
 	for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
-		print_verbose("Creating socket\n");
-		testapp_validate();
+		testapp_validate_traffic(test);
+		test_spec_reset(test);
 	}
 
 	print_ksft_result();
 }
 
-static void swap_vectors(struct ifobject *ifobj1, struct ifobject *ifobj2)
+static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
 {
-	void *(*tmp_func_ptr)(void *) = ifobj1->func_ptr;
-	enum fvector tmp_vector = ifobj1->fv.vector;
+	thread_func_t tmp_func_ptr = (*ifobj1)->func_ptr;
+	enum fvector tmp_vector = (*ifobj1)->fv.vector;
+	struct ifobject *tmp_ifobj = (*ifobj1);
 
-	ifobj1->func_ptr = ifobj2->func_ptr;
-	ifobj1->fv.vector = ifobj2->fv.vector;
+	(*ifobj1)->func_ptr = (*ifobj2)->func_ptr;
+	(*ifobj1)->fv.vector = (*ifobj2)->fv.vector;
 
-	ifobj2->func_ptr = tmp_func_ptr;
-	ifobj2->fv.vector = tmp_vector;
+	(*ifobj2)->func_ptr = tmp_func_ptr;
+	(*ifobj2)->fv.vector = tmp_vector;
 
-	ifdict_tx = ifobj1;
-	ifdict_rx = ifobj2;
+	*ifobj1 = *ifobj2;
+	*ifobj2 = tmp_ifobj;
 }
 
-static void testapp_bidi(void)
+static void testapp_bidi(struct test_spec *test)
 {
 	for (int i = 0; i < MAX_BIDI_ITER; i++) {
 		print_verbose("Creating socket\n");
-		testapp_validate();
+		testapp_validate_traffic(test);
 		if (!second_step) {
 			print_verbose("Switching Tx/Rx vectors\n");
-			swap_vectors(ifdict[1], ifdict[0]);
+			swap_directions(&test->ifobj_rx, &test->ifobj_tx);
 		}
 		second_step = true;
 	}
 
-	swap_vectors(ifdict[0], ifdict[1]);
+	swap_directions(&test->ifobj_rx, &test->ifobj_tx);
 
 	print_ksft_result();
 }
 
-static void swap_xsk_res(void)
+static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx)
 {
-	xsk_socket__delete(ifdict_tx->xsk->xsk);
-	xsk_umem__delete(ifdict_tx->umem->umem);
-	xsk_socket__delete(ifdict_rx->xsk->xsk);
-	xsk_umem__delete(ifdict_rx->umem->umem);
-	ifdict_tx->umem = &ifdict_tx->umem_arr[1];
-	ifdict_tx->xsk = &ifdict_tx->xsk_arr[1];
-	ifdict_rx->umem = &ifdict_rx->umem_arr[1];
-	ifdict_rx->xsk = &ifdict_rx->xsk_arr[1];
+	xsk_socket__delete(ifobj_tx->xsk->xsk);
+	xsk_umem__delete(ifobj_tx->umem->umem);
+	xsk_socket__delete(ifobj_rx->xsk->xsk);
+	xsk_umem__delete(ifobj_rx->umem->umem);
+	ifobj_tx->umem = &ifobj_tx->umem_arr[1];
+	ifobj_tx->xsk = &ifobj_tx->xsk_arr[1];
+	ifobj_rx->umem = &ifobj_rx->umem_arr[1];
+	ifobj_rx->xsk = &ifobj_rx->xsk_arr[1];
 }
 
-static void testapp_bpf_res(void)
+static void testapp_bpf_res(struct test_spec *test)
 {
 	int i;
 
 	for (i = 0; i < MAX_BPF_ITER; i++) {
 		print_verbose("Creating socket\n");
-		testapp_validate();
+		testapp_validate_traffic(test);
 		if (!second_step)
-			swap_xsk_res();
+			swap_xsk_resources(test->ifobj_tx, test->ifobj_rx);
 		second_step = true;
 	}
 
 	print_ksft_result();
 }
 
-static void testapp_stats(void)
+static void testapp_stats(struct test_spec *test)
 {
 	for (int i = 0; i < STAT_TEST_TYPE_MAX; i++) {
+		test_spec_reset(test);
 		stat_test_type = i;
 
 		/* reset defaults */
@@ -968,7 +1003,7 @@ static void testapp_stats(void)
 		default:
 			break;
 		}
-		testapp_validate();
+		testapp_validate_traffic(test);
 	}
 
 	print_ksft_result();
@@ -992,16 +1027,11 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *
 	ifobj->dst_port = dst_port;
 	ifobj->src_port = src_port;
 
-	if (vector == tx)
-		ifdict_tx = ifobj;
-	else
-		ifdict_rx = ifobj;
-
 	ifobj->fv.vector = vector;
 	ifobj->func_ptr = func_ptr;
 }
 
-static void run_pkt_test(int mode, int type)
+static void run_pkt_test(struct test_spec *test, int mode, int type)
 {
 	test_type = type;
 
@@ -1027,19 +1057,19 @@ static void run_pkt_test(int mode, int type)
 
 	switch (test_type) {
 	case TEST_TYPE_STATS:
-		testapp_stats();
+		testapp_stats(test);
 		break;
 	case TEST_TYPE_TEARDOWN:
-		testapp_teardown();
+		testapp_teardown(test);
 		break;
 	case TEST_TYPE_BIDI:
-		testapp_bidi();
+		testapp_bidi(test);
 		break;
 	case TEST_TYPE_BPF_RES:
-		testapp_bpf_res();
+		testapp_bpf_res(test);
 		break;
 	default:
-		testapp_validate();
+		testapp_validate_traffic(test);
 		break;
 	}
 }
@@ -1079,36 +1109,47 @@ static void ifobject_delete(struct ifobject *ifobj)
 int main(int argc, char **argv)
 {
 	struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
-	int i, j;
+	struct ifobject *ifobj_tx, *ifobj_rx;
+	struct test_spec test;
+	u32 i, j;
 
 	if (setrlimit(RLIMIT_MEMLOCK, &_rlim))
 		exit_with_error(errno);
 
-	for (i = 0; i < MAX_INTERFACES; i++) {
-		ifdict[i] = ifobject_create();
-		if (!ifdict[i])
-			exit_with_error(ENOMEM);
-	}
+	ifobj_tx = ifobject_create();
+	if (!ifobj_tx)
+		exit_with_error(ENOMEM);
+	ifobj_rx = ifobject_create();
+	if (!ifobj_rx)
+		exit_with_error(ENOMEM);
+
+	test_spec_init(&test, ifobj_tx, ifobj_rx);
 
 	setlocale(LC_ALL, "");
 
-	parse_command_line(argc, argv);
+	parse_command_line(&test, argc, argv);
 
-	init_iface(ifdict[tx], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx,
+	if (!validate_interface(ifobj_tx) || !validate_interface(ifobj_rx)) {
+		usage(basename(argv[0]));
+		ksft_exit_xfail();
+	}
+
+	init_iface(ifobj_tx, MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx,
 		   worker_testapp_validate_tx);
-	init_iface(ifdict[rx], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx,
+	init_iface(ifobj_rx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx,
 		   worker_testapp_validate_rx);
 
 	ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
 
 	for (i = 0; i < TEST_MODE_MAX; i++)
 		for (j = 0; j < TEST_TYPE_MAX; j++) {
-			run_pkt_test(i, j);
+			test_spec_init(&test, ifobj_tx, ifobj_rx);
+			run_pkt_test(&test, i, j);
 			usleep(USLEEP_MAX);
 		}
 
-	for (i = 0; i < MAX_INTERFACES; i++)
-		ifobject_delete(ifdict[i]);
+	ifobject_delete(ifobj_tx);
+	ifobject_delete(ifobj_rx);
 
 	ksft_exit_pass();
 	return 0;
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 799d524eb425..e279aa893438 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -137,9 +137,10 @@ struct ifobject {
 	u8 src_mac[ETH_ALEN];
 };
 
-static struct ifobject *ifdict[MAX_INTERFACES];
-static struct ifobject *ifdict_rx;
-static struct ifobject *ifdict_tx;
+struct test_spec {
+	struct ifobject *ifobj_tx;
+	struct ifobject *ifobj_rx;
+};
 
 /*threads*/
 pthread_barrier_t barr;

From 83f4ae2f26bd67c58d528c3967c5ad3e58098be3 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 7 Sep 2021 09:19:12 +0200
Subject: [PATCH 11/63] selftests: xsk: Move num_frames and frame_headroom to
 xsk_umem_info

Move the global variables num_frames and frame_headroom to struct
xsk_umem_info. They describe properties of the umem so no reason for
them to be global.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210907071928.9750-5-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 20 ++++++++++----------
 tools/testing/selftests/bpf/xdpxceiver.h |  4 ++--
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index a6bcc7453860..56ee03fda2b3 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -241,7 +241,7 @@ static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size
 		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
 		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
 		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
-		.frame_headroom = frame_headroom,
+		.frame_headroom = umem->frame_headroom,
 		.flags = XSK_UMEM__DEFAULT_FLAGS
 	};
 	int ret;
@@ -406,6 +406,7 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 		for (j = 0; j < MAX_SOCKETS; j++) {
 			memset(&ifobj->umem_arr[j], 0, sizeof(ifobj->umem_arr[j]));
 			memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
+			ifobj->umem_arr[j].num_frames = DEFAULT_PKT_CNT / 4;
 		}
 	}
 
@@ -433,7 +434,7 @@ static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb)
 	return &pkt_stream->pkts[pkt_nb];
 }
 
-static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len)
+static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len)
 {
 	struct pkt_stream *pkt_stream;
 	u32 i;
@@ -448,7 +449,7 @@ static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len)
 
 	pkt_stream->nb_pkts = nb_pkts;
 	for (i = 0; i < nb_pkts; i++) {
-		pkt_stream->pkts[i].addr = (i % num_frames) * XSK_UMEM__DEFAULT_FRAME_SIZE;
+		pkt_stream->pkts[i].addr = (i % umem->num_frames) * XSK_UMEM__DEFAULT_FRAME_SIZE;
 		pkt_stream->pkts[i].len = pkt_len;
 		pkt_stream->pkts[i].payload = i;
 	}
@@ -766,7 +767,7 @@ static void tx_stats_validate(struct ifobject *ifobject)
 
 static void thread_common_ops(struct ifobject *ifobject, void *bufs)
 {
-	u64 umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
+	u64 umem_sz = ifobject->umem->num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
 	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
 	size_t mmap_sz = umem_sz;
 	int ctr = 0, ret;
@@ -885,9 +886,10 @@ static void testapp_validate_traffic(struct test_spec *test)
 		exit_with_error(errno);
 
 	if (stat_test_type == STAT_TEST_TX_INVALID)
-		pkt_stream = pkt_stream_generate(DEFAULT_PKT_CNT, XSK_UMEM__INVALID_FRAME_SIZE);
+		pkt_stream = pkt_stream_generate(test->ifobj_tx->umem, DEFAULT_PKT_CNT,
+						 XSK_UMEM__INVALID_FRAME_SIZE);
 	else
-		pkt_stream = pkt_stream_generate(DEFAULT_PKT_CNT, PKT_SIZE);
+		pkt_stream = pkt_stream_generate(test->ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE);
 	ifobj_tx->pkt_stream = pkt_stream;
 	ifobj_rx->pkt_stream = pkt_stream;
 
@@ -988,12 +990,11 @@ static void testapp_stats(struct test_spec *test)
 
 		/* reset defaults */
 		rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
-		frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
 
 		switch (stat_test_type) {
 		case STAT_TEST_RX_DROPPED:
-			frame_headroom = XSK_UMEM__DEFAULT_FRAME_SIZE -
-						XDP_PACKET_HEADROOM - 1;
+			test->ifobj_rx->umem->frame_headroom = XSK_UMEM__DEFAULT_FRAME_SIZE -
+				XDP_PACKET_HEADROOM - 1;
 			break;
 		case STAT_TEST_RX_FULL:
 			rxqsize = RX_FULL_RXQSIZE;
@@ -1040,7 +1041,6 @@ static void run_pkt_test(struct test_spec *test, int mode, int type)
 	second_step = 0;
 	stat_test_type = -1;
 	rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
-	frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
 
 	configured_mode = mode;
 
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index e279aa893438..0d93a9e6c4f3 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -71,7 +71,6 @@ enum stat_test_type {
 
 static int configured_mode;
 static bool opt_pkt_dump;
-static u32 num_frames = DEFAULT_PKT_CNT / 4;
 static bool second_step;
 static int test_type;
 
@@ -81,12 +80,13 @@ static u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 static u32 xdp_bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY;
 static int stat_test_type;
 static u32 rxqsize;
-static u32 frame_headroom;
 
 struct xsk_umem_info {
 	struct xsk_ring_prod fq;
 	struct xsk_ring_cons cq;
 	struct xsk_umem *umem;
+	u32 num_frames;
+	u32 frame_headroom;
 	void *buffer;
 };
 

From 4bf8ee65ba4ebf6b0d732cca8ecde2d5e1898d8a Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 7 Sep 2021 09:19:13 +0200
Subject: [PATCH 12/63] selftests: xsk: Move rxqsize into xsk_socket_info

Move the global variable rxqsize to struct xsk_socket_info as it
describes the size of a ring in that struct. By default, it is set to
the size dictated by libbpf.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210907071928.9750-6-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 9 +++------
 tools/testing/selftests/bpf/xdpxceiver.h | 2 +-
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 56ee03fda2b3..28bf62c56190 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -276,7 +276,7 @@ static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_inf
 	struct xsk_ring_prod *txr;
 
 	xsk->umem = umem;
-	cfg.rx_size = rxqsize;
+	cfg.rx_size = xsk->rxqsize;
 	cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
 	cfg.libbpf_flags = 0;
 	cfg.xdp_flags = xdp_flags;
@@ -407,6 +407,7 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 			memset(&ifobj->umem_arr[j], 0, sizeof(ifobj->umem_arr[j]));
 			memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
 			ifobj->umem_arr[j].num_frames = DEFAULT_PKT_CNT / 4;
+			ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
 		}
 	}
 
@@ -988,16 +989,13 @@ static void testapp_stats(struct test_spec *test)
 		test_spec_reset(test);
 		stat_test_type = i;
 
-		/* reset defaults */
-		rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
-
 		switch (stat_test_type) {
 		case STAT_TEST_RX_DROPPED:
 			test->ifobj_rx->umem->frame_headroom = XSK_UMEM__DEFAULT_FRAME_SIZE -
 				XDP_PACKET_HEADROOM - 1;
 			break;
 		case STAT_TEST_RX_FULL:
-			rxqsize = RX_FULL_RXQSIZE;
+			test->ifobj_rx->xsk->rxqsize = RX_FULL_RXQSIZE;
 			break;
 		case STAT_TEST_TX_INVALID:
 			continue;
@@ -1040,7 +1038,6 @@ static void run_pkt_test(struct test_spec *test, int mode, int type)
 	xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 	second_step = 0;
 	stat_test_type = -1;
-	rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
 
 	configured_mode = mode;
 
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 0d93a9e6c4f3..09e4e015b1bf 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -79,7 +79,6 @@ static bool opt_verbose;
 static u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 static u32 xdp_bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY;
 static int stat_test_type;
-static u32 rxqsize;
 
 struct xsk_umem_info {
 	struct xsk_ring_prod fq;
@@ -96,6 +95,7 @@ struct xsk_socket_info {
 	struct xsk_umem_info *umem;
 	struct xsk_socket *xsk;
 	u32 outstanding_tx;
+	u32 rxqsize;
 };
 
 struct flow_vector {

From c160d7afba8f52c16b16111f8cbedd87f8d16d75 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 7 Sep 2021 09:19:14 +0200
Subject: [PATCH 13/63] selftests: xsk: Make frame_size configurable

Make the frame size configurable instead of it being hard coded to a
default. This is a property of the umem and will make it possible to
implement tests for different umem frame sizes in a later patch.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210907071928.9750-7-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 11 ++++++-----
 tools/testing/selftests/bpf/xdpxceiver.h |  1 +
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 28bf62c56190..79cf082a7581 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -240,7 +240,7 @@ static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size
 	struct xsk_umem_config cfg = {
 		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
 		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
-		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
+		.frame_size = umem->frame_size,
 		.frame_headroom = umem->frame_headroom,
 		.flags = XSK_UMEM__DEFAULT_FLAGS
 	};
@@ -264,7 +264,7 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
 	if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
 		exit_with_error(-ret);
 	for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
-		*xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * XSK_UMEM__DEFAULT_FRAME_SIZE;
+		*xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * umem->frame_size;
 	xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
 }
 
@@ -407,6 +407,7 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 			memset(&ifobj->umem_arr[j], 0, sizeof(ifobj->umem_arr[j]));
 			memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
 			ifobj->umem_arr[j].num_frames = DEFAULT_PKT_CNT / 4;
+			ifobj->umem_arr[j].frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
 			ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
 		}
 	}
@@ -450,7 +451,7 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb
 
 	pkt_stream->nb_pkts = nb_pkts;
 	for (i = 0; i < nb_pkts; i++) {
-		pkt_stream->pkts[i].addr = (i % umem->num_frames) * XSK_UMEM__DEFAULT_FRAME_SIZE;
+		pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size;
 		pkt_stream->pkts[i].len = pkt_len;
 		pkt_stream->pkts[i].payload = i;
 	}
@@ -768,7 +769,7 @@ static void tx_stats_validate(struct ifobject *ifobject)
 
 static void thread_common_ops(struct ifobject *ifobject, void *bufs)
 {
-	u64 umem_sz = ifobject->umem->num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
+	u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
 	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
 	size_t mmap_sz = umem_sz;
 	int ctr = 0, ret;
@@ -991,7 +992,7 @@ static void testapp_stats(struct test_spec *test)
 
 		switch (stat_test_type) {
 		case STAT_TEST_RX_DROPPED:
-			test->ifobj_rx->umem->frame_headroom = XSK_UMEM__DEFAULT_FRAME_SIZE -
+			test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
 				XDP_PACKET_HEADROOM - 1;
 			break;
 		case STAT_TEST_RX_FULL:
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 09e4e015b1bf..bfd14190abfc 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -87,6 +87,7 @@ struct xsk_umem_info {
 	u32 num_frames;
 	u32 frame_headroom;
 	void *buffer;
+	u32 frame_size;
 };
 
 struct xsk_socket_info {

From 53cb3cec2f1e2c2c0c3624f20675d01e84a8bbac Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 7 Sep 2021 09:19:15 +0200
Subject: [PATCH 14/63] selftests: xsx: Introduce test name in test spec

Introduce the test name in the test specification. This so we can set
the name locally in the test function and simplify the logic for
printing out test results.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210907071928.9750-8-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 49 ++++++++++++++----------
 tools/testing/selftests/bpf/xdpxceiver.h |  2 +
 2 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 79cf082a7581..8ef58081d4d2 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -112,13 +112,9 @@ static void __exit_with_error(int error, const char *file, const char *func, int
 
 #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
 
-#define print_ksft_result(void)\
-	(ksft_test_result_pass("PASS: %s %s %s%s%s%s\n", configured_mode ? "DRV" : "SKB",\
-			       test_type == TEST_TYPE_POLL ? "POLL" : "NOPOLL",\
-			       test_type == TEST_TYPE_TEARDOWN ? "Socket Teardown" : "",\
-			       test_type == TEST_TYPE_BIDI ? "Bi-directional Sockets" : "",\
-			       test_type == TEST_TYPE_STATS ? "Stats" : "",\
-			       test_type == TEST_TYPE_BPF_RES ? "BPF RES" : ""))
+#define print_ksft_result(test)\
+	(ksft_test_result_pass("PASS: %s %s\n", configured_mode ? "DRV" : "SKB", \
+			       (test)->name))
 
 static void memset32_htonl(void *dest, u32 val, u32 size)
 {
@@ -428,6 +424,11 @@ static void test_spec_reset(struct test_spec *test)
 	__test_spec_init(test, test->ifobj_tx, test->ifobj_rx);
 }
 
+static void test_spec_set_name(struct test_spec *test, const char *name)
+{
+	strncpy(test->name, name, MAX_TEST_NAME_SIZE);
+}
+
 static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb)
 {
 	if (pkt_nb >= pkt_stream->nb_pkts)
@@ -880,8 +881,6 @@ static void testapp_validate_traffic(struct test_spec *test)
 {
 	struct ifobject *ifobj_tx = test->ifobj_tx;
 	struct ifobject *ifobj_rx = test->ifobj_rx;
-	bool bidi = test_type == TEST_TYPE_BIDI;
-	bool bpf = test_type == TEST_TYPE_BPF_RES;
 	struct pkt_stream *pkt_stream;
 
 	if (pthread_barrier_init(&barr, NULL, 2))
@@ -907,21 +906,17 @@ static void testapp_validate_traffic(struct test_spec *test)
 
 	pthread_join(t1, NULL);
 	pthread_join(t0, NULL);
-
-	if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !bpf && !(test_type == TEST_TYPE_STATS))
-		print_ksft_result();
 }
 
 static void testapp_teardown(struct test_spec *test)
 {
 	int i;
 
+	test_spec_set_name(test, "TEARDOWN");
 	for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
 		testapp_validate_traffic(test);
 		test_spec_reset(test);
 	}
-
-	print_ksft_result();
 }
 
 static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
@@ -942,6 +937,7 @@ static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
 
 static void testapp_bidi(struct test_spec *test)
 {
+	test_spec_set_name(test, "BIDIRECTIONAL");
 	for (int i = 0; i < MAX_BIDI_ITER; i++) {
 		print_verbose("Creating socket\n");
 		testapp_validate_traffic(test);
@@ -953,8 +949,6 @@ static void testapp_bidi(struct test_spec *test)
 	}
 
 	swap_directions(&test->ifobj_rx, &test->ifobj_tx);
-
-	print_ksft_result();
 }
 
 static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx)
@@ -973,6 +967,7 @@ static void testapp_bpf_res(struct test_spec *test)
 {
 	int i;
 
+	test_spec_set_name(test, "BPF_RES");
 	for (i = 0; i < MAX_BPF_ITER; i++) {
 		print_verbose("Creating socket\n");
 		testapp_validate_traffic(test);
@@ -980,8 +975,6 @@ static void testapp_bpf_res(struct test_spec *test)
 			swap_xsk_resources(test->ifobj_tx, test->ifobj_rx);
 		second_step = true;
 	}
-
-	print_ksft_result();
 }
 
 static void testapp_stats(struct test_spec *test)
@@ -992,21 +985,28 @@ static void testapp_stats(struct test_spec *test)
 
 		switch (stat_test_type) {
 		case STAT_TEST_RX_DROPPED:
+			test_spec_set_name(test, "STAT_RX_DROPPED");
 			test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
 				XDP_PACKET_HEADROOM - 1;
 			break;
 		case STAT_TEST_RX_FULL:
+			test_spec_set_name(test, "STAT_RX_FULL");
 			test->ifobj_rx->xsk->rxqsize = RX_FULL_RXQSIZE;
 			break;
 		case STAT_TEST_TX_INVALID:
+			test_spec_set_name(test, "STAT_TX_INVALID");
 			continue;
+		case STAT_TEST_RX_FILL_EMPTY:
+			test_spec_set_name(test, "STAT_RX_FILL_EMPTY");
+			break;
 		default:
 			break;
 		}
 		testapp_validate_traffic(test);
 	}
 
-	print_ksft_result();
+	/* To only see the whole stat set being completed unless an individual test fails. */
+	test_spec_set_name(test, "STATS");
 }
 
 static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac,
@@ -1066,10 +1066,19 @@ static void run_pkt_test(struct test_spec *test, int mode, int type)
 	case TEST_TYPE_BPF_RES:
 		testapp_bpf_res(test);
 		break;
-	default:
+	case TEST_TYPE_NOPOLL:
+		test_spec_set_name(test, "RUN_TO_COMPLETION");
 		testapp_validate_traffic(test);
 		break;
+	case TEST_TYPE_POLL:
+		test_spec_set_name(test, "POLL");
+		testapp_validate_traffic(test);
+		break;
+	default:
+		break;
 	}
+
+	print_ksft_result(test);
 }
 
 static struct ifobject *ifobject_create(void)
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index bfd14190abfc..15eab31b3b32 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -22,6 +22,7 @@
 #define MAX_INTERFACES_NAMESPACE_CHARS 10
 #define MAX_SOCKS 1
 #define MAX_SOCKETS 2
+#define MAX_TEST_NAME_SIZE 32
 #define MAX_TEARDOWN_ITER 10
 #define MAX_BIDI_ITER 2
 #define MAX_BPF_ITER 2
@@ -141,6 +142,7 @@ struct ifobject {
 struct test_spec {
 	struct ifobject *ifobj_tx;
 	struct ifobject *ifobj_rx;
+	char name[MAX_TEST_NAME_SIZE];
 };
 
 /*threads*/

From 119d4b02feb5c3c5045f0b42a6c48d853ab40917 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 7 Sep 2021 09:19:16 +0200
Subject: [PATCH 15/63] selftests: xsk: Add use_poll to ifobject

Add a use_poll option to the ifobject so that we do not need to use a
test specific if-statement in the test runner.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210907071928.9750-9-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 5 ++++-
 tools/testing/selftests/bpf/xdpxceiver.h | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 8ef58081d4d2..9a98c45933c5 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -393,6 +393,7 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 
 		ifobj->umem = &ifobj->umem_arr[0];
 		ifobj->xsk = &ifobj->xsk_arr[0];
+		ifobj->use_poll = false;
 
 		if (i == tx)
 			ifobj->fv.vector = tx;
@@ -684,7 +685,7 @@ static void send_pkts(struct ifobject *ifobject)
 	while (pkt_cnt < ifobject->pkt_stream->nb_pkts) {
 		u32 sent;
 
-		if (test_type == TEST_TYPE_POLL) {
+		if (ifobject->use_poll) {
 			int ret;
 
 			ret = poll(fds, 1, POLL_TMOUT);
@@ -1071,6 +1072,8 @@ static void run_pkt_test(struct test_spec *test, int mode, int type)
 		testapp_validate_traffic(test);
 		break;
 	case TEST_TYPE_POLL:
+		test->ifobj_tx->use_poll = true;
+		test->ifobj_rx->use_poll = true;
 		test_spec_set_name(test, "POLL");
 		testapp_validate_traffic(test);
 		break;
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 15eab31b3b32..e02a4dd71bfb 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -135,6 +135,7 @@ struct ifobject {
 	u32 src_ip;
 	u16 src_port;
 	u16 dst_port;
+	bool use_poll;
 	u8 dst_mac[ETH_ALEN];
 	u8 src_mac[ETH_ALEN];
 };

From 1856c24db0a8c51122b7b87909a98f379309ff69 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 7 Sep 2021 09:19:17 +0200
Subject: [PATCH 16/63] selftests: xsk: Introduce rx_on and tx_on in ifobject

Introduce rx_on and tx_on in the ifobject so that we can describe if
the thread should create a socket with only tx, rx, or both. This
eliminates some test specific if statements from the code. We can also
eliminate the flow vector structure now as this is fully specified
by the tx_on and rx_on variables.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210907071928.9750-10-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 34 ++++++++++--------------
 tools/testing/selftests/bpf/xdpxceiver.h | 10 ++-----
 2 files changed, 16 insertions(+), 28 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 9a98c45933c5..a896d5845c0e 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -278,14 +278,8 @@ static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_inf
 	cfg.xdp_flags = xdp_flags;
 	cfg.bind_flags = xdp_bind_flags;
 
-	if (test_type != TEST_TYPE_BIDI) {
-		rxr = (ifobject->fv.vector == rx) ? &xsk->rx : NULL;
-		txr = (ifobject->fv.vector == tx) ? &xsk->tx : NULL;
-	} else {
-		rxr = &xsk->rx;
-		txr = &xsk->tx;
-	}
-
+	txr = ifobject->tx_on ? &xsk->tx : NULL;
+	rxr = ifobject->rx_on ? &xsk->rx : NULL;
 	return xsk_socket__create(&xsk->xsk, ifobject->ifname, qid, umem->umem, rxr, txr, &cfg);
 }
 
@@ -395,10 +389,13 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 		ifobj->xsk = &ifobj->xsk_arr[0];
 		ifobj->use_poll = false;
 
-		if (i == tx)
-			ifobj->fv.vector = tx;
-		else
-			ifobj->fv.vector = rx;
+		if (i == 0) {
+			ifobj->rx_on = false;
+			ifobj->tx_on = true;
+		} else {
+			ifobj->rx_on = true;
+			ifobj->tx_on = false;
+		}
 
 		for (j = 0; j < MAX_SOCKETS; j++) {
 			memset(&ifobj->umem_arr[j], 0, sizeof(ifobj->umem_arr[j]));
@@ -923,14 +920,10 @@ static void testapp_teardown(struct test_spec *test)
 static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
 {
 	thread_func_t tmp_func_ptr = (*ifobj1)->func_ptr;
-	enum fvector tmp_vector = (*ifobj1)->fv.vector;
 	struct ifobject *tmp_ifobj = (*ifobj1);
 
 	(*ifobj1)->func_ptr = (*ifobj2)->func_ptr;
-	(*ifobj1)->fv.vector = (*ifobj2)->fv.vector;
-
 	(*ifobj2)->func_ptr = tmp_func_ptr;
-	(*ifobj2)->fv.vector = tmp_vector;
 
 	*ifobj1 = *ifobj2;
 	*ifobj2 = tmp_ifobj;
@@ -939,6 +932,8 @@ static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
 static void testapp_bidi(struct test_spec *test)
 {
 	test_spec_set_name(test, "BIDIRECTIONAL");
+	test->ifobj_tx->rx_on = true;
+	test->ifobj_rx->tx_on = true;
 	for (int i = 0; i < MAX_BIDI_ITER; i++) {
 		print_verbose("Creating socket\n");
 		testapp_validate_traffic(test);
@@ -1012,7 +1007,7 @@ static void testapp_stats(struct test_spec *test)
 
 static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac,
 		       const char *dst_ip, const char *src_ip, const u16 dst_port,
-		       const u16 src_port, enum fvector vector, thread_func_t func_ptr)
+		       const u16 src_port, thread_func_t func_ptr)
 {
 	struct in_addr ip;
 
@@ -1028,7 +1023,6 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *
 	ifobj->dst_port = dst_port;
 	ifobj->src_port = src_port;
 
-	ifobj->fv.vector = vector;
 	ifobj->func_ptr = func_ptr;
 }
 
@@ -1144,9 +1138,9 @@ int main(int argc, char **argv)
 		ksft_exit_xfail();
 	}
 
-	init_iface(ifobj_tx, MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx,
+	init_iface(ifobj_tx, MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2,
 		   worker_testapp_validate_tx);
-	init_iface(ifobj_rx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx,
+	init_iface(ifobj_rx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1,
 		   worker_testapp_validate_rx);
 
 	ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index e02a4dd71bfb..03ff52897d7b 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -100,13 +100,6 @@ struct xsk_socket_info {
 	u32 rxqsize;
 };
 
-struct flow_vector {
-	enum fvector {
-		tx,
-		rx,
-	} vector;
-};
-
 struct pkt {
 	u64 addr;
 	u32 len;
@@ -127,7 +120,6 @@ struct ifobject {
 	struct xsk_socket_info *xsk_arr;
 	struct xsk_umem_info *umem;
 	struct xsk_umem_info *umem_arr;
-	struct flow_vector fv;
 	thread_func_t func_ptr;
 	struct pkt_stream *pkt_stream;
 	int ns_fd;
@@ -135,6 +127,8 @@ struct ifobject {
 	u32 src_ip;
 	u16 src_port;
 	u16 dst_port;
+	bool tx_on;
+	bool rx_on;
 	bool use_poll;
 	u8 dst_mac[ETH_ALEN];
 	u8 src_mac[ETH_ALEN];

From 55be575dc13ccfc9bf27ebb5af938b70206a9eb5 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 7 Sep 2021 09:19:18 +0200
Subject: [PATCH 17/63] selftests: xsk: Replace second_step global variable

Replace the second_step global variable with a test specification
variable called total_steps that a test can be set to indicate how
many times the packet stream should be sent without reinitializing any
sockets. This eliminates test specific code in the test runner around
the bidirectional test.

The total_steps variable is 1 by default as most tests only need a
single round of packets.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210907071928.9750-11-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 77 +++++++++++-------------
 tools/testing/selftests/bpf/xdpxceiver.h |  4 +-
 2 files changed, 36 insertions(+), 45 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index a896d5845c0e..0a3e28c9e2a9 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -408,6 +408,8 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 
 	test->ifobj_tx = ifobj_tx;
 	test->ifobj_rx = ifobj_rx;
+	test->current_step = 0;
+	test->total_steps = 1;
 }
 
 static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
@@ -713,7 +715,7 @@ static bool rx_stats_are_valid(struct ifobject *ifobject)
 	optlen = sizeof(stats);
 	err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
 	if (err) {
-		ksft_test_result_fail("ERROR: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+		ksft_test_result_fail("ERROR Rx: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
 				      __func__, -err, strerror(-err));
 		return true;
 	}
@@ -754,7 +756,7 @@ static void tx_stats_validate(struct ifobject *ifobject)
 	optlen = sizeof(stats);
 	err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
 	if (err) {
-		ksft_test_result_fail("ERROR: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+		ksft_test_result_fail("ERROR Tx: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
 				      __func__, -err, strerror(-err));
 		return;
 	}
@@ -766,12 +768,13 @@ static void tx_stats_validate(struct ifobject *ifobject)
 			      __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts);
 }
 
-static void thread_common_ops(struct ifobject *ifobject, void *bufs)
+static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 {
 	u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
 	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
 	size_t mmap_sz = umem_sz;
 	int ctr = 0, ret;
+	void *bufs;
 
 	ifobject->ns_fd = switch_namespace(ifobject->nsname);
 
@@ -813,26 +816,19 @@ static void thread_common_ops(struct ifobject *ifobject, void *bufs)
 	ifobject->xsk = &ifobject->xsk_arr[0];
 }
 
-static bool testapp_is_test_two_stepped(void)
-{
-	return (test_type != TEST_TYPE_BIDI && test_type != TEST_TYPE_BPF_RES) || second_step;
-}
-
 static void testapp_cleanup_xsk_res(struct ifobject *ifobj)
 {
-	if (testapp_is_test_two_stepped()) {
-		xsk_socket__delete(ifobj->xsk->xsk);
-		(void)xsk_umem__delete(ifobj->umem->umem);
-	}
+	xsk_socket__delete(ifobj->xsk->xsk);
+	xsk_umem__delete(ifobj->umem->umem);
 }
 
 static void *worker_testapp_validate_tx(void *arg)
 {
-	struct ifobject *ifobject = (struct ifobject *)arg;
-	void *bufs = NULL;
+	struct test_spec *test = (struct test_spec *)arg;
+	struct ifobject *ifobject = test->ifobj_tx;
 
-	if (!second_step)
-		thread_common_ops(ifobject, bufs);
+	if (test->current_step == 1)
+		thread_common_ops(test, ifobject);
 
 	print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts,
 		      ifobject->ifname);
@@ -841,18 +837,19 @@ static void *worker_testapp_validate_tx(void *arg)
 	if (stat_test_type == STAT_TEST_TX_INVALID)
 		tx_stats_validate(ifobject);
 
-	testapp_cleanup_xsk_res(ifobject);
+	if (test->total_steps == test->current_step)
+		testapp_cleanup_xsk_res(ifobject);
 	pthread_exit(NULL);
 }
 
 static void *worker_testapp_validate_rx(void *arg)
 {
-	struct ifobject *ifobject = (struct ifobject *)arg;
+	struct test_spec *test = (struct test_spec *)arg;
+	struct ifobject *ifobject = test->ifobj_rx;
 	struct pollfd fds[MAX_SOCKS] = { };
-	void *bufs = NULL;
 
-	if (!second_step)
-		thread_common_ops(ifobject, bufs);
+	if (test->current_step == 1)
+		thread_common_ops(test, ifobject);
 
 	if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
 		xsk_populate_fill_ring(ifobject->umem);
@@ -871,7 +868,8 @@ static void *worker_testapp_validate_rx(void *arg)
 	if (test_type == TEST_TYPE_TEARDOWN)
 		print_verbose("Destroying socket\n");
 
-	testapp_cleanup_xsk_res(ifobject);
+	if (test->total_steps == test->current_step)
+		testapp_cleanup_xsk_res(ifobject);
 	pthread_exit(NULL);
 }
 
@@ -891,16 +889,17 @@ static void testapp_validate_traffic(struct test_spec *test)
 		pkt_stream = pkt_stream_generate(test->ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE);
 	ifobj_tx->pkt_stream = pkt_stream;
 	ifobj_rx->pkt_stream = pkt_stream;
+	test->current_step++;
 
 	/*Spawn RX thread */
-	pthread_create(&t0, NULL, ifobj_rx->func_ptr, ifobj_rx);
+	pthread_create(&t0, NULL, ifobj_rx->func_ptr, test);
 
 	pthread_barrier_wait(&barr);
 	if (pthread_barrier_destroy(&barr))
 		exit_with_error(errno);
 
 	/*Spawn TX thread */
-	pthread_create(&t1, NULL, ifobj_tx->func_ptr, ifobj_tx);
+	pthread_create(&t1, NULL, ifobj_tx->func_ptr, test);
 
 	pthread_join(t1, NULL);
 	pthread_join(t0, NULL);
@@ -934,15 +933,12 @@ static void testapp_bidi(struct test_spec *test)
 	test_spec_set_name(test, "BIDIRECTIONAL");
 	test->ifobj_tx->rx_on = true;
 	test->ifobj_rx->tx_on = true;
-	for (int i = 0; i < MAX_BIDI_ITER; i++) {
-		print_verbose("Creating socket\n");
-		testapp_validate_traffic(test);
-		if (!second_step) {
-			print_verbose("Switching Tx/Rx vectors\n");
-			swap_directions(&test->ifobj_rx, &test->ifobj_tx);
-		}
-		second_step = true;
-	}
+	test->total_steps = 2;
+	testapp_validate_traffic(test);
+
+	print_verbose("Switching Tx/Rx vectors\n");
+	swap_directions(&test->ifobj_rx, &test->ifobj_tx);
+	testapp_validate_traffic(test);
 
 	swap_directions(&test->ifobj_rx, &test->ifobj_tx);
 }
@@ -961,16 +957,12 @@ static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj
 
 static void testapp_bpf_res(struct test_spec *test)
 {
-	int i;
-
 	test_spec_set_name(test, "BPF_RES");
-	for (i = 0; i < MAX_BPF_ITER; i++) {
-		print_verbose("Creating socket\n");
-		testapp_validate_traffic(test);
-		if (!second_step)
-			swap_xsk_resources(test->ifobj_tx, test->ifobj_rx);
-		second_step = true;
-	}
+	test->total_steps = 2;
+	testapp_validate_traffic(test);
+
+	swap_xsk_resources(test->ifobj_tx, test->ifobj_rx);
+	testapp_validate_traffic(test);
 }
 
 static void testapp_stats(struct test_spec *test)
@@ -1032,7 +1024,6 @@ static void run_pkt_test(struct test_spec *test, int mode, int type)
 
 	/* reset defaults after potential previous test */
 	xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-	second_step = 0;
 	stat_test_type = -1;
 
 	configured_mode = mode;
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 03ff52897d7b..ea505a4cb8c0 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -24,7 +24,6 @@
 #define MAX_SOCKETS 2
 #define MAX_TEST_NAME_SIZE 32
 #define MAX_TEARDOWN_ITER 10
-#define MAX_BIDI_ITER 2
 #define MAX_BPF_ITER 2
 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
 			sizeof(struct udphdr))
@@ -72,7 +71,6 @@ enum stat_test_type {
 
 static int configured_mode;
 static bool opt_pkt_dump;
-static bool second_step;
 static int test_type;
 
 static bool opt_verbose;
@@ -137,6 +135,8 @@ struct ifobject {
 struct test_spec {
 	struct ifobject *ifobj_tx;
 	struct ifobject *ifobj_rx;
+	u16 total_steps;
+	u16 current_step;
 	char name[MAX_TEST_NAME_SIZE];
 };
 

From 85c6c95739703620abb75e5fcc46ad3068c50f48 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 7 Sep 2021 09:19:19 +0200
Subject: [PATCH 18/63] selftests: xsk: Specify number of sockets to create

Add the ability in the test specification to specify numbers of
sockets to create. The default is one socket. This is then used to
remove test specific if-statements around the bpf_res tests.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210907071928.9750-12-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 59 +++++++++++-------------
 tools/testing/selftests/bpf/xdpxceiver.h |  2 +-
 2 files changed, 27 insertions(+), 34 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 0a3e28c9e2a9..06fa767191af 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -231,7 +231,7 @@ static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
 	    udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr);
 }
 
-static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size, int idx)
+static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size)
 {
 	struct xsk_umem_config cfg = {
 		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
@@ -410,6 +410,7 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 	test->ifobj_rx = ifobj_rx;
 	test->current_step = 0;
 	test->total_steps = 1;
+	test->nb_sockets = 1;
 }
 
 static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
@@ -770,46 +771,37 @@ static void tx_stats_validate(struct ifobject *ifobject)
 
 static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 {
-	u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
-	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
-	size_t mmap_sz = umem_sz;
-	int ctr = 0, ret;
-	void *bufs;
+	u32 i;
 
 	ifobject->ns_fd = switch_namespace(ifobject->nsname);
 
-	if (test_type == TEST_TYPE_BPF_RES)
-		mmap_sz *= 2;
+	for (i = 0; i < test->nb_sockets; i++) {
+		u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
+		int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+		u32 ctr = 0;
+		void *bufs;
 
-	bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
-	if (bufs == MAP_FAILED)
-		exit_with_error(errno);
+		bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+		if (bufs == MAP_FAILED)
+			exit_with_error(errno);
 
-	while (ctr++ < SOCK_RECONF_CTR) {
-		ret = xsk_configure_umem(&ifobject->umem_arr[0], bufs, umem_sz, 0);
-		if (ret)
-			exit_with_error(-ret);
+		while (ctr++ < SOCK_RECONF_CTR) {
+			int ret;
 
-		ret = xsk_configure_socket(&ifobject->xsk_arr[0], &ifobject->umem_arr[0],
-					   ifobject, 0);
-		if (!ret)
-			break;
+			ret = xsk_configure_umem(&ifobject->umem_arr[i], bufs, umem_sz);
+			if (ret)
+				exit_with_error(-ret);
 
-		/* Retry Create Socket if it fails as xsk_socket__create() is asynchronous */
-		if (ctr >= SOCK_RECONF_CTR)
-			exit_with_error(-ret);
-		usleep(USLEEP_MAX);
-	}
+			ret = xsk_configure_socket(&ifobject->xsk_arr[i], &ifobject->umem_arr[i],
+						   ifobject, i);
+			if (!ret)
+				break;
 
-	if (test_type == TEST_TYPE_BPF_RES) {
-		ret = xsk_configure_umem(&ifobject->umem_arr[1], (u8 *)bufs + umem_sz, umem_sz, 1);
-		if (ret)
-			exit_with_error(-ret);
-
-		ret = xsk_configure_socket(&ifobject->xsk_arr[1], &ifobject->umem_arr[1],
-					   ifobject, 1);
-		if (ret)
-			exit_with_error(-ret);
+			/* Retry if it fails as xsk_socket__create() is asynchronous */
+			if (ctr >= SOCK_RECONF_CTR)
+				exit_with_error(-ret);
+			usleep(USLEEP_MAX);
+		}
 	}
 
 	ifobject->umem = &ifobject->umem_arr[0];
@@ -959,6 +951,7 @@ static void testapp_bpf_res(struct test_spec *test)
 {
 	test_spec_set_name(test, "BPF_RES");
 	test->total_steps = 2;
+	test->nb_sockets = 2;
 	testapp_validate_traffic(test);
 
 	swap_xsk_resources(test->ifobj_tx, test->ifobj_rx);
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index ea505a4cb8c0..c09b73fd9878 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -24,7 +24,6 @@
 #define MAX_SOCKETS 2
 #define MAX_TEST_NAME_SIZE 32
 #define MAX_TEARDOWN_ITER 10
-#define MAX_BPF_ITER 2
 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
 			sizeof(struct udphdr))
 #define MIN_PKT_SIZE 64
@@ -137,6 +136,7 @@ struct test_spec {
 	struct ifobject *ifobj_rx;
 	u16 total_steps;
 	u16 current_step;
+	u16 nb_sockets;
 	char name[MAX_TEST_NAME_SIZE];
 };
 

From af6731d1e1c69b4ee32063acd51ec86ece0a1ced Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 7 Sep 2021 09:19:20 +0200
Subject: [PATCH 19/63] selftests: xsk: Make xdp_flags and bind_flags local

Make xdp_flags and bind_flags local instead of global by moving them
into the interface object. These flags decide if the socket should be
created in SKB mode or in DRV mode and therefore they are sticky and
will survive a test_spec_reset. Since every test is first run in SKB
mode then in DRV mode, this change only happens once. With this
change, the configured_mode global variable can also be
erradicated. The first test_spec_init() also becomes superfluous and
can be eliminated.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210907071928.9750-13-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 57 ++++++++++++------------
 tools/testing/selftests/bpf/xdpxceiver.h |  6 +--
 2 files changed, 31 insertions(+), 32 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 06fa767191af..3a1afece7c2c 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -112,9 +112,10 @@ static void __exit_with_error(int error, const char *file, const char *func, int
 
 #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
 
-#define print_ksft_result(test)\
-	(ksft_test_result_pass("PASS: %s %s\n", configured_mode ? "DRV" : "SKB", \
-			       (test)->name))
+#define mode_string(test) (test)->ifobj_tx->xdp_flags & XDP_FLAGS_SKB_MODE ? "SKB" : "DRV"
+
+#define print_ksft_result(test)						\
+	(ksft_test_result_pass("PASS: %s %s\n", mode_string(test), (test)->name))
 
 static void memset32_htonl(void *dest, u32 val, u32 size)
 {
@@ -275,8 +276,8 @@ static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_inf
 	cfg.rx_size = xsk->rxqsize;
 	cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
 	cfg.libbpf_flags = 0;
-	cfg.xdp_flags = xdp_flags;
-	cfg.bind_flags = xdp_bind_flags;
+	cfg.xdp_flags = ifobject->xdp_flags;
+	cfg.bind_flags = ifobject->bind_flags;
 
 	txr = ifobject->tx_on ? &xsk->tx : NULL;
 	rxr = ifobject->rx_on ? &xsk->rx : NULL;
@@ -333,7 +334,8 @@ static bool validate_interface(struct ifobject *ifobj)
 	return true;
 }
 
-static void parse_command_line(struct test_spec *test, int argc, char **argv)
+static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx, int argc,
+			       char **argv)
 {
 	struct ifobject *ifobj;
 	u32 interface_nb = 0;
@@ -351,9 +353,9 @@ static void parse_command_line(struct test_spec *test, int argc, char **argv)
 		switch (c) {
 		case 'i':
 			if (interface_nb == 0)
-				ifobj = test->ifobj_tx;
+				ifobj = ifobj_tx;
 			else if (interface_nb == 1)
-				ifobj = test->ifobj_rx;
+				ifobj = ifobj_rx;
 			else
 				break;
 
@@ -414,9 +416,24 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 }
 
 static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
-			   struct ifobject *ifobj_rx)
+			   struct ifobject *ifobj_rx, enum test_mode mode)
 {
+	u32 i;
+
 	memset(test, 0, sizeof(*test));
+
+	for (i = 0; i < MAX_INTERFACES; i++) {
+		struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
+
+		ifobj->xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+		if (mode == TEST_MODE_SKB)
+			ifobj->xdp_flags |= XDP_FLAGS_SKB_MODE;
+		else
+			ifobj->xdp_flags |= XDP_FLAGS_DRV_MODE;
+
+		ifobj->bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY;
+	}
+
 	__test_spec_init(test, ifobj_tx, ifobj_rx);
 }
 
@@ -1011,27 +1028,13 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *
 	ifobj->func_ptr = func_ptr;
 }
 
-static void run_pkt_test(struct test_spec *test, int mode, int type)
+static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type)
 {
 	test_type = type;
 
 	/* reset defaults after potential previous test */
-	xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
 	stat_test_type = -1;
 
-	configured_mode = mode;
-
-	switch (mode) {
-	case (TEST_MODE_SKB):
-		xdp_flags |= XDP_FLAGS_SKB_MODE;
-		break;
-	case (TEST_MODE_DRV):
-		xdp_flags |= XDP_FLAGS_DRV_MODE;
-		break;
-	default:
-		break;
-	}
-
 	switch (test_type) {
 	case TEST_TYPE_STATS:
 		testapp_stats(test);
@@ -1111,11 +1114,9 @@ int main(int argc, char **argv)
 	if (!ifobj_rx)
 		exit_with_error(ENOMEM);
 
-	test_spec_init(&test, ifobj_tx, ifobj_rx);
-
 	setlocale(LC_ALL, "");
 
-	parse_command_line(&test, argc, argv);
+	parse_command_line(ifobj_tx, ifobj_rx, argc, argv);
 
 	if (!validate_interface(ifobj_tx) || !validate_interface(ifobj_rx)) {
 		usage(basename(argv[0]));
@@ -1131,7 +1132,7 @@ int main(int argc, char **argv)
 
 	for (i = 0; i < TEST_MODE_MAX; i++)
 		for (j = 0; j < TEST_TYPE_MAX; j++) {
-			test_spec_init(&test, ifobj_tx, ifobj_rx);
+			test_spec_init(&test, ifobj_tx, ifobj_rx, i);
 			run_pkt_test(&test, i, j);
 			usleep(USLEEP_MAX);
 		}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index c09b73fd9878..7ed16128f2ad 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -68,14 +68,10 @@ enum stat_test_type {
 	STAT_TEST_TYPE_MAX
 };
 
-static int configured_mode;
 static bool opt_pkt_dump;
 static int test_type;
 
 static bool opt_verbose;
-
-static u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static u32 xdp_bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY;
 static int stat_test_type;
 
 struct xsk_umem_info {
@@ -122,6 +118,8 @@ struct ifobject {
 	int ns_fd;
 	u32 dst_ip;
 	u32 src_ip;
+	u32 xdp_flags;
+	u32 bind_flags;
 	u16 src_port;
 	u16 dst_port;
 	bool tx_on;

From e2d850d5346c3cd751b032a7dccdd472013b8a84 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 7 Sep 2021 09:19:21 +0200
Subject: [PATCH 20/63] selftests: xsx: Make pthreads local scope

Make the pthread_t variables local scope instead of global. No reason
for them to be global.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210907071928.9750-14-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 1 +
 tools/testing/selftests/bpf/xdpxceiver.h | 2 --
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 3a1afece7c2c..5ea78c503741 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -887,6 +887,7 @@ static void testapp_validate_traffic(struct test_spec *test)
 	struct ifobject *ifobj_tx = test->ifobj_tx;
 	struct ifobject *ifobj_rx = test->ifobj_rx;
 	struct pkt_stream *pkt_stream;
+	pthread_t t0, t1;
 
 	if (pthread_barrier_init(&barr, NULL, 2))
 		exit_with_error(errno);
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 7ed16128f2ad..34ae4e4ea4ac 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -138,8 +138,6 @@ struct test_spec {
 	char name[MAX_TEST_NAME_SIZE];
 };
 
-/*threads*/
 pthread_barrier_t barr;
-pthread_t t0, t1;
 
 #endif				/* XDPXCEIVER_H */

From 8ce7192b508df2c54932e245bbc6ce04cfe174a7 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 7 Sep 2021 09:19:22 +0200
Subject: [PATCH 21/63] selftests: xsk: Eliminate MAX_SOCKS define

Remove the MAX_SOCKS define as it always will be one for the forseable
future and the code does not work for any other case anyway.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210907071928.9750-15-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 18 +++++++++---------
 tools/testing/selftests/bpf/xdpxceiver.h |  1 -
 2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 5ea78c503741..0fb5cae974de 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -693,11 +693,11 @@ static void wait_for_tx_completion(struct xsk_socket_info *xsk)
 
 static void send_pkts(struct ifobject *ifobject)
 {
-	struct pollfd fds[MAX_SOCKS] = { };
+	struct pollfd fds = { };
 	u32 pkt_cnt = 0;
 
-	fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
-	fds[0].events = POLLOUT;
+	fds.fd = xsk_socket__fd(ifobject->xsk->xsk);
+	fds.events = POLLOUT;
 
 	while (pkt_cnt < ifobject->pkt_stream->nb_pkts) {
 		u32 sent;
@@ -705,11 +705,11 @@ static void send_pkts(struct ifobject *ifobject)
 		if (ifobject->use_poll) {
 			int ret;
 
-			ret = poll(fds, 1, POLL_TMOUT);
+			ret = poll(&fds, 1, POLL_TMOUT);
 			if (ret <= 0)
 				continue;
 
-			if (!(fds[0].revents & POLLOUT))
+			if (!(fds.revents & POLLOUT))
 				continue;
 		}
 
@@ -855,7 +855,7 @@ static void *worker_testapp_validate_rx(void *arg)
 {
 	struct test_spec *test = (struct test_spec *)arg;
 	struct ifobject *ifobject = test->ifobj_rx;
-	struct pollfd fds[MAX_SOCKS] = { };
+	struct pollfd fds = { };
 
 	if (test->current_step == 1)
 		thread_common_ops(test, ifobject);
@@ -863,8 +863,8 @@ static void *worker_testapp_validate_rx(void *arg)
 	if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
 		xsk_populate_fill_ring(ifobject->umem);
 
-	fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
-	fds[0].events = POLLIN;
+	fds.fd = xsk_socket__fd(ifobject->xsk->xsk);
+	fds.events = POLLIN;
 
 	pthread_barrier_wait(&barr);
 
@@ -872,7 +872,7 @@ static void *worker_testapp_validate_rx(void *arg)
 		while (!rx_stats_are_valid(ifobject))
 			continue;
 	else
-		receive_pkts(ifobject->pkt_stream, ifobject->xsk, fds);
+		receive_pkts(ifobject->pkt_stream, ifobject->xsk, &fds);
 
 	if (test_type == TEST_TYPE_TEARDOWN)
 		print_verbose("Destroying socket\n");
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 34ae4e4ea4ac..1e9a563380c8 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -20,7 +20,6 @@
 #define MAX_INTERFACES 2
 #define MAX_INTERFACE_NAME_CHARS 7
 #define MAX_INTERFACES_NAMESPACE_CHARS 10
-#define MAX_SOCKS 1
 #define MAX_SOCKETS 2
 #define MAX_TEST_NAME_SIZE 32
 #define MAX_TEARDOWN_ITER 10

From 8abf6f725a9e50cf8e8bd44edb2a577d98f0d775 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 7 Sep 2021 09:19:23 +0200
Subject: [PATCH 22/63] selftests: xsk: Allow for invalid packets

Allow for invalid packets to be sent. These are verified by the Rx
thread not to be received. Or put in another way, if they are
received, the test will fail. This feature will be used to eliminate
an if statement for a stats test and will also be used by other tests
in later patches. The previous code could only deal with valid
packets.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210907071928.9750-16-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 16 ++++++++++------
 tools/testing/selftests/bpf/xdpxceiver.h |  1 +
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 0fb5cae974de..09d2854c10e6 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -473,6 +473,11 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb
 		pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size;
 		pkt_stream->pkts[i].len = pkt_len;
 		pkt_stream->pkts[i].payload = i;
+
+		if (pkt_len > umem->frame_size)
+			pkt_stream->pkts[i].valid = false;
+		else
+			pkt_stream->pkts[i].valid = true;
 	}
 
 	return pkt_stream;
@@ -658,7 +663,7 @@ static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info *
 static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb)
 {
 	struct xsk_socket_info *xsk = ifobject->xsk;
-	u32 i, idx;
+	u32 i, idx, valid_pkts = 0;
 
 	while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE)
 		complete_pkts(xsk, BATCH_SIZE);
@@ -673,14 +678,13 @@ static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb)
 		tx_desc->addr = pkt->addr;
 		tx_desc->len = pkt->len;
 		pkt_nb++;
+		if (pkt->valid)
+			valid_pkts++;
 	}
 
 	xsk_ring_prod__submit(&xsk->tx, i);
-	if (stat_test_type != STAT_TEST_TX_INVALID)
-		xsk->outstanding_tx += i;
-	else if (xsk_ring_prod__needs_wakeup(&xsk->tx))
-		kick_tx(xsk);
-	complete_pkts(xsk, i);
+	xsk->outstanding_tx += valid_pkts;
+	complete_pkts(xsk, BATCH_SIZE);
 
 	return i;
 }
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 1e9a563380c8..c5baa7c5f560 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -96,6 +96,7 @@ struct pkt {
 	u64 addr;
 	u32 len;
 	u32 payload;
+	bool valid;
 };
 
 struct pkt_stream {

From 605091c5100ddc1ae0d67a74b688d8755658d3c9 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 7 Sep 2021 09:19:24 +0200
Subject: [PATCH 23/63] selftests: xsk: Introduce replacing the default packet
 stream

Introduce the concept of a default packet stream that is the set of
packets sent by most tests. Then add the ability to replace it for a
test that would like to send or receive something else through the use
of the function pkt_stream_replace() and then restored with
pkt_stream_restore_default(). These are then used to convert the
STAT_TEST_TX_INVALID to use these new APIs.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210907071928.9750-17-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 65 +++++++++++++++++-------
 tools/testing/selftests/bpf/xdpxceiver.h |  1 +
 2 files changed, 48 insertions(+), 18 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 09d2854c10e6..70a8435e3d5e 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -390,6 +390,7 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 		ifobj->umem = &ifobj->umem_arr[0];
 		ifobj->xsk = &ifobj->xsk_arr[0];
 		ifobj->use_poll = false;
+		ifobj->pkt_stream = test->pkt_stream_default;
 
 		if (i == 0) {
 			ifobj->rx_on = false;
@@ -418,9 +419,12 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 			   struct ifobject *ifobj_rx, enum test_mode mode)
 {
+	struct pkt_stream *pkt_stream;
 	u32 i;
 
+	pkt_stream = test->pkt_stream_default;
 	memset(test, 0, sizeof(*test));
+	test->pkt_stream_default = pkt_stream;
 
 	for (i = 0; i < MAX_INTERFACES; i++) {
 		struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
@@ -455,6 +459,19 @@ static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb)
 	return &pkt_stream->pkts[pkt_nb];
 }
 
+static void pkt_stream_delete(struct pkt_stream *pkt_stream)
+{
+	free(pkt_stream->pkts);
+	free(pkt_stream);
+}
+
+static void pkt_stream_restore_default(struct test_spec *test)
+{
+	pkt_stream_delete(test->ifobj_tx->pkt_stream);
+	test->ifobj_tx->pkt_stream = test->pkt_stream_default;
+	test->ifobj_rx->pkt_stream = test->pkt_stream_default;
+}
+
 static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len)
 {
 	struct pkt_stream *pkt_stream;
@@ -483,6 +500,15 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb
 	return pkt_stream;
 }
 
+static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
+{
+	struct pkt_stream *pkt_stream;
+
+	pkt_stream = pkt_stream_generate(test->ifobj_tx->umem, nb_pkts, pkt_len);
+	test->ifobj_tx->pkt_stream = pkt_stream;
+	test->ifobj_rx->pkt_stream = pkt_stream;
+}
+
 static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb)
 {
 	struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb);
@@ -557,7 +583,7 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, const struct xdp_desc *d
 	if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) {
 		u32 seqnum = ntohl(*((u32 *)(data + PKT_HDR_SIZE)));
 
-		if (opt_pkt_dump && test_type != TEST_TYPE_STATS)
+		if (opt_pkt_dump)
 			pkt_dump(data, PKT_SIZE);
 
 		if (pkt->len != desc->len) {
@@ -598,9 +624,6 @@ static void complete_pkts(struct xsk_socket_info *xsk, int batch_size)
 	unsigned int rcvd;
 	u32 idx;
 
-	if (!xsk->outstanding_tx)
-		return;
-
 	if (xsk_ring_prod__needs_wakeup(&xsk->tx))
 		kick_tx(xsk);
 
@@ -831,6 +854,7 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 
 static void testapp_cleanup_xsk_res(struct ifobject *ifobj)
 {
+	print_verbose("Destroying socket\n");
 	xsk_socket__delete(ifobj->xsk->xsk);
 	xsk_umem__delete(ifobj->umem->umem);
 }
@@ -878,9 +902,6 @@ static void *worker_testapp_validate_rx(void *arg)
 	else
 		receive_pkts(ifobject->pkt_stream, ifobject->xsk, &fds);
 
-	if (test_type == TEST_TYPE_TEARDOWN)
-		print_verbose("Destroying socket\n");
-
 	if (test->total_steps == test->current_step)
 		testapp_cleanup_xsk_res(ifobject);
 	pthread_exit(NULL);
@@ -890,19 +911,11 @@ static void testapp_validate_traffic(struct test_spec *test)
 {
 	struct ifobject *ifobj_tx = test->ifobj_tx;
 	struct ifobject *ifobj_rx = test->ifobj_rx;
-	struct pkt_stream *pkt_stream;
 	pthread_t t0, t1;
 
 	if (pthread_barrier_init(&barr, NULL, 2))
 		exit_with_error(errno);
 
-	if (stat_test_type == STAT_TEST_TX_INVALID)
-		pkt_stream = pkt_stream_generate(test->ifobj_tx->umem, DEFAULT_PKT_CNT,
-						 XSK_UMEM__INVALID_FRAME_SIZE);
-	else
-		pkt_stream = pkt_stream_generate(test->ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE);
-	ifobj_tx->pkt_stream = pkt_stream;
-	ifobj_rx->pkt_stream = pkt_stream;
 	test->current_step++;
 
 	/*Spawn RX thread */
@@ -982,7 +995,9 @@ static void testapp_bpf_res(struct test_spec *test)
 
 static void testapp_stats(struct test_spec *test)
 {
-	for (int i = 0; i < STAT_TEST_TYPE_MAX; i++) {
+	int i;
+
+	for (i = 0; i < STAT_TEST_TYPE_MAX; i++) {
 		test_spec_reset(test);
 		stat_test_type = i;
 
@@ -991,21 +1006,27 @@ static void testapp_stats(struct test_spec *test)
 			test_spec_set_name(test, "STAT_RX_DROPPED");
 			test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
 				XDP_PACKET_HEADROOM - 1;
+			testapp_validate_traffic(test);
 			break;
 		case STAT_TEST_RX_FULL:
 			test_spec_set_name(test, "STAT_RX_FULL");
 			test->ifobj_rx->xsk->rxqsize = RX_FULL_RXQSIZE;
+			testapp_validate_traffic(test);
 			break;
 		case STAT_TEST_TX_INVALID:
 			test_spec_set_name(test, "STAT_TX_INVALID");
-			continue;
+			pkt_stream_replace(test, DEFAULT_PKT_CNT, XSK_UMEM__INVALID_FRAME_SIZE);
+			testapp_validate_traffic(test);
+
+			pkt_stream_restore_default(test);
+			break;
 		case STAT_TEST_RX_FILL_EMPTY:
 			test_spec_set_name(test, "STAT_RX_FILL_EMPTY");
+			testapp_validate_traffic(test);
 			break;
 		default:
 			break;
 		}
-		testapp_validate_traffic(test);
 	}
 
 	/* To only see the whole stat set being completed unless an individual test fails. */
@@ -1105,6 +1126,7 @@ static void ifobject_delete(struct ifobject *ifobj)
 int main(int argc, char **argv)
 {
 	struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
+	struct pkt_stream *pkt_stream_default;
 	struct ifobject *ifobj_tx, *ifobj_rx;
 	struct test_spec test;
 	u32 i, j;
@@ -1133,6 +1155,12 @@ int main(int argc, char **argv)
 	init_iface(ifobj_rx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1,
 		   worker_testapp_validate_rx);
 
+	test_spec_init(&test, ifobj_tx, ifobj_rx, 0);
+	pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE);
+	if (!pkt_stream_default)
+		exit_with_error(ENOMEM);
+	test.pkt_stream_default = pkt_stream_default;
+
 	ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
 
 	for (i = 0; i < TEST_MODE_MAX; i++)
@@ -1142,6 +1170,7 @@ int main(int argc, char **argv)
 			usleep(USLEEP_MAX);
 		}
 
+	pkt_stream_delete(pkt_stream_default);
 	ifobject_delete(ifobj_tx);
 	ifobject_delete(ifobj_rx);
 
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index c5baa7c5f560..e27fe348ae50 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -132,6 +132,7 @@ struct ifobject {
 struct test_spec {
 	struct ifobject *ifobj_tx;
 	struct ifobject *ifobj_rx;
+	struct pkt_stream *pkt_stream_default;
 	u16 total_steps;
 	u16 current_step;
 	u16 nb_sockets;

From a4ba98dd0c693e4f0f4aa93ef210a77a72d32025 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 7 Sep 2021 09:19:25 +0200
Subject: [PATCH 24/63] selftests: xsk: Add test for unaligned mode

Add a test for unaligned mode in which packet buffers can be placed
anywhere within the umem. Some packets are made to straddle page
boundaries in order to check for correctness. On the Tx side, buffers
are now allocated according to the addresses found in the packet
stream. Thus, the placement of buffers can be controlled with the
boolean use_addr_for_fill in the packet stream.

One new pkt_stream interface is introduced: pkt_stream_replace_half()
that replaces every other packet in the default packet stream with the
specified new packet. The constant DEFAULT_OFFSET is also
introduced. It specifies at what offset from the start of a chunk a Tx
packet is placed by the sending thread. This is just to be able to
test that it is possible to send packets at an offset not equal to
zero.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210907071928.9750-18-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 127 +++++++++++++++++++----
 tools/testing/selftests/bpf/xdpxceiver.h |   4 +
 2 files changed, 108 insertions(+), 23 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 70a8435e3d5e..7cc75d1481e2 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -19,7 +19,7 @@
  * Virtual Ethernet interfaces.
  *
  * For each mode, the following tests are run:
- *    a. nopoll - soft-irq processing
+ *    a. nopoll - soft-irq processing in run-to-completion mode
  *    b. poll - using poll() syscall
  *    c. Socket Teardown
  *       Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy
@@ -45,6 +45,7 @@
  *       Configure sockets at indexes 0 and 1, run a traffic on queue ids 0,
  *       then remove xsk sockets from queue 0 on both veth interfaces and
  *       finally run a traffic on queues ids 1
+ *    g. unaligned mode
  *
  * Total tests: 12
  *
@@ -243,6 +244,9 @@ static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size
 	};
 	int ret;
 
+	if (umem->unaligned_mode)
+		cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
+
 	ret = xsk_umem__create(&umem->umem, buffer, size,
 			       &umem->fq, &umem->cq, &cfg);
 	if (ret)
@@ -252,19 +256,6 @@ static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size
 	return 0;
 }
 
-static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
-{
-	int ret, i;
-	u32 idx = 0;
-
-	ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
-	if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
-		exit_with_error(-ret);
-	for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
-		*xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * umem->frame_size;
-	xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
-}
-
 static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
 				struct ifobject *ifobject, u32 qid)
 {
@@ -477,7 +468,7 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb
 	struct pkt_stream *pkt_stream;
 	u32 i;
 
-	pkt_stream = malloc(sizeof(*pkt_stream));
+	pkt_stream = calloc(1, sizeof(*pkt_stream));
 	if (!pkt_stream)
 		exit_with_error(ENOMEM);
 
@@ -487,7 +478,8 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb
 
 	pkt_stream->nb_pkts = nb_pkts;
 	for (i = 0; i < nb_pkts; i++) {
-		pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size;
+		pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size +
+			DEFAULT_OFFSET;
 		pkt_stream->pkts[i].len = pkt_len;
 		pkt_stream->pkts[i].payload = i;
 
@@ -500,6 +492,12 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb
 	return pkt_stream;
 }
 
+static struct pkt_stream *pkt_stream_clone(struct xsk_umem_info *umem,
+					   struct pkt_stream *pkt_stream)
+{
+	return pkt_stream_generate(umem, pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
+}
+
 static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
 {
 	struct pkt_stream *pkt_stream;
@@ -509,6 +507,22 @@ static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
 	test->ifobj_rx->pkt_stream = pkt_stream;
 }
 
+static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, u32 offset)
+{
+	struct xsk_umem_info *umem = test->ifobj_tx->umem;
+	struct pkt_stream *pkt_stream;
+	u32 i;
+
+	pkt_stream = pkt_stream_clone(umem, test->pkt_stream_default);
+	for (i = 0; i < test->pkt_stream_default->nb_pkts; i += 2) {
+		pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size + offset;
+		pkt_stream->pkts[i].len = pkt_len;
+	}
+
+	test->ifobj_tx->pkt_stream = pkt_stream;
+	test->ifobj_rx->pkt_stream = pkt_stream;
+}
+
 static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb)
 {
 	struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb);
@@ -570,9 +584,9 @@ static void pkt_dump(void *pkt, u32 len)
 	fprintf(stdout, "---------------------------------------\n");
 }
 
-static bool is_pkt_valid(struct pkt *pkt, void *buffer, const struct xdp_desc *desc)
+static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
 {
-	void *data = xsk_umem__get_data(buffer, desc->addr);
+	void *data = xsk_umem__get_data(buffer, addr);
 	struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr));
 
 	if (!pkt) {
@@ -586,10 +600,10 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, const struct xdp_desc *d
 		if (opt_pkt_dump)
 			pkt_dump(data, PKT_SIZE);
 
-		if (pkt->len != desc->len) {
+		if (pkt->len != len) {
 			ksft_test_result_fail
 				("ERROR: [%s] expected length [%d], got length [%d]\n",
-					__func__, pkt->len, desc->len);
+					__func__, pkt->len, len);
 			return false;
 		}
 
@@ -671,7 +685,7 @@ static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info *
 
 			orig = xsk_umem__extract_addr(addr);
 			addr = xsk_umem__add_offset_to_addr(addr);
-			if (!is_pkt_valid(pkt, xsk->umem->buffer, desc))
+			if (!is_pkt_valid(pkt, xsk->umem->buffer, addr, desc->len))
 				return;
 
 			*xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
@@ -815,13 +829,16 @@ static void tx_stats_validate(struct ifobject *ifobject)
 
 static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 {
+	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
 	u32 i;
 
 	ifobject->ns_fd = switch_namespace(ifobject->nsname);
 
+	if (ifobject->umem->unaligned_mode)
+		mmap_flags |= MAP_HUGETLB;
+
 	for (i = 0; i < test->nb_sockets; i++) {
 		u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
-		int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
 		u32 ctr = 0;
 		void *bufs;
 
@@ -879,6 +896,32 @@ static void *worker_testapp_validate_tx(void *arg)
 	pthread_exit(NULL);
 }
 
+static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream)
+{
+	u32 idx = 0, i;
+	int ret;
+
+	ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
+	if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
+		exit_with_error(ENOSPC);
+	for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++) {
+		u64 addr;
+
+		if (pkt_stream->use_addr_for_fill) {
+			struct pkt *pkt = pkt_stream_get_pkt(pkt_stream, i);
+
+			if (!pkt)
+				break;
+			addr = pkt->addr;
+		} else {
+			addr = (i % umem->num_frames) * umem->frame_size + DEFAULT_OFFSET;
+		}
+
+		*xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr;
+	}
+	xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
+}
+
 static void *worker_testapp_validate_rx(void *arg)
 {
 	struct test_spec *test = (struct test_spec *)arg;
@@ -889,7 +932,7 @@ static void *worker_testapp_validate_rx(void *arg)
 		thread_common_ops(test, ifobject);
 
 	if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
-		xsk_populate_fill_ring(ifobject->umem);
+		xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream);
 
 	fds.fd = xsk_socket__fd(ifobject->xsk->xsk);
 	fds.events = POLLIN;
@@ -1033,6 +1076,40 @@ static void testapp_stats(struct test_spec *test)
 	test_spec_set_name(test, "STATS");
 }
 
+/* Simple test */
+static bool hugepages_present(struct ifobject *ifobject)
+{
+	const size_t mmap_sz = 2 * ifobject->umem->num_frames * ifobject->umem->frame_size;
+	void *bufs;
+
+	bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
+		    MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_HUGETLB, -1, 0);
+	if (bufs == MAP_FAILED)
+		return false;
+
+	munmap(bufs, mmap_sz);
+	return true;
+}
+
+static bool testapp_unaligned(struct test_spec *test)
+{
+	if (!hugepages_present(test->ifobj_tx)) {
+		ksft_test_result_skip("No 2M huge pages present.\n");
+		return false;
+	}
+
+	test_spec_set_name(test, "UNALIGNED_MODE");
+	test->ifobj_tx->umem->unaligned_mode = true;
+	test->ifobj_rx->umem->unaligned_mode = true;
+	/* Let half of the packets straddle a buffer boundrary */
+	pkt_stream_replace_half(test, PKT_SIZE, test->ifobj_tx->umem->frame_size - 32);
+	test->ifobj_rx->pkt_stream->use_addr_for_fill = true;
+	testapp_validate_traffic(test);
+
+	pkt_stream_restore_default(test);
+	return true;
+}
+
 static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac,
 		       const char *dst_ip, const char *src_ip, const u16 dst_port,
 		       const u16 src_port, thread_func_t func_ptr)
@@ -1084,6 +1161,10 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
 		test_spec_set_name(test, "POLL");
 		testapp_validate_traffic(test);
 		break;
+	case TEST_TYPE_UNALIGNED:
+		if (!testapp_unaligned(test))
+			return;
+		break;
 	default:
 		break;
 	}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index e27fe348ae50..129801eb013c 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -39,6 +39,7 @@
 #define POLL_TMOUT 1000
 #define DEFAULT_PKT_CNT (4 * 1024)
 #define RX_FULL_RXQSIZE 32
+#define DEFAULT_OFFSET 256
 #define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1)
 
 #define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
@@ -52,6 +53,7 @@ enum test_mode {
 enum test_type {
 	TEST_TYPE_NOPOLL,
 	TEST_TYPE_POLL,
+	TEST_TYPE_UNALIGNED,
 	TEST_TYPE_TEARDOWN,
 	TEST_TYPE_BIDI,
 	TEST_TYPE_STATS,
@@ -81,6 +83,7 @@ struct xsk_umem_info {
 	u32 frame_headroom;
 	void *buffer;
 	u32 frame_size;
+	bool unaligned_mode;
 };
 
 struct xsk_socket_info {
@@ -102,6 +105,7 @@ struct pkt {
 struct pkt_stream {
 	u32 nb_pkts;
 	struct pkt *pkts;
+	bool use_addr_for_fill;
 };
 
 typedef void *(*thread_func_t)(void *arg);

From 6ce67b5165e630a4f874e9e45bd9cca86aa4f4a6 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 7 Sep 2021 09:19:26 +0200
Subject: [PATCH 25/63] selftests: xsk: Eliminate test specific if-statement in
 test runner

Eliminate a test specific if-statement for the RX_FILL_EMTPY stats
test that is present in the test runner. We can do this as we now have
the use_addr_for_fill option. Just create and empty Rx packet stream
and indicated that the test runner should use the addresses in that to
populate the fill ring. As there are no packets in the stream, the
fill ring will be empty and we will get the error stats that we want
to test.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210907071928.9750-19-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 7cc75d1481e2..4d86c4b62aa9 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -458,8 +458,10 @@ static void pkt_stream_delete(struct pkt_stream *pkt_stream)
 
 static void pkt_stream_restore_default(struct test_spec *test)
 {
-	pkt_stream_delete(test->ifobj_tx->pkt_stream);
-	test->ifobj_tx->pkt_stream = test->pkt_stream_default;
+	if (test->ifobj_tx->pkt_stream != test->pkt_stream_default) {
+		pkt_stream_delete(test->ifobj_tx->pkt_stream);
+		test->ifobj_tx->pkt_stream = test->pkt_stream_default;
+	}
 	test->ifobj_rx->pkt_stream = test->pkt_stream_default;
 }
 
@@ -931,8 +933,7 @@ static void *worker_testapp_validate_rx(void *arg)
 	if (test->current_step == 1)
 		thread_common_ops(test, ifobject);
 
-	if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
-		xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream);
+	xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream);
 
 	fds.fd = xsk_socket__fd(ifobject->xsk->xsk);
 	fds.events = POLLIN;
@@ -1065,7 +1066,14 @@ static void testapp_stats(struct test_spec *test)
 			break;
 		case STAT_TEST_RX_FILL_EMPTY:
 			test_spec_set_name(test, "STAT_RX_FILL_EMPTY");
+			test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, 0,
+									 MIN_PKT_SIZE);
+			if (!test->ifobj_rx->pkt_stream)
+				exit_with_error(ENOMEM);
+			test->ifobj_rx->pkt_stream->use_addr_for_fill = true;
 			testapp_validate_traffic(test);
+
+			pkt_stream_restore_default(test);
 			break;
 		default:
 			break;

From 0d1b7f3a00cf530c2cad643812f08220b7729b9b Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 7 Sep 2021 09:19:27 +0200
Subject: [PATCH 26/63] selftests: xsk: Add tests for invalid xsk descriptors

Add tests for invalid xsk descriptors in the Tx ring. A number of
handcrafted nasty invalid descriptors are created and submitted to the
tx ring to check that they are validated correctly. Corner case valid
ones are also sent. The tests are run for both aligned and unaligned
mode.

pkt_stream_set() is introduced to be able to create a hand-crafted
packet stream where every single packet is specified in detail.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210907071928.9750-20-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 145 ++++++++++++++++++++---
 tools/testing/selftests/bpf/xdpxceiver.h |   7 +-
 2 files changed, 133 insertions(+), 19 deletions(-)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 4d86c4b62aa9..1a03f7941bb8 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -46,6 +46,8 @@
  *       then remove xsk sockets from queue 0 on both veth interfaces and
  *       finally run a traffic on queues ids 1
  *    g. unaligned mode
+ *    h. tests for invalid and corner case Tx descriptors so that the correct ones
+ *       are discarded and let through, respectively.
  *
  * Total tests: 12
  *
@@ -394,7 +396,7 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 		for (j = 0; j < MAX_SOCKETS; j++) {
 			memset(&ifobj->umem_arr[j], 0, sizeof(ifobj->umem_arr[j]));
 			memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
-			ifobj->umem_arr[j].num_frames = DEFAULT_PKT_CNT / 4;
+			ifobj->umem_arr[j].num_frames = DEFAULT_UMEM_BUFFERS;
 			ifobj->umem_arr[j].frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
 			ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
 		}
@@ -450,6 +452,16 @@ static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb)
 	return &pkt_stream->pkts[pkt_nb];
 }
 
+static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream)
+{
+	while (pkt_stream->rx_pkt_nb < pkt_stream->nb_pkts) {
+		if (pkt_stream->pkts[pkt_stream->rx_pkt_nb].valid)
+			return &pkt_stream->pkts[pkt_stream->rx_pkt_nb++];
+		pkt_stream->rx_pkt_nb++;
+	}
+	return NULL;
+}
+
 static void pkt_stream_delete(struct pkt_stream *pkt_stream)
 {
 	free(pkt_stream->pkts);
@@ -465,19 +477,33 @@ static void pkt_stream_restore_default(struct test_spec *test)
 	test->ifobj_rx->pkt_stream = test->pkt_stream_default;
 }
 
+static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts)
+{
+	struct pkt_stream *pkt_stream;
+
+	pkt_stream = calloc(1, sizeof(*pkt_stream));
+	if (!pkt_stream)
+		return NULL;
+
+	pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts));
+	if (!pkt_stream->pkts) {
+		free(pkt_stream);
+		return NULL;
+	}
+
+	pkt_stream->nb_pkts = nb_pkts;
+	return pkt_stream;
+}
+
 static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len)
 {
 	struct pkt_stream *pkt_stream;
 	u32 i;
 
-	pkt_stream = calloc(1, sizeof(*pkt_stream));
+	pkt_stream = __pkt_stream_alloc(nb_pkts);
 	if (!pkt_stream)
 		exit_with_error(ENOMEM);
 
-	pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts));
-	if (!pkt_stream->pkts)
-		exit_with_error(ENOMEM);
-
 	pkt_stream->nb_pkts = nb_pkts;
 	for (i = 0; i < nb_pkts; i++) {
 		pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size +
@@ -535,6 +561,8 @@ static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb)
 
 	if (!pkt)
 		return NULL;
+	if (!pkt->valid || pkt->len < PKT_SIZE)
+		return pkt;
 
 	data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr);
 	udp_hdr = (struct udphdr *)(data + sizeof(struct ethhdr) + sizeof(struct iphdr));
@@ -549,6 +577,26 @@ static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb)
 	return pkt;
 }
 
+static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts)
+{
+	struct pkt_stream *pkt_stream;
+	u32 i;
+
+	pkt_stream = __pkt_stream_alloc(nb_pkts);
+	if (!pkt_stream)
+		exit_with_error(ENOMEM);
+
+	test->ifobj_tx->pkt_stream = pkt_stream;
+	test->ifobj_rx->pkt_stream = pkt_stream;
+
+	for (i = 0; i < nb_pkts; i++) {
+		pkt_stream->pkts[i].addr = pkts[i].addr;
+		pkt_stream->pkts[i].len = pkts[i].len;
+		pkt_stream->pkts[i].payload = i;
+		pkt_stream->pkts[i].valid = pkts[i].valid;
+	}
+}
+
 static void pkt_dump(void *pkt, u32 len)
 {
 	char s[INET_ADDRSTRLEN];
@@ -596,19 +644,24 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
 		return false;
 	}
 
+	if (len < PKT_SIZE) {
+		/*Do not try to verify packets that are smaller than minimum size. */
+		return true;
+	}
+
+	if (pkt->len != len) {
+		ksft_test_result_fail
+			("ERROR: [%s] expected length [%d], got length [%d]\n",
+			 __func__, pkt->len, len);
+		return false;
+	}
+
 	if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) {
 		u32 seqnum = ntohl(*((u32 *)(data + PKT_HDR_SIZE)));
 
 		if (opt_pkt_dump)
 			pkt_dump(data, PKT_SIZE);
 
-		if (pkt->len != len) {
-			ksft_test_result_fail
-				("ERROR: [%s] expected length [%d], got length [%d]\n",
-					__func__, pkt->len, len);
-			return false;
-		}
-
 		if (pkt->payload != seqnum) {
 			ksft_test_result_fail
 				("ERROR: [%s] expected seqnum [%d], got seqnum [%d]\n",
@@ -645,6 +698,15 @@ static void complete_pkts(struct xsk_socket_info *xsk, int batch_size)
 
 	rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
 	if (rcvd) {
+		if (rcvd > xsk->outstanding_tx) {
+			u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1);
+
+			ksft_test_result_fail("ERROR: [%s] Too many packets completed\n",
+					      __func__);
+			ksft_print_msg("Last completion address: %llx\n", addr);
+			return;
+		}
+
 		xsk_ring_cons__release(&xsk->umem->cq, rcvd);
 		xsk->outstanding_tx -= rcvd;
 	}
@@ -653,11 +715,10 @@ static void complete_pkts(struct xsk_socket_info *xsk, int batch_size)
 static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info *xsk,
 			 struct pollfd *fds)
 {
-	u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkt_count = 0;
-	struct pkt *pkt;
+	struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream);
+	u32 idx_rx = 0, idx_fq = 0, rcvd, i;
 	int ret;
 
-	pkt = pkt_stream_get_pkt(pkt_stream, pkt_count++);
 	while (pkt) {
 		rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
 		if (!rcvd) {
@@ -685,13 +746,21 @@ static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info *
 			const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
 			u64 addr = desc->addr, orig;
 
+			if (!pkt) {
+				ksft_test_result_fail("ERROR: [%s] Received too many packets.\n",
+						      __func__);
+				ksft_print_msg("Last packet has addr: %llx len: %u\n",
+					       addr, desc->len);
+				return;
+			}
+
 			orig = xsk_umem__extract_addr(addr);
 			addr = xsk_umem__add_offset_to_addr(addr);
 			if (!is_pkt_valid(pkt, xsk->umem->buffer, addr, desc->len))
 				return;
 
 			*xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
-			pkt = pkt_stream_get_pkt(pkt_stream, pkt_count++);
+			pkt = pkt_stream_get_next_rx_pkt(pkt_stream);
 		}
 
 		xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
@@ -875,6 +944,7 @@ static void testapp_cleanup_xsk_res(struct ifobject *ifobj)
 {
 	print_verbose("Destroying socket\n");
 	xsk_socket__delete(ifobj->xsk->xsk);
+	munmap(ifobj->umem->buffer, ifobj->umem->num_frames * ifobj->umem->frame_size);
 	xsk_umem__delete(ifobj->umem->umem);
 }
 
@@ -1118,6 +1188,35 @@ static bool testapp_unaligned(struct test_spec *test)
 	return true;
 }
 
+static void testapp_invalid_desc(struct test_spec *test)
+{
+	struct pkt pkts[] = {
+		/* Zero packet length at address zero allowed */
+		{0, 0, 0, true},
+		/* Zero packet length allowed */
+		{0x1000, 0, 0, true},
+		/* Straddling the start of umem */
+		{-2, PKT_SIZE, 0, false},
+		/* Packet too large */
+		{0x2000, XSK_UMEM__INVALID_FRAME_SIZE, 0, false},
+		/* After umem ends */
+		{UMEM_SIZE, PKT_SIZE, 0, false},
+		/* Straddle the end of umem */
+		{UMEM_SIZE - PKT_SIZE / 2, PKT_SIZE, 0, false},
+		/* Straddle a page boundrary */
+		{0x3000 - PKT_SIZE / 2, PKT_SIZE, 0, false},
+		/* Valid packet for synch so that something is received */
+		{0x4000, PKT_SIZE, 0, true}};
+
+	if (test->ifobj_tx->umem->unaligned_mode) {
+		/* Crossing a page boundrary allowed */
+		pkts[6].valid = true;
+	}
+	pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
+	testapp_validate_traffic(test);
+	pkt_stream_restore_default(test);
+}
+
 static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac,
 		       const char *dst_ip, const char *src_ip, const u16 dst_port,
 		       const u16 src_port, thread_func_t func_ptr)
@@ -1159,7 +1258,7 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
 	case TEST_TYPE_BPF_RES:
 		testapp_bpf_res(test);
 		break;
-	case TEST_TYPE_NOPOLL:
+	case TEST_TYPE_RUN_TO_COMPLETION:
 		test_spec_set_name(test, "RUN_TO_COMPLETION");
 		testapp_validate_traffic(test);
 		break;
@@ -1169,6 +1268,16 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
 		test_spec_set_name(test, "POLL");
 		testapp_validate_traffic(test);
 		break;
+	case TEST_TYPE_ALIGNED_INV_DESC:
+		test_spec_set_name(test, "ALIGNED_INV_DESC");
+		testapp_invalid_desc(test);
+		break;
+	case TEST_TYPE_UNALIGNED_INV_DESC:
+		test_spec_set_name(test, "UNALIGNED_INV_DESC");
+		test->ifobj_tx->umem->unaligned_mode = true;
+		test->ifobj_rx->umem->unaligned_mode = true;
+		testapp_invalid_desc(test);
+		break;
 	case TEST_TYPE_UNALIGNED:
 		if (!testapp_unaligned(test))
 			return;
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 129801eb013c..2d9efb89ea28 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -38,6 +38,8 @@
 #define BATCH_SIZE 8
 #define POLL_TMOUT 1000
 #define DEFAULT_PKT_CNT (4 * 1024)
+#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4)
+#define UMEM_SIZE (DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE)
 #define RX_FULL_RXQSIZE 32
 #define DEFAULT_OFFSET 256
 #define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1)
@@ -51,9 +53,11 @@ enum test_mode {
 };
 
 enum test_type {
-	TEST_TYPE_NOPOLL,
+	TEST_TYPE_RUN_TO_COMPLETION,
 	TEST_TYPE_POLL,
 	TEST_TYPE_UNALIGNED,
+	TEST_TYPE_ALIGNED_INV_DESC,
+	TEST_TYPE_UNALIGNED_INV_DESC,
 	TEST_TYPE_TEARDOWN,
 	TEST_TYPE_BIDI,
 	TEST_TYPE_STATS,
@@ -104,6 +108,7 @@ struct pkt {
 
 struct pkt_stream {
 	u32 nb_pkts;
+	u32 rx_pkt_nb;
 	struct pkt *pkts;
 	bool use_addr_for_fill;
 };

From 909f0e28207ced098612653ebaafcbb2e13be834 Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Tue, 7 Sep 2021 09:19:28 +0200
Subject: [PATCH 27/63] selftests: xsk: Add tests for 2K frame size

Add tests for 2K frame size. Both a standard send and receive test and
one testing for invalid descriptors when the frame size is 2K.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20210907071928.9750-21-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xdpxceiver.c | 23 +++++++++++++++++++++++
 tools/testing/selftests/bpf/xdpxceiver.h |  2 ++
 2 files changed, 25 insertions(+)

diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 1a03f7941bb8..127bcde06c86 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -48,6 +48,7 @@
  *    g. unaligned mode
  *    h. tests for invalid and corner case Tx descriptors so that the correct ones
  *       are discarded and let through, respectively.
+ *    i. 2K frame size tests
  *
  * Total tests: 12
  *
@@ -1205,6 +1206,8 @@ static void testapp_invalid_desc(struct test_spec *test)
 		{UMEM_SIZE - PKT_SIZE / 2, PKT_SIZE, 0, false},
 		/* Straddle a page boundrary */
 		{0x3000 - PKT_SIZE / 2, PKT_SIZE, 0, false},
+		/* Straddle a 2K boundrary */
+		{0x3800 - PKT_SIZE / 2, PKT_SIZE, 0, true},
 		/* Valid packet for synch so that something is received */
 		{0x4000, PKT_SIZE, 0, true}};
 
@@ -1212,6 +1215,11 @@ static void testapp_invalid_desc(struct test_spec *test)
 		/* Crossing a page boundrary allowed */
 		pkts[6].valid = true;
 	}
+	if (test->ifobj_tx->umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) {
+		/* Crossing a 2K frame size boundrary not allowed */
+		pkts[7].valid = false;
+	}
+
 	pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
 	testapp_validate_traffic(test);
 	pkt_stream_restore_default(test);
@@ -1262,6 +1270,15 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
 		test_spec_set_name(test, "RUN_TO_COMPLETION");
 		testapp_validate_traffic(test);
 		break;
+	case TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME:
+		test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE");
+		test->ifobj_tx->umem->frame_size = 2048;
+		test->ifobj_rx->umem->frame_size = 2048;
+		pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+		testapp_validate_traffic(test);
+
+		pkt_stream_restore_default(test);
+		break;
 	case TEST_TYPE_POLL:
 		test->ifobj_tx->use_poll = true;
 		test->ifobj_rx->use_poll = true;
@@ -1272,6 +1289,12 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
 		test_spec_set_name(test, "ALIGNED_INV_DESC");
 		testapp_invalid_desc(test);
 		break;
+	case TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME:
+		test_spec_set_name(test, "ALIGNED_INV_DESC_2K_FRAME_SIZE");
+		test->ifobj_tx->umem->frame_size = 2048;
+		test->ifobj_rx->umem->frame_size = 2048;
+		testapp_invalid_desc(test);
+		break;
 	case TEST_TYPE_UNALIGNED_INV_DESC:
 		test_spec_set_name(test, "UNALIGNED_INV_DESC");
 		test->ifobj_tx->umem->unaligned_mode = true;
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 2d9efb89ea28..5ac4a5e64744 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -54,9 +54,11 @@ enum test_mode {
 
 enum test_type {
 	TEST_TYPE_RUN_TO_COMPLETION,
+	TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME,
 	TEST_TYPE_POLL,
 	TEST_TYPE_UNALIGNED,
 	TEST_TYPE_ALIGNED_INV_DESC,
+	TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME,
 	TEST_TYPE_UNALIGNED_INV_DESC,
 	TEST_TYPE_TEARDOWN,
 	TEST_TYPE_BIDI,

From f64c4acea51fbe2c08c0b0f48b7f5d1657d7a5e4 Mon Sep 17 00:00:00 2001
From: Vadim Fedorenko <vfedorenko@novek.ru>
Date: Fri, 10 Sep 2021 01:04:08 +0300
Subject: [PATCH 28/63] bpf: Add hardware timestamp field to __sk_buff

BPF programs may want to know hardware timestamps if NIC supports
such timestamping.

Expose this data as hwtstamp field of __sk_buff the same way as
gso_segs/gso_size. This field could be accessed from the same
programs as tstamp field, but it's read-only field. Explicit test
to deny access to padding data is added to bpf_skb_is_valid_access.

Also update BPF_PROG_TEST_RUN tests of the feature.

Signed-off-by: Vadim Fedorenko <vfedorenko@novek.ru>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210909220409.8804-2-vfedorenko@novek.ru
---
 include/uapi/linux/bpf.h       |  2 ++
 net/core/filter.c              | 21 +++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  2 ++
 3 files changed, 25 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 791f31dd0abe..51cfd91cc387 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5284,6 +5284,8 @@ struct __sk_buff {
 	__u32 gso_segs;
 	__bpf_md_ptr(struct bpf_sock *, sk);
 	__u32 gso_size;
+	__u32 :32;		/* Padding, future use. */
+	__u64 hwtstamp;
 };
 
 struct bpf_tunnel_key {
diff --git a/net/core/filter.c b/net/core/filter.c
index 2e32cee2c469..4bace37a6a44 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7765,6 +7765,10 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
 		break;
 	case bpf_ctx_range_ptr(struct __sk_buff, flow_keys):
 		return false;
+	case bpf_ctx_range(struct __sk_buff, hwtstamp):
+		if (type == BPF_WRITE || size != sizeof(__u64))
+			return false;
+		break;
 	case bpf_ctx_range(struct __sk_buff, tstamp):
 		if (size != sizeof(__u64))
 			return false;
@@ -7774,6 +7778,9 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type
 			return false;
 		info->reg_type = PTR_TO_SOCK_COMMON_OR_NULL;
 		break;
+	case offsetofend(struct __sk_buff, gso_size) ... offsetof(struct __sk_buff, hwtstamp) - 1:
+		/* Explicitly prohibit access to padding in __sk_buff. */
+		return false;
 	default:
 		/* Only narrow read access allowed for now. */
 		if (type == BPF_WRITE) {
@@ -7802,6 +7809,7 @@ static bool sk_filter_is_valid_access(int off, int size,
 	case bpf_ctx_range_till(struct __sk_buff, family, local_port):
 	case bpf_ctx_range(struct __sk_buff, tstamp):
 	case bpf_ctx_range(struct __sk_buff, wire_len):
+	case bpf_ctx_range(struct __sk_buff, hwtstamp):
 		return false;
 	}
 
@@ -7872,6 +7880,7 @@ static bool lwt_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range(struct __sk_buff, tstamp):
 	case bpf_ctx_range(struct __sk_buff, wire_len):
+	case bpf_ctx_range(struct __sk_buff, hwtstamp):
 		return false;
 	}
 
@@ -8373,6 +8382,7 @@ static bool sk_skb_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct __sk_buff, data_meta):
 	case bpf_ctx_range(struct __sk_buff, tstamp):
 	case bpf_ctx_range(struct __sk_buff, wire_len):
+	case bpf_ctx_range(struct __sk_buff, hwtstamp):
 		return false;
 	}
 
@@ -8884,6 +8894,17 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
 				      si->dst_reg, si->src_reg,
 				      offsetof(struct sk_buff, sk));
 		break;
+	case offsetof(struct __sk_buff, hwtstamp):
+		BUILD_BUG_ON(sizeof_field(struct skb_shared_hwtstamps, hwtstamp) != 8);
+		BUILD_BUG_ON(offsetof(struct skb_shared_hwtstamps, hwtstamp) != 0);
+
+		insn = bpf_convert_shinfo_access(si, insn);
+		*insn++ = BPF_LDX_MEM(BPF_DW,
+				      si->dst_reg, si->dst_reg,
+				      bpf_target_off(struct skb_shared_info,
+						     hwtstamps, 8,
+						     target_size));
+		break;
 	}
 
 	return insn - insn_buf;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 791f31dd0abe..51cfd91cc387 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5284,6 +5284,8 @@ struct __sk_buff {
 	__u32 gso_segs;
 	__bpf_md_ptr(struct bpf_sock *, sk);
 	__u32 gso_size;
+	__u32 :32;		/* Padding, future use. */
+	__u64 hwtstamp;
 };
 
 struct bpf_tunnel_key {

From 3384c7c7641b44987e35eadbc9df6c16a0520159 Mon Sep 17 00:00:00 2001
From: Vadim Fedorenko <vfedorenko@novek.ru>
Date: Fri, 10 Sep 2021 01:04:09 +0300
Subject: [PATCH 29/63] selftests/bpf: Test new __sk_buff field hwtstamp

Analogous to the gso_segs selftests introduced in commit d9ff286a0f59
("bpf: allow BPF programs access skb_shared_info->gso_segs field").

Signed-off-by: Vadim Fedorenko <vfedorenko@novek.ru>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210909220409.8804-3-vfedorenko@novek.ru
---
 lib/test_bpf.c                                |  1 +
 net/bpf/test_run.c                            |  8 +++
 .../selftests/bpf/prog_tests/skb_ctx.c        |  1 +
 .../selftests/bpf/progs/test_skb_ctx.c        |  2 +
 .../testing/selftests/bpf/verifier/ctx_skb.c  | 60 +++++++++++++++++++
 5 files changed, 72 insertions(+)

diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 830a18ecffc8..0018d51b93b0 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -8800,6 +8800,7 @@ static __init struct sk_buff *build_test_skb(void)
 	skb_shinfo(skb[0])->gso_type |= SKB_GSO_DODGY;
 	skb_shinfo(skb[0])->gso_segs = 0;
 	skb_shinfo(skb[0])->frag_list = skb[1];
+	skb_shinfo(skb[0])->hwtstamps.hwtstamp = 1000;
 
 	/* adjust skb[0]'s len */
 	skb[0]->len += skb[1]->len;
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 1153b89c9d93..fcb2f493f710 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -507,6 +507,12 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
 	/* gso_size is allowed */
 
 	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_size),
+			   offsetof(struct __sk_buff, hwtstamp)))
+		return -EINVAL;
+
+	/* hwtstamp is allowed */
+
+	if (!range_is_zero(__skb, offsetofend(struct __sk_buff, hwtstamp),
 			   sizeof(struct __sk_buff)))
 		return -EINVAL;
 
@@ -529,6 +535,7 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
 		return -EINVAL;
 	skb_shinfo(skb)->gso_segs = __skb->gso_segs;
 	skb_shinfo(skb)->gso_size = __skb->gso_size;
+	skb_shinfo(skb)->hwtstamps.hwtstamp = __skb->hwtstamp;
 
 	return 0;
 }
@@ -548,6 +555,7 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
 	memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN);
 	__skb->wire_len = cb->pkt_len;
 	__skb->gso_segs = skb_shinfo(skb)->gso_segs;
+	__skb->hwtstamp = skb_shinfo(skb)->hwtstamps.hwtstamp;
 }
 
 int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index 2bf8c687348b..c437e6ba8fe2 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -18,6 +18,7 @@ void test_skb_ctx(void)
 		.gso_segs = 8,
 		.mark = 9,
 		.gso_size = 10,
+		.hwtstamp = 11,
 	};
 	struct bpf_prog_test_run_attr tattr = {
 		.data_in = &pkt_v4,
diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
index bbd5a9c1c4df..ba4dab09d19c 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
@@ -29,6 +29,8 @@ int process(struct __sk_buff *skb)
 		return 1;
 	if (skb->ifindex != 1)
 		return 1;
+	if (skb->hwtstamp != 11)
+		return 1;
 
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c
index 2022c0f2cd75..9e1a30b94197 100644
--- a/tools/testing/selftests/bpf/verifier/ctx_skb.c
+++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c
@@ -1057,6 +1057,66 @@
 	.result = ACCEPT,
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 },
+{
+	"padding after gso_size is not accessible",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+		    offsetofend(struct __sk_buff, gso_size)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.result_unpriv = REJECT,
+	.errstr = "invalid bpf_context access off=180 size=4",
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
+{
+	"read hwtstamp from CGROUP_SKB",
+	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+		    offsetof(struct __sk_buff, hwtstamp)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+	"read hwtstamp from CGROUP_SKB",
+	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1,
+		    offsetof(struct __sk_buff, hwtstamp)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+	"write hwtstamp from CGROUP_SKB",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+		    offsetof(struct __sk_buff, hwtstamp)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.result_unpriv = REJECT,
+	.errstr = "invalid bpf_context access off=184 size=8",
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+	"read hwtstamp from CLS",
+	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+		    offsetof(struct __sk_buff, hwtstamp)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
 {
 	"check wire_len is not readable by sockets",
 	.insns = {

From c22ac2a3d4bd83411ebf0b1726e9e5fc4f5e7ebf Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Fri, 10 Sep 2021 11:33:50 -0700
Subject: [PATCH 30/63] perf: Enable branch record for software events

The typical way to access branch record (e.g. Intel LBR) is via hardware
perf_event. For CPUs with FREEZE_LBRS_ON_PMI support, PMI could capture
reliable LBR. On the other hand, LBR could also be useful in non-PMI
scenario. For example, in kretprobe or bpf fexit program, LBR could
provide a lot of information on what happened with the function. Add API
to use branch record for software use.

Note that, when the software event triggers, it is necessary to stop the
branch record hardware asap. Therefore, static_call is used to remove some
branch instructions in this process.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/bpf/20210910183352.3151445-2-songliubraving@fb.com
---
 arch/x86/events/intel/core.c | 67 ++++++++++++++++++++++++++++++++----
 arch/x86/events/intel/ds.c   |  2 +-
 arch/x86/events/intel/lbr.c  | 20 +++--------
 arch/x86/events/perf_event.h | 19 ++++++++++
 include/linux/perf_event.h   | 23 +++++++++++++
 kernel/events/core.c         |  2 ++
 6 files changed, 111 insertions(+), 22 deletions(-)

diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 7011e87be6d0..1248fc1937f8 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2143,19 +2143,19 @@ static __initconst const u64 knl_hw_cache_extra_regs
  * However, there are some cases which may change PEBS status, e.g. PMI
  * throttle. The PEBS_ENABLE should be updated where the status changes.
  */
-static void __intel_pmu_disable_all(void)
+static __always_inline void __intel_pmu_disable_all(bool bts)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
 
-	if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
+	if (bts && test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
 		intel_pmu_disable_bts();
 }
 
-static void intel_pmu_disable_all(void)
+static __always_inline void intel_pmu_disable_all(void)
 {
-	__intel_pmu_disable_all();
+	__intel_pmu_disable_all(true);
 	intel_pmu_pebs_disable_all();
 	intel_pmu_lbr_disable_all();
 }
@@ -2186,6 +2186,49 @@ static void intel_pmu_enable_all(int added)
 	__intel_pmu_enable_all(added, false);
 }
 
+static noinline int
+__intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries,
+				  unsigned int cnt, unsigned long flags)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+	intel_pmu_lbr_read();
+	cnt = min_t(unsigned int, cnt, x86_pmu.lbr_nr);
+
+	memcpy(entries, cpuc->lbr_entries, sizeof(struct perf_branch_entry) * cnt);
+	intel_pmu_enable_all(0);
+	local_irq_restore(flags);
+	return cnt;
+}
+
+static int
+intel_pmu_snapshot_branch_stack(struct perf_branch_entry *entries, unsigned int cnt)
+{
+	unsigned long flags;
+
+	/* must not have branches... */
+	local_irq_save(flags);
+	__intel_pmu_disable_all(false); /* we don't care about BTS */
+	__intel_pmu_pebs_disable_all();
+	__intel_pmu_lbr_disable();
+	/*            ... until here */
+	return __intel_pmu_snapshot_branch_stack(entries, cnt, flags);
+}
+
+static int
+intel_pmu_snapshot_arch_branch_stack(struct perf_branch_entry *entries, unsigned int cnt)
+{
+	unsigned long flags;
+
+	/* must not have branches... */
+	local_irq_save(flags);
+	__intel_pmu_disable_all(false); /* we don't care about BTS */
+	__intel_pmu_pebs_disable_all();
+	__intel_pmu_arch_lbr_disable();
+	/*            ... until here */
+	return __intel_pmu_snapshot_branch_stack(entries, cnt, flags);
+}
+
 /*
  * Workaround for:
  *   Intel Errata AAK100 (model 26)
@@ -2929,7 +2972,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
 		apic_write(APIC_LVTPC, APIC_DM_NMI);
 	intel_bts_disable_local();
 	cpuc->enabled = 0;
-	__intel_pmu_disable_all();
+	__intel_pmu_disable_all(true);
 	handled = intel_pmu_drain_bts_buffer();
 	handled += intel_bts_interrupt();
 	status = intel_pmu_get_status();
@@ -6283,9 +6326,21 @@ __init int intel_pmu_init(void)
 			x86_pmu.lbr_nr = 0;
 	}
 
-	if (x86_pmu.lbr_nr)
+	if (x86_pmu.lbr_nr) {
 		pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
 
+		/* only support branch_stack snapshot for perfmon >= v2 */
+		if (x86_pmu.disable_all == intel_pmu_disable_all) {
+			if (boot_cpu_has(X86_FEATURE_ARCH_LBR)) {
+				static_call_update(perf_snapshot_branch_stack,
+						   intel_pmu_snapshot_arch_branch_stack);
+			} else {
+				static_call_update(perf_snapshot_branch_stack,
+						   intel_pmu_snapshot_branch_stack);
+			}
+		}
+	}
+
 	intel_pmu_check_extra_regs(x86_pmu.extra_regs);
 
 	/* Support full width counters using alternative MSR range */
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8647713276a7..ac5991fea9ee 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1301,7 +1301,7 @@ void intel_pmu_pebs_disable_all(void)
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
 	if (cpuc->pebs_enabled)
-		wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
+		__intel_pmu_pebs_disable_all();
 }
 
 static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 9e6d6eaeb4cb..6b72e9b55c69 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -228,20 +228,6 @@ static void __intel_pmu_lbr_enable(bool pmi)
 		wrmsrl(MSR_ARCH_LBR_CTL, lbr_select | ARCH_LBR_CTL_LBREN);
 }
 
-static void __intel_pmu_lbr_disable(void)
-{
-	u64 debugctl;
-
-	if (static_cpu_has(X86_FEATURE_ARCH_LBR)) {
-		wrmsrl(MSR_ARCH_LBR_CTL, 0);
-		return;
-	}
-
-	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
-	debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
-	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
-}
-
 void intel_pmu_lbr_reset_32(void)
 {
 	int i;
@@ -779,8 +765,12 @@ void intel_pmu_lbr_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 
-	if (cpuc->lbr_users && !vlbr_exclude_host())
+	if (cpuc->lbr_users && !vlbr_exclude_host()) {
+		if (static_cpu_has(X86_FEATURE_ARCH_LBR))
+			return __intel_pmu_arch_lbr_disable();
+
 		__intel_pmu_lbr_disable();
+	}
 }
 
 void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index e3ac05c97b5e..0e3e596e33cd 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1240,6 +1240,25 @@ static inline bool intel_pmu_has_bts(struct perf_event *event)
 	return intel_pmu_has_bts_period(event, hwc->sample_period);
 }
 
+static __always_inline void __intel_pmu_pebs_disable_all(void)
+{
+	wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
+}
+
+static __always_inline void __intel_pmu_arch_lbr_disable(void)
+{
+	wrmsrl(MSR_ARCH_LBR_CTL, 0);
+}
+
+static __always_inline void __intel_pmu_lbr_disable(void)
+{
+	u64 debugctl;
+
+	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+	debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+}
+
 int intel_pmu_save_and_restart(struct perf_event *event);
 
 struct event_constraint *
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index fe156a8170aa..0cbc5dfe1110 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -57,6 +57,7 @@ struct perf_guest_info_callbacks {
 #include <linux/cgroup.h>
 #include <linux/refcount.h>
 #include <linux/security.h>
+#include <linux/static_call.h>
 #include <asm/local.h>
 
 struct perf_callchain_entry {
@@ -1612,4 +1613,26 @@ extern void __weak arch_perf_update_userpage(struct perf_event *event,
 extern __weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr);
 #endif
 
+/*
+ * Snapshot branch stack on software events.
+ *
+ * Branch stack can be very useful in understanding software events. For
+ * example, when a long function, e.g. sys_perf_event_open, returns an
+ * errno, it is not obvious why the function failed. Branch stack could
+ * provide very helpful information in this type of scenarios.
+ *
+ * On software event, it is necessary to stop the hardware branch recorder
+ * fast. Otherwise, the hardware register/buffer will be flushed with
+ * entries of the triggering event. Therefore, static call is used to
+ * stop the hardware recorder.
+ */
+
+/*
+ * cnt is the number of entries allocated for entries.
+ * Return number of entries copied to .
+ */
+typedef int (perf_snapshot_branch_stack_t)(struct perf_branch_entry *entries,
+					   unsigned int cnt);
+DECLARE_STATIC_CALL(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);
+
 #endif /* _LINUX_PERF_EVENT_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 744e8726c5b2..349f80aa9e7d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -13435,3 +13435,5 @@ struct cgroup_subsys perf_event_cgrp_subsys = {
 	.threaded	= true,
 };
 #endif /* CONFIG_CGROUP_PERF */
+
+DEFINE_STATIC_CALL_RET0(perf_snapshot_branch_stack, perf_snapshot_branch_stack_t);

From 856c02dbce4f8d6a5644083db22c11750aa11481 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Fri, 10 Sep 2021 11:33:51 -0700
Subject: [PATCH 31/63] bpf: Introduce helper bpf_get_branch_snapshot

Introduce bpf_get_branch_snapshot(), which allows tracing pogram to get
branch trace from hardware (e.g. Intel LBR). To use the feature, the
user need to create perf_event with proper branch_record filtering
on each cpu, and then calls bpf_get_branch_snapshot in the bpf function.
On Intel CPUs, VLBR event (raw event 0x1b00) can be use for this.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210910183352.3151445-3-songliubraving@fb.com
---
 include/uapi/linux/bpf.h       | 22 ++++++++++++++++++++++
 kernel/bpf/trampoline.c        |  3 ++-
 kernel/trace/bpf_trace.c       | 30 ++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h | 22 ++++++++++++++++++++++
 4 files changed, 76 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 51cfd91cc387..d21326558d42 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4877,6 +4877,27 @@ union bpf_attr {
  *		Get the struct pt_regs associated with **task**.
  *	Return
  *		A pointer to struct pt_regs.
+ *
+ * long bpf_get_branch_snapshot(void *entries, u32 size, u64 flags)
+ *	Description
+ *		Get branch trace from hardware engines like Intel LBR. The
+ *		hardware engine is stopped shortly after the helper is
+ *		called. Therefore, the user need to filter branch entries
+ *		based on the actual use case. To capture branch trace
+ *		before the trigger point of the BPF program, the helper
+ *		should be called at the beginning of the BPF program.
+ *
+ *		The data is stored as struct perf_branch_entry into output
+ *		buffer *entries*. *size* is the size of *entries* in bytes.
+ *		*flags* is reserved for now and must be zero.
+ *
+ *	Return
+ *		On success, number of bytes written to *buf*. On error, a
+ *		negative value.
+ *
+ *		**-EINVAL** if *flags* is not zero.
+ *
+ *		**-ENOENT** if architecture does not support branch records.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5055,6 +5076,7 @@ union bpf_attr {
 	FN(get_func_ip),		\
 	FN(get_attach_cookie),		\
 	FN(task_pt_regs),		\
+	FN(get_branch_snapshot),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index fe1e857324e6..39eaaff81953 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -10,6 +10,7 @@
 #include <linux/rcupdate_trace.h>
 #include <linux/rcupdate_wait.h>
 #include <linux/module.h>
+#include <linux/static_call.h>
 
 /* dummy _ops. The verifier will operate on target program's ops. */
 const struct bpf_verifier_ops bpf_extension_verifier_ops = {
@@ -526,7 +527,7 @@ out:
 }
 
 #define NO_START_TIME 1
-static u64 notrace bpf_prog_start_time(void)
+static __always_inline u64 notrace bpf_prog_start_time(void)
 {
 	u64 start = NO_START_TIME;
 
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 8e2eb950aa82..067e88c3d2ee 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1017,6 +1017,34 @@ static const struct bpf_func_proto bpf_get_attach_cookie_proto_pe = {
 	.arg1_type	= ARG_PTR_TO_CTX,
 };
 
+BPF_CALL_3(bpf_get_branch_snapshot, void *, buf, u32, size, u64, flags)
+{
+#ifndef CONFIG_X86
+	return -ENOENT;
+#else
+	static const u32 br_entry_size = sizeof(struct perf_branch_entry);
+	u32 entry_cnt = size / br_entry_size;
+
+	entry_cnt = static_call(perf_snapshot_branch_stack)(buf, entry_cnt);
+
+	if (unlikely(flags))
+		return -EINVAL;
+
+	if (!entry_cnt)
+		return -ENOENT;
+
+	return entry_cnt * br_entry_size;
+#endif
+}
+
+static const struct bpf_func_proto bpf_get_branch_snapshot_proto = {
+	.func		= bpf_get_branch_snapshot,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+};
+
 static const struct bpf_func_proto *
 bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
@@ -1132,6 +1160,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_snprintf_proto;
 	case BPF_FUNC_get_func_ip:
 		return &bpf_get_func_ip_proto_tracing;
+	case BPF_FUNC_get_branch_snapshot:
+		return &bpf_get_branch_snapshot_proto;
 	default:
 		return bpf_base_func_proto(func_id);
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 51cfd91cc387..d21326558d42 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4877,6 +4877,27 @@ union bpf_attr {
  *		Get the struct pt_regs associated with **task**.
  *	Return
  *		A pointer to struct pt_regs.
+ *
+ * long bpf_get_branch_snapshot(void *entries, u32 size, u64 flags)
+ *	Description
+ *		Get branch trace from hardware engines like Intel LBR. The
+ *		hardware engine is stopped shortly after the helper is
+ *		called. Therefore, the user need to filter branch entries
+ *		based on the actual use case. To capture branch trace
+ *		before the trigger point of the BPF program, the helper
+ *		should be called at the beginning of the BPF program.
+ *
+ *		The data is stored as struct perf_branch_entry into output
+ *		buffer *entries*. *size* is the size of *entries* in bytes.
+ *		*flags* is reserved for now and must be zero.
+ *
+ *	Return
+ *		On success, number of bytes written to *buf*. On error, a
+ *		negative value.
+ *
+ *		**-EINVAL** if *flags* is not zero.
+ *
+ *		**-ENOENT** if architecture does not support branch records.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5055,6 +5076,7 @@ union bpf_attr {
 	FN(get_func_ip),		\
 	FN(get_attach_cookie),		\
 	FN(task_pt_regs),		\
+	FN(get_branch_snapshot),	\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper

From 025bd7c753aab18cd594924a46ab46ac47209df9 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Fri, 10 Sep 2021 11:33:52 -0700
Subject: [PATCH 32/63] selftests/bpf: Add test for bpf_get_branch_snapshot

This test uses bpf_get_branch_snapshot from a fexit program. The test uses
a target function (bpf_testmod_loop_test) and compares the record against
kallsyms. If there isn't enough record matching kallsyms, the test fails.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20210910183352.3151445-4-songliubraving@fb.com
---
 .../selftests/bpf/bpf_testmod/bpf_testmod.c   |  19 +++-
 .../selftests/bpf/prog_tests/core_reloc.c     |  14 +--
 .../bpf/prog_tests/get_branch_snapshot.c      | 100 ++++++++++++++++++
 .../selftests/bpf/prog_tests/module_attach.c  |  39 -------
 .../selftests/bpf/progs/get_branch_snapshot.c |  40 +++++++
 tools/testing/selftests/bpf/test_progs.c      |  39 +++++++
 tools/testing/selftests/bpf/test_progs.h      |   2 +
 tools/testing/selftests/bpf/trace_helpers.c   |  37 +++++++
 tools/testing/selftests/bpf/trace_helpers.h   |   5 +
 9 files changed, 243 insertions(+), 52 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c
 create mode 100644 tools/testing/selftests/bpf/progs/get_branch_snapshot.c

diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index 141d8da687d2..50fc5561110a 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -13,6 +13,18 @@
 
 DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123;
 
+noinline int bpf_testmod_loop_test(int n)
+{
+	int i, sum = 0;
+
+	/* the primary goal of this test is to test LBR. Create a lot of
+	 * branches in the function, so we can catch it easily.
+	 */
+	for (i = 0; i < n; i++)
+		sum += i;
+	return sum;
+}
+
 noinline ssize_t
 bpf_testmod_test_read(struct file *file, struct kobject *kobj,
 		      struct bin_attribute *bin_attr,
@@ -24,7 +36,11 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
 		.len = len,
 	};
 
-	trace_bpf_testmod_test_read(current, &ctx);
+	/* This is always true. Use the check to make sure the compiler
+	 * doesn't remove bpf_testmod_loop_test.
+	 */
+	if (bpf_testmod_loop_test(101) > 100)
+		trace_bpf_testmod_test_read(current, &ctx);
 
 	return -EIO; /* always fail */
 }
@@ -71,4 +87,3 @@ module_exit(bpf_testmod_exit);
 MODULE_AUTHOR("Andrii Nakryiko");
 MODULE_DESCRIPTION("BPF selftests module");
 MODULE_LICENSE("Dual BSD/GPL");
-
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 4739b15b2a97..15d355af8d1d 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -30,7 +30,7 @@ static int duration = 0;
 	.output_len = sizeof(struct core_reloc_module_output),		\
 	.prog_sec_name = sec_name,					\
 	.raw_tp_name = tp_name,						\
-	.trigger = trigger_module_test_read,				\
+	.trigger = __trigger_module_test_read,				\
 	.needs_testmod = true,						\
 }
 
@@ -475,19 +475,11 @@ static int setup_type_id_case_failure(struct core_reloc_test_case *test)
 	return 0;
 }
 
-static int trigger_module_test_read(const struct core_reloc_test_case *test)
+static int __trigger_module_test_read(const struct core_reloc_test_case *test)
 {
 	struct core_reloc_module_output *exp = (void *)test->output;
-	int fd, err;
-
-	fd = open("/sys/kernel/bpf_testmod", O_RDONLY);
-	err = -errno;
-	if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err))
-		return err;
-
-	read(fd, NULL, exp->len); /* request expected number of bytes */
-	close(fd);
 
+	trigger_module_test_read(exp->len);
 	return 0;
 }
 
diff --git a/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c
new file mode 100644
index 000000000000..f81db9135ae4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "get_branch_snapshot.skel.h"
+
+static int *pfd_array;
+static int cpu_cnt;
+
+static int create_perf_events(void)
+{
+	struct perf_event_attr attr = {0};
+	int cpu;
+
+	/* create perf event */
+	attr.size = sizeof(attr);
+	attr.type = PERF_TYPE_RAW;
+	attr.config = 0x1b00;
+	attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
+	attr.branch_sample_type = PERF_SAMPLE_BRANCH_KERNEL |
+		PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY;
+
+	cpu_cnt = libbpf_num_possible_cpus();
+	pfd_array = malloc(sizeof(int) * cpu_cnt);
+	if (!pfd_array) {
+		cpu_cnt = 0;
+		return 1;
+	}
+
+	for (cpu = 0; cpu < cpu_cnt; cpu++) {
+		pfd_array[cpu] = syscall(__NR_perf_event_open, &attr,
+					 -1, cpu, -1, PERF_FLAG_FD_CLOEXEC);
+		if (pfd_array[cpu] < 0)
+			break;
+	}
+
+	return cpu == 0;
+}
+
+static void close_perf_events(void)
+{
+	int cpu = 0;
+	int fd;
+
+	while (cpu++ < cpu_cnt) {
+		fd = pfd_array[cpu];
+		if (fd < 0)
+			break;
+		close(fd);
+	}
+	free(pfd_array);
+}
+
+void test_get_branch_snapshot(void)
+{
+	struct get_branch_snapshot *skel = NULL;
+	int err;
+
+	if (create_perf_events()) {
+		test__skip();  /* system doesn't support LBR */
+		goto cleanup;
+	}
+
+	skel = get_branch_snapshot__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "get_branch_snapshot__open_and_load"))
+		goto cleanup;
+
+	err = kallsyms_find("bpf_testmod_loop_test", &skel->bss->address_low);
+	if (!ASSERT_OK(err, "kallsyms_find"))
+		goto cleanup;
+
+	err = kallsyms_find_next("bpf_testmod_loop_test", &skel->bss->address_high);
+	if (!ASSERT_OK(err, "kallsyms_find_next"))
+		goto cleanup;
+
+	err = get_branch_snapshot__attach(skel);
+	if (!ASSERT_OK(err, "get_branch_snapshot__attach"))
+		goto cleanup;
+
+	trigger_module_test_read(100);
+
+	if (skel->bss->total_entries < 16) {
+		/* too few entries for the hit/waste test */
+		test__skip();
+		goto cleanup;
+	}
+
+	ASSERT_GT(skel->bss->test1_hits, 6, "find_looptest_in_lbr");
+
+	/* Given we stop LBR in software, we will waste a few entries.
+	 * But we should try to waste as few as possible entries. We are at
+	 * about 7 on x86_64 systems.
+	 * Add a check for < 10 so that we get heads-up when something
+	 * changes and wastes too many entries.
+	 */
+	ASSERT_LT(skel->bss->wasted_entries, 10, "check_wasted_entries");
+
+cleanup:
+	get_branch_snapshot__destroy(skel);
+	close_perf_events();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c
index d85a69b7ce44..1797a6e4d6d8 100644
--- a/tools/testing/selftests/bpf/prog_tests/module_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c
@@ -6,45 +6,6 @@
 
 static int duration;
 
-static int trigger_module_test_read(int read_sz)
-{
-	int fd, err;
-
-	fd = open("/sys/kernel/bpf_testmod", O_RDONLY);
-	err = -errno;
-	if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err))
-		return err;
-
-	read(fd, NULL, read_sz);
-	close(fd);
-
-	return 0;
-}
-
-static int trigger_module_test_write(int write_sz)
-{
-	int fd, err;
-	char *buf = malloc(write_sz);
-
-	if (!buf)
-		return -ENOMEM;
-
-	memset(buf, 'a', write_sz);
-	buf[write_sz-1] = '\0';
-
-	fd = open("/sys/kernel/bpf_testmod", O_WRONLY);
-	err = -errno;
-	if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err)) {
-		free(buf);
-		return err;
-	}
-
-	write(fd, buf, write_sz);
-	close(fd);
-	free(buf);
-	return 0;
-}
-
 static int delete_module(const char *name, int flags)
 {
 	return syscall(__NR_delete_module, name, flags);
diff --git a/tools/testing/selftests/bpf/progs/get_branch_snapshot.c b/tools/testing/selftests/bpf/progs/get_branch_snapshot.c
new file mode 100644
index 000000000000..a1b139888048
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/get_branch_snapshot.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 test1_hits = 0;
+__u64 address_low = 0;
+__u64 address_high = 0;
+int wasted_entries = 0;
+long total_entries = 0;
+
+#define ENTRY_CNT 32
+struct perf_branch_entry entries[ENTRY_CNT] = {};
+
+static inline bool in_range(__u64 val)
+{
+	return (val >= address_low) && (val < address_high);
+}
+
+SEC("fexit/bpf_testmod_loop_test")
+int BPF_PROG(test1, int n, int ret)
+{
+	long i;
+
+	total_entries = bpf_get_branch_snapshot(entries, sizeof(entries), 0);
+	total_entries /= sizeof(struct perf_branch_entry);
+
+	for (i = 0; i < ENTRY_CNT; i++) {
+		if (i >= total_entries)
+			break;
+		if (in_range(entries[i].from) && in_range(entries[i].to))
+			test1_hits++;
+		else if (!test1_hits)
+			wasted_entries++;
+	}
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index cc1cd240445d..2ed01f615d20 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -743,6 +743,45 @@ int cd_flavor_subdir(const char *exec_name)
 	return chdir(flavor);
 }
 
+int trigger_module_test_read(int read_sz)
+{
+	int fd, err;
+
+	fd = open("/sys/kernel/bpf_testmod", O_RDONLY);
+	err = -errno;
+	if (!ASSERT_GE(fd, 0, "testmod_file_open"))
+		return err;
+
+	read(fd, NULL, read_sz);
+	close(fd);
+
+	return 0;
+}
+
+int trigger_module_test_write(int write_sz)
+{
+	int fd, err;
+	char *buf = malloc(write_sz);
+
+	if (!buf)
+		return -ENOMEM;
+
+	memset(buf, 'a', write_sz);
+	buf[write_sz-1] = '\0';
+
+	fd = open("/sys/kernel/bpf_testmod", O_WRONLY);
+	err = -errno;
+	if (!ASSERT_GE(fd, 0, "testmod_file_open")) {
+		free(buf);
+		return err;
+	}
+
+	write(fd, buf, write_sz);
+	close(fd);
+	free(buf);
+	return 0;
+}
+
 #define MAX_BACKTRACE_SZ 128
 void crash_handler(int signum)
 {
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index c8c2bf878f67..94bef0aa74cf 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -291,6 +291,8 @@ int compare_map_keys(int map1_fd, int map2_fd);
 int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len);
 int extract_build_id(char *build_id, size_t size);
 int kern_sync_rcu(void);
+int trigger_module_test_read(int read_sz);
+int trigger_module_test_write(int write_sz);
 
 #ifdef __x86_64__
 #define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep"
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index e7a19b04d4ea..5100a169b72b 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <ctype.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -117,6 +118,42 @@ out:
 	return err;
 }
 
+/* find the address of the next symbol of the same type, this can be used
+ * to determine the end of a function.
+ */
+int kallsyms_find_next(const char *sym, unsigned long long *addr)
+{
+	char type, found_type, name[500];
+	unsigned long long value;
+	bool found = false;
+	int err = 0;
+	FILE *f;
+
+	f = fopen("/proc/kallsyms", "r");
+	if (!f)
+		return -EINVAL;
+
+	while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) {
+		/* Different types of symbols in kernel modules are mixed
+		 * in /proc/kallsyms. Only return the next matching type.
+		 * Use tolower() for type so that 'T' matches 't'.
+		 */
+		if (found && found_type == tolower(type)) {
+			*addr = value;
+			goto out;
+		}
+		if (strcmp(name, sym) == 0) {
+			found = true;
+			found_type = tolower(type);
+		}
+	}
+	err = -ENOENT;
+
+out:
+	fclose(f);
+	return err;
+}
+
 void read_trace_pipe(void)
 {
 	int trace_fd;
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index d907b445524d..bc8ed86105d9 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -16,6 +16,11 @@ long ksym_get_addr(const char *name);
 /* open kallsyms and find addresses on the fly, faster than load + search. */
 int kallsyms_find(const char *sym, unsigned long long *addr);
 
+/* find the address of the next symbol, this can be used to determine the
+ * end of a function
+ */
+int kallsyms_find_next(const char *sym, unsigned long long *addr);
+
 void read_trace_pipe(void);
 
 ssize_t get_uprobe_offset(const void *addr, ssize_t base);

From dbd7eb14e0607afa1dd3aee7175f37022ecc5f03 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 10 Sep 2021 11:19:00 +0200
Subject: [PATCH 33/63] bpf, selftests: Replicate tailcall limit test for
 indirect call case

The tailcall_3 test program uses bpf_tail_call_static() where the JIT
would patch a direct jump. Add a new tailcall_6 test program replicating
exactly the same test just ensuring that bpf_tail_call() uses a map
index where the verifier cannot make assumptions this time.

In other words, this will now cover both on x86-64 JIT, meaning, JIT
images with emit_bpf_tail_call_direct() emission as well as JIT images
with emit_bpf_tail_call_indirect() emission.

  # echo 1 > /proc/sys/net/core/bpf_jit_enable
  # ./test_progs -t tailcalls
  #136/1 tailcalls/tailcall_1:OK
  #136/2 tailcalls/tailcall_2:OK
  #136/3 tailcalls/tailcall_3:OK
  #136/4 tailcalls/tailcall_4:OK
  #136/5 tailcalls/tailcall_5:OK
  #136/6 tailcalls/tailcall_6:OK
  #136/7 tailcalls/tailcall_bpf2bpf_1:OK
  #136/8 tailcalls/tailcall_bpf2bpf_2:OK
  #136/9 tailcalls/tailcall_bpf2bpf_3:OK
  #136/10 tailcalls/tailcall_bpf2bpf_4:OK
  #136/11 tailcalls/tailcall_bpf2bpf_5:OK
  #136 tailcalls:OK
  Summary: 1/11 PASSED, 0 SKIPPED, 0 FAILED

  # echo 0 > /proc/sys/net/core/bpf_jit_enable
  # ./test_progs -t tailcalls
  #136/1 tailcalls/tailcall_1:OK
  #136/2 tailcalls/tailcall_2:OK
  #136/3 tailcalls/tailcall_3:OK
  #136/4 tailcalls/tailcall_4:OK
  #136/5 tailcalls/tailcall_5:OK
  #136/6 tailcalls/tailcall_6:OK
  [...]

For interpreter, the tailcall_1-6 tests are passing as well. The later
tailcall_bpf2bpf_* are failing due lack of bpf2bpf + tailcall support
in interpreter, so this is expected.

Also, manual inspection shows that both loaded programs from tailcall_3
and tailcall_6 test case emit the expected opcodes:

* tailcall_3 disasm, emit_bpf_tail_call_direct():

  [...]
   b:   push   %rax
   c:   push   %rbx
   d:   push   %r13
   f:   mov    %rdi,%rbx
  12:   movabs $0xffff8d3f5afb0200,%r13
  1c:   mov    %rbx,%rdi
  1f:   mov    %r13,%rsi
  22:   xor    %edx,%edx                 _
  24:   mov    -0x4(%rbp),%eax          |  limit check
  2a:   cmp    $0x20,%eax               |
  2d:   ja     0x0000000000000046       |
  2f:   add    $0x1,%eax                |
  32:   mov    %eax,-0x4(%rbp)          |_
  38:   nopl   0x0(%rax,%rax,1)
  3d:   pop    %r13
  3f:   pop    %rbx
  40:   pop    %rax
  41:   jmpq   0xffffffffffffe377
  [...]

* tailcall_6 disasm, emit_bpf_tail_call_indirect():

  [...]
  47:   movabs $0xffff8d3f59143a00,%rsi
  51:   mov    %edx,%edx
  53:   cmp    %edx,0x24(%rsi)
  56:   jbe    0x0000000000000093        _
  58:   mov    -0x4(%rbp),%eax          |  limit check
  5e:   cmp    $0x20,%eax               |
  61:   ja     0x0000000000000093       |
  63:   add    $0x1,%eax                |
  66:   mov    %eax,-0x4(%rbp)          |_
  6c:   mov    0x110(%rsi,%rdx,8),%rcx
  74:   test   %rcx,%rcx
  77:   je     0x0000000000000093
  79:   pop    %rax
  7a:   mov    0x30(%rcx),%rcx
  7e:   add    $0xb,%rcx
  82:   callq  0x000000000000008e
  87:   pause
  89:   lfence
  8c:   jmp    0x0000000000000087
  8e:   mov    %rcx,(%rsp)
  92:   retq
  [...]

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Tested-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Johan Almbladh <johan.almbladh@anyfinetworks.com>
Acked-by: Paul Chaignon <paul@cilium.io>
Link: https://lore.kernel.org/bpf/CAM1=_QRyRVCODcXo_Y6qOm1iT163HoiSj8U2pZ8Rj3hzMTT=HQ@mail.gmail.com
Link: https://lore.kernel.org/bpf/20210910091900.16119-1-daniel@iogearbox.net
---
 .../selftests/bpf/prog_tests/tailcalls.c      | 25 +++++++++++---
 tools/testing/selftests/bpf/progs/tailcall6.c | 34 +++++++++++++++++++
 2 files changed, 54 insertions(+), 5 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/tailcall6.c

diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index b5940e6ca67c..7bf3a7a97d7b 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -219,10 +219,7 @@ out:
 	bpf_object__close(obj);
 }
 
-/* test_tailcall_3 checks that the count value of the tail call limit
- * enforcement matches with expectations.
- */
-static void test_tailcall_3(void)
+static void test_tailcall_count(const char *which)
 {
 	int err, map_fd, prog_fd, main_fd, data_fd, i, val;
 	struct bpf_map *prog_array, *data_map;
@@ -231,7 +228,7 @@ static void test_tailcall_3(void)
 	__u32 retval, duration;
 	char buff[128] = {};
 
-	err = bpf_prog_load("tailcall3.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+	err = bpf_prog_load(which, BPF_PROG_TYPE_SCHED_CLS, &obj,
 			    &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
@@ -296,6 +293,22 @@ out:
 	bpf_object__close(obj);
 }
 
+/* test_tailcall_3 checks that the count value of the tail call limit
+ * enforcement matches with expectations. JIT uses direct jump.
+ */
+static void test_tailcall_3(void)
+{
+	test_tailcall_count("tailcall3.o");
+}
+
+/* test_tailcall_6 checks that the count value of the tail call limit
+ * enforcement matches with expectations. JIT uses indirect jump.
+ */
+static void test_tailcall_6(void)
+{
+	test_tailcall_count("tailcall6.o");
+}
+
 /* test_tailcall_4 checks that the kernel properly selects indirect jump
  * for the case where the key is not known. Latter is passed via global
  * data to select different targets we can compare return value of.
@@ -822,6 +835,8 @@ void test_tailcalls(void)
 		test_tailcall_4();
 	if (test__start_subtest("tailcall_5"))
 		test_tailcall_5();
+	if (test__start_subtest("tailcall_6"))
+		test_tailcall_6();
 	if (test__start_subtest("tailcall_bpf2bpf_1"))
 		test_tailcall_bpf2bpf_1();
 	if (test__start_subtest("tailcall_bpf2bpf_2"))
diff --git a/tools/testing/selftests/bpf/progs/tailcall6.c b/tools/testing/selftests/bpf/progs/tailcall6.c
new file mode 100644
index 000000000000..0f4a811cc028
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall6.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+int count, which;
+
+SEC("classifier/0")
+int bpf_func_0(struct __sk_buff *skb)
+{
+	count++;
+	if (__builtin_constant_p(which))
+		__bpf_unreachable();
+	bpf_tail_call(skb, &jmp_table, which);
+	return 1;
+}
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+	if (__builtin_constant_p(which))
+		__bpf_unreachable();
+	bpf_tail_call(skb, &jmp_table, which);
+	return 0;
+}
+
+char __license[] SEC("license") = "GPL";

From 2f383041278672332ab57c78570c30c47d6f35fd Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 13 Sep 2021 15:23:09 -0700
Subject: [PATCH 34/63] libbpf: Make libbpf_version.h non-auto-generated

Turn previously auto-generated libbpf_version.h header into a normal
header file. This prevents various tricky Makefile integration issues,
simplifies the overall build process, but also allows to further extend
it with some more versioning-related APIs in the future.

To prevent accidental out-of-sync versions as defined by libbpf.map and
libbpf_version.h, Makefile checks their consistency at build time.

Simultaneously with this change bump libbpf.map to v0.6.

Also undo adding libbpf's output directory into include path for
kernel/bpf/preload, bpftool, and resolve_btfids, which is not necessary
because libbpf_version.h is just a normal header like any other.

Fixes: 0b46b7550560 ("libbpf: Add LIBBPF_DEPRECATED_SINCE macro for scheduling API deprecations")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210913222309.3220849-1-andrii@kernel.org
---
 kernel/bpf/preload/Makefile       |  7 ++-----
 tools/bpf/bpftool/Makefile        |  1 -
 tools/bpf/resolve_btfids/Makefile |  1 -
 tools/lib/bpf/.gitignore          |  1 -
 tools/lib/bpf/Makefile            | 33 ++++++++++++++++++++-----------
 tools/lib/bpf/libbpf.map          |  3 +++
 tools/lib/bpf/libbpf_version.h    |  9 +++++++++
 7 files changed, 35 insertions(+), 20 deletions(-)
 create mode 100644 tools/lib/bpf/libbpf_version.h

diff --git a/kernel/bpf/preload/Makefile b/kernel/bpf/preload/Makefile
index ac29d4e9a384..1951332dd15f 100644
--- a/kernel/bpf/preload/Makefile
+++ b/kernel/bpf/preload/Makefile
@@ -10,15 +10,12 @@ LIBBPF_OUT = $(abspath $(obj))
 $(LIBBPF_A):
 	$(Q)$(MAKE) -C $(LIBBPF_SRCS) O=$(LIBBPF_OUT)/ OUTPUT=$(LIBBPF_OUT)/ $(LIBBPF_OUT)/libbpf.a
 
-userccflags += -I$(LIBBPF_OUT) -I $(srctree)/tools/include/ \
-	-I $(srctree)/tools/include/uapi \
+userccflags += -I $(srctree)/tools/include/ -I $(srctree)/tools/include/uapi \
 	-I $(srctree)/tools/lib/ -Wno-unused-result
 
 userprogs := bpf_preload_umd
 
-clean-files := $(userprogs) libbpf_version.h bpf_helper_defs.h FEATURE-DUMP.libbpf staticobjs/ feature/
-
-$(obj)/iterators/iterators.o: $(LIBBPF_A)
+clean-files := $(userprogs) bpf_helper_defs.h FEATURE-DUMP.libbpf staticobjs/ feature/
 
 bpf_preload_umd-objs := iterators/iterators.o
 bpf_preload_umd-userldlibs := $(LIBBPF_A) -lelf -lz
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 06aa1616dabe..1fcf5b01a193 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -60,7 +60,6 @@ CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers
 CFLAGS += $(filter-out -Wswitch-enum -Wnested-externs,$(EXTRA_WARNINGS))
 CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
 	-I$(if $(OUTPUT),$(OUTPUT),.) \
-	$(if $(LIBBPF_OUTPUT),-I$(LIBBPF_OUTPUT)) \
 	-I$(srctree)/kernel/bpf/ \
 	-I$(srctree)/tools/include \
 	-I$(srctree)/tools/include/uapi \
diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
index edc0c329cf74..08b75e314ae7 100644
--- a/tools/bpf/resolve_btfids/Makefile
+++ b/tools/bpf/resolve_btfids/Makefile
@@ -47,7 +47,6 @@ $(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(OUTPUT)/l
 CFLAGS := -g \
           -I$(srctree)/tools/include \
           -I$(srctree)/tools/include/uapi \
-          -I$(LIBBPF_OUT) \
           -I$(LIBBPF_SRC) \
           -I$(SUBCMD_SRC)
 
diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore
index 5d4cfac671d5..0da84cb9e66d 100644
--- a/tools/lib/bpf/.gitignore
+++ b/tools/lib/bpf/.gitignore
@@ -1,5 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
-libbpf_version.h
 libbpf.pc
 libbpf.so.*
 TAGS
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index dab21e0c7cc2..0f766345506f 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -113,9 +113,8 @@ SHARED_OBJDIR	:= $(OUTPUT)sharedobjs/
 STATIC_OBJDIR	:= $(OUTPUT)staticobjs/
 BPF_IN_SHARED	:= $(SHARED_OBJDIR)libbpf-in.o
 BPF_IN_STATIC	:= $(STATIC_OBJDIR)libbpf-in.o
-VERSION_HDR	:= $(OUTPUT)libbpf_version.h
 BPF_HELPER_DEFS	:= $(OUTPUT)bpf_helper_defs.h
-BPF_GENERATED	:= $(BPF_HELPER_DEFS) $(VERSION_HDR)
+BPF_GENERATED	:= $(BPF_HELPER_DEFS)
 
 LIB_TARGET	:= $(addprefix $(OUTPUT),$(LIB_TARGET))
 LIB_FILE	:= $(addprefix $(OUTPUT),$(LIB_FILE))
@@ -165,12 +164,6 @@ $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h
 	$(QUIET_GEN)$(srctree)/scripts/bpf_doc.py --header \
 		--file $(srctree)/tools/include/uapi/linux/bpf.h > $(BPF_HELPER_DEFS)
 
-$(VERSION_HDR): force
-	$(QUIET_GEN)echo "/* This file was auto-generated. */" > $@
-	@echo "" >> $@
-	@echo "#define LIBBPF_MAJOR_VERSION $(LIBBPF_MAJOR_VERSION)" >> $@
-	@echo "#define LIBBPF_MINOR_VERSION $(LIBBPF_MINOR_VERSION)" >> $@
-
 $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
 
 $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) $(VERSION_SCRIPT)
@@ -189,7 +182,7 @@ $(OUTPUT)libbpf.pc:
 		-e "s|@VERSION@|$(LIBBPF_VERSION)|" \
 		< libbpf.pc.template > $@
 
-check: check_abi
+check: check_abi check_version
 
 check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT)
 	@if [ "$(GLOBAL_SYM_COUNT)" != "$(VERSIONED_SYM_COUNT)" ]; then	 \
@@ -215,6 +208,21 @@ check_abi: $(OUTPUT)libbpf.so $(VERSION_SCRIPT)
 		exit 1;							 \
 	fi
 
+HDR_MAJ_VERSION := $(shell grep -oE '^\#define LIBBPF_MAJOR_VERSION ([0-9]+)$$' libbpf_version.h | cut -d' ' -f3)
+HDR_MIN_VERSION := $(shell grep -oE '^\#define LIBBPF_MINOR_VERSION ([0-9]+)$$' libbpf_version.h | cut -d' ' -f3)
+
+check_version: $(VERSION_SCRIPT) libbpf_version.h
+	@if [ "$(HDR_MAJ_VERSION)" != "$(LIBBPF_MAJOR_VERSION)" ]; then        \
+		echo "Error: libbpf major version mismatch detected: "	       \
+		     "'$(HDR_MAJ_VERSION)' != '$(LIBBPF_MAJOR_VERSION)'" >&2;  \
+		exit 1;							       \
+	fi
+	@if [ "$(HDR_MIN_VERSION)" != "$(LIBBPF_MINOR_VERSION)" ]; then	       \
+		echo "Error: libbpf minor version mismatch detected: "	       \
+		     "'$(HDR_MIN_VERSION)' != '$(LIBBPF_MINOR_VERSION)'" >&2;  \
+		exit 1;							       \
+	fi
+
 define do_install_mkdir
 	if [ ! -d '$(DESTDIR_SQ)$1' ]; then		\
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1';	\
@@ -234,8 +242,9 @@ install_lib: all_cmd
 		cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ)
 
 INSTALL_HEADERS = bpf.h libbpf.h btf.h libbpf_common.h libbpf_legacy.h xsk.h \
-		  bpf_helpers.h $(BPF_GENERATED) bpf_tracing.h	     \
-		  bpf_endian.h bpf_core_read.h skel_internal.h
+		  bpf_helpers.h $(BPF_GENERATED) bpf_tracing.h		     \
+		  bpf_endian.h bpf_core_read.h skel_internal.h		     \
+		  libbpf_version.h
 
 install_headers: $(BPF_GENERATED)
 	$(call QUIET_INSTALL, headers)					     \
@@ -255,7 +264,7 @@ clean:
 		$(addprefix $(OUTPUT),					     \
 			    *.o *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) *.pc)
 
-PHONY += force cscope tags
+PHONY += force cscope tags check check_abi check_version
 force:
 
 cscope:
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index bbc53bb25f68..78ea62c9346f 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -386,3 +386,6 @@ LIBBPF_0.5.0 {
 		btf_dump__dump_type_data;
 		libbpf_set_strict_mode;
 } LIBBPF_0.4.0;
+
+LIBBPF_0.6.0 {
+} LIBBPF_0.5.0;
diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h
new file mode 100644
index 000000000000..dd56d76f291c
--- /dev/null
+++ b/tools/lib/bpf/libbpf_version.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (C) 2021 Facebook */
+#ifndef __LIBBPF_VERSION_H
+#define __LIBBPF_VERSION_H
+
+#define LIBBPF_MAJOR_VERSION 0
+#define LIBBPF_MINOR_VERSION 6
+
+#endif /* __LIBBPF_VERSION_H */

From ca304b40c20d5750f08200f0ad3445384646620c Mon Sep 17 00:00:00 2001
From: Rafael David Tinoco <rafaeldtinoco@gmail.com>
Date: Sun, 12 Sep 2021 03:48:44 -0300
Subject: [PATCH 35/63] libbpf: Introduce legacy kprobe events support

Allow kprobe tracepoint events creation through legacy interface, as the
kprobe dynamic PMUs support, used by default, was only created in v4.17.

Store legacy kprobe name in struct bpf_perf_link, instead of creating
a new "subclass" off of bpf_perf_link. This is ok as it's just two new
fields, which are also going to be reused for legacy uprobe support in
follow up patches.

Signed-off-by: Rafael David Tinoco <rafaeldtinoco@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210912064844.3181742-1-rafaeldtinoco@gmail.com
---
 tools/lib/bpf/libbpf.c | 128 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 124 insertions(+), 4 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 8f579c6666b2..9d99183bffba 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8993,9 +8993,57 @@ int bpf_link__unpin(struct bpf_link *link)
 	return 0;
 }
 
+static int poke_kprobe_events(bool add, const char *name, bool retprobe, uint64_t offset)
+{
+	int fd, ret = 0;
+	pid_t p = getpid();
+	char cmd[260], probename[128], probefunc[128];
+	const char *file = "/sys/kernel/debug/tracing/kprobe_events";
+
+	if (retprobe)
+		snprintf(probename, sizeof(probename), "kretprobes/%s_libbpf_%u", name, p);
+	else
+		snprintf(probename, sizeof(probename), "kprobes/%s_libbpf_%u", name, p);
+
+	if (offset)
+		snprintf(probefunc, sizeof(probefunc), "%s+%zu", name, (size_t)offset);
+
+	if (add) {
+		snprintf(cmd, sizeof(cmd), "%c:%s %s",
+			 retprobe ? 'r' : 'p',
+			 probename,
+			 offset ? probefunc : name);
+	} else {
+		snprintf(cmd, sizeof(cmd), "-:%s", probename);
+	}
+
+	fd = open(file, O_WRONLY | O_APPEND, 0);
+	if (!fd)
+		return -errno;
+	ret = write(fd, cmd, strlen(cmd));
+	if (ret < 0)
+		ret = -errno;
+	close(fd);
+
+	return ret;
+}
+
+static inline int add_kprobe_event_legacy(const char *name, bool retprobe, uint64_t offset)
+{
+	return poke_kprobe_events(true, name, retprobe, offset);
+}
+
+static inline int remove_kprobe_event_legacy(const char *name, bool retprobe)
+{
+	return poke_kprobe_events(false, name, retprobe, 0);
+}
+
 struct bpf_link_perf {
 	struct bpf_link link;
 	int perf_event_fd;
+	/* legacy kprobe support: keep track of probe identifier and type */
+	char *legacy_probe_name;
+	bool legacy_is_retprobe;
 };
 
 static int bpf_link_perf_detach(struct bpf_link *link)
@@ -9010,13 +9058,19 @@ static int bpf_link_perf_detach(struct bpf_link *link)
 		close(perf_link->perf_event_fd);
 	close(link->fd);
 
-	return libbpf_err(err);
+	/* legacy kprobe needs to be removed after perf event fd closure */
+	if (perf_link->legacy_probe_name)
+		err = remove_kprobe_event_legacy(perf_link->legacy_probe_name,
+						 perf_link->legacy_is_retprobe);
+
+	return err;
 }
 
 static void bpf_link_perf_dealloc(struct bpf_link *link)
 {
 	struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
 
+	free(perf_link->legacy_probe_name);
 	free(perf_link);
 }
 
@@ -9130,6 +9184,18 @@ static int parse_uint_from_file(const char *file, const char *fmt)
 	return ret;
 }
 
+static int determine_kprobe_perf_type_legacy(const char *func_name, bool is_retprobe)
+{
+	char file[192];
+
+	snprintf(file, sizeof(file),
+		 "/sys/kernel/debug/tracing/events/%s/%s_libbpf_%d/id",
+		 is_retprobe ? "kretprobes" : "kprobes",
+		 func_name, getpid());
+
+	return parse_uint_from_file(file, "%d\n");
+}
+
 static int determine_kprobe_perf_type(void)
 {
 	const char *file = "/sys/bus/event_source/devices/kprobe/type";
@@ -9212,6 +9278,41 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
 	return pfd;
 }
 
+static int perf_event_kprobe_open_legacy(bool retprobe, const char *name, uint64_t offset, int pid)
+{
+	struct perf_event_attr attr = {};
+	char errmsg[STRERR_BUFSIZE];
+	int type, pfd, err;
+
+	err = add_kprobe_event_legacy(name, retprobe, offset);
+	if (err < 0) {
+		pr_warn("failed to add legacy kprobe event: %s\n",
+			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return err;
+	}
+	type = determine_kprobe_perf_type_legacy(name, retprobe);
+	if (type < 0) {
+		pr_warn("failed to determine legacy kprobe event id: %s\n",
+			libbpf_strerror_r(type, errmsg, sizeof(errmsg)));
+		return type;
+	}
+	attr.size = sizeof(attr);
+	attr.config = type;
+	attr.type = PERF_TYPE_TRACEPOINT;
+
+	pfd = syscall(__NR_perf_event_open, &attr,
+		      pid < 0 ? -1 : pid, /* pid */
+		      pid == -1 ? 0 : -1, /* cpu */
+		      -1 /* group_fd */,  PERF_FLAG_FD_CLOEXEC);
+	if (pfd < 0) {
+		err = -errno;
+		pr_warn("legacy kprobe perf_event_open() failed: %s\n",
+			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
+		return err;
+	}
+	return pfd;
+}
+
 struct bpf_link *
 bpf_program__attach_kprobe_opts(struct bpf_program *prog,
 				const char *func_name,
@@ -9219,9 +9320,10 @@ bpf_program__attach_kprobe_opts(struct bpf_program *prog,
 {
 	DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts);
 	char errmsg[STRERR_BUFSIZE];
+	char *legacy_probe = NULL;
 	struct bpf_link *link;
 	unsigned long offset;
-	bool retprobe;
+	bool retprobe, legacy;
 	int pfd, err;
 
 	if (!OPTS_VALID(opts, bpf_kprobe_opts))
@@ -9231,8 +9333,19 @@ bpf_program__attach_kprobe_opts(struct bpf_program *prog,
 	offset = OPTS_GET(opts, offset, 0);
 	pe_opts.bpf_cookie = OPTS_GET(opts, bpf_cookie, 0);
 
-	pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,
-				    offset, -1 /* pid */, 0 /* ref_ctr_off */);
+	legacy = determine_kprobe_perf_type() < 0;
+	if (!legacy) {
+		pfd = perf_event_open_probe(false /* uprobe */, retprobe,
+					    func_name, offset,
+					    -1 /* pid */, 0 /* ref_ctr_off */);
+	} else {
+		legacy_probe = strdup(func_name);
+		if (!legacy_probe)
+			return libbpf_err_ptr(-ENOMEM);
+
+		pfd = perf_event_kprobe_open_legacy(retprobe, func_name,
+						    offset, -1 /* pid */);
+	}
 	if (pfd < 0) {
 		pr_warn("prog '%s': failed to create %s '%s' perf event: %s\n",
 			prog->name, retprobe ? "kretprobe" : "kprobe", func_name,
@@ -9248,6 +9361,13 @@ bpf_program__attach_kprobe_opts(struct bpf_program *prog,
 			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
 		return libbpf_err_ptr(err);
 	}
+	if (legacy) {
+		struct bpf_link_perf *perf_link = container_of(link, struct bpf_link_perf, link);
+
+		perf_link->legacy_probe_name = legacy_probe;
+		perf_link->legacy_is_retprobe = retprobe;
+	}
+
 	return link;
 }
 

From 53df63ccdc0258118e53089197d0428c5330cc9c Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 13 Sep 2021 18:47:30 -0700
Subject: [PATCH 36/63] selftests/bpf: Update selftests to always provide
 "struct_ops" SEC

Update struct_ops selftests to always specify "struct_ops" section
prefix. Libbpf will require a proper BPF program type set in the next
patch, so this prevents tests breaking.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210914014733.2768-2-andrii@kernel.org
---
 tools/testing/selftests/bpf/progs/bpf_cubic.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c
index f62df4d023f9..d9660e7200e2 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c
@@ -169,11 +169,7 @@ static __always_inline void bictcp_hystart_reset(struct sock *sk)
 	ca->sample_cnt = 0;
 }
 
-/* "struct_ops/" prefix is not a requirement
- * It will be recognized as BPF_PROG_TYPE_STRUCT_OPS
- * as long as it is used in one of the func ptr
- * under SEC(".struct_ops").
- */
+/* "struct_ops/" prefix is a requirement */
 SEC("struct_ops/bpf_cubic_init")
 void BPF_PROG(bpf_cubic_init, struct sock *sk)
 {
@@ -188,10 +184,8 @@ void BPF_PROG(bpf_cubic_init, struct sock *sk)
 		tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
 }
 
-/* No prefix in SEC will also work.
- * The remaining tcp-cubic functions have an easier way.
- */
-SEC("no-sec-prefix-bictcp_cwnd_event")
+/* "struct_ops" prefix is a requirement */
+SEC("struct_ops/bpf_cubic_cwnd_event")
 void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
 {
 	if (event == CA_EVENT_TX_START) {

From 91b4d1d1d54431c72f3a7ff034f30a635f787426 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 13 Sep 2021 18:47:31 -0700
Subject: [PATCH 37/63] libbpf: Ensure BPF prog types are set before
 relocations

Refactor bpf_object__open() sequencing to perform BPF program type
detection based on SEC() definitions before we get to relocations
collection. This allows to have more information about BPF program by
the time we get to, say, struct_ops relocation gathering. This,
subsequently, simplifies struct_ops logic and removes the need to
perform extra find_sec_def() resolution.

With this patch libbpf will require all struct_ops BPF programs to be
marked with SEC("struct_ops") or SEC("struct_ops/xxx") annotations.
Real-world applications are already doing that through something like
selftests's BPF_STRUCT_OPS() macro. This change streamlines libbpf's
internal handling of SEC() definitions and is in the sprit of
upcoming libbpf-1.0 section strictness changes ([0]).

  [0] https://github.com/libbpf/libbpf/wiki/Libbpf:-the-road-to-v1.0#stricter-and-more-uniform-bpf-program-section-name-sec-handling

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210914014733.2768-3-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c | 93 +++++++++++++++++++++++-------------------
 1 file changed, 51 insertions(+), 42 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 9d99183bffba..6abfbf283092 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6373,12 +6373,37 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
 
 static const struct bpf_sec_def *find_sec_def(const char *sec_name);
 
+static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object_open_opts *opts)
+{
+	struct bpf_program *prog;
+
+	bpf_object__for_each_program(prog, obj) {
+		prog->sec_def = find_sec_def(prog->sec_name);
+		if (!prog->sec_def) {
+			/* couldn't guess, but user might manually specify */
+			pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
+				prog->name, prog->sec_name);
+			continue;
+		}
+
+		if (prog->sec_def->is_sleepable)
+			prog->prog_flags |= BPF_F_SLEEPABLE;
+		bpf_program__set_type(prog, prog->sec_def->prog_type);
+		bpf_program__set_expected_attach_type(prog, prog->sec_def->expected_attach_type);
+
+		if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING ||
+		    prog->sec_def->prog_type == BPF_PROG_TYPE_EXT)
+			prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
+	}
+
+	return 0;
+}
+
 static struct bpf_object *
 __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
 		   const struct bpf_object_open_opts *opts)
 {
 	const char *obj_name, *kconfig, *btf_tmp_path;
-	struct bpf_program *prog;
 	struct bpf_object *obj;
 	char tmp_name[64];
 	int err;
@@ -6436,31 +6461,13 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
 	err = err ? : bpf_object__collect_externs(obj);
 	err = err ? : bpf_object__finalize_btf(obj);
 	err = err ? : bpf_object__init_maps(obj, opts);
+	err = err ? : bpf_object_init_progs(obj, opts);
 	err = err ? : bpf_object__collect_relos(obj);
 	if (err)
 		goto out;
+
 	bpf_object__elf_finish(obj);
 
-	bpf_object__for_each_program(prog, obj) {
-		prog->sec_def = find_sec_def(prog->sec_name);
-		if (!prog->sec_def) {
-			/* couldn't guess, but user might manually specify */
-			pr_debug("prog '%s': unrecognized ELF section name '%s'\n",
-				prog->name, prog->sec_name);
-			continue;
-		}
-
-		if (prog->sec_def->is_sleepable)
-			prog->prog_flags |= BPF_F_SLEEPABLE;
-		bpf_program__set_type(prog, prog->sec_def->prog_type);
-		bpf_program__set_expected_attach_type(prog,
-				prog->sec_def->expected_attach_type);
-
-		if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING ||
-		    prog->sec_def->prog_type == BPF_PROG_TYPE_EXT)
-			prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
-	}
-
 	return obj;
 out:
 	bpf_object__close(obj);
@@ -8250,35 +8257,37 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
 			return -EINVAL;
 		}
 
-		if (prog->type == BPF_PROG_TYPE_UNSPEC) {
-			const struct bpf_sec_def *sec_def;
+		/* prevent the use of BPF prog with invalid type */
+		if (prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
+			pr_warn("struct_ops reloc %s: prog %s is not struct_ops BPF program\n",
+				map->name, prog->name);
+			return -EINVAL;
+		}
 
-			sec_def = find_sec_def(prog->sec_name);
-			if (sec_def &&
-			    sec_def->prog_type != BPF_PROG_TYPE_STRUCT_OPS) {
-				/* for pr_warn */
-				prog->type = sec_def->prog_type;
-				goto invalid_prog;
-			}
-
-			prog->type = BPF_PROG_TYPE_STRUCT_OPS;
+		/* if we haven't yet processed this BPF program, record proper
+		 * attach_btf_id and member_idx
+		 */
+		if (!prog->attach_btf_id) {
 			prog->attach_btf_id = st_ops->type_id;
 			prog->expected_attach_type = member_idx;
-		} else if (prog->type != BPF_PROG_TYPE_STRUCT_OPS ||
-			   prog->attach_btf_id != st_ops->type_id ||
-			   prog->expected_attach_type != member_idx) {
-			goto invalid_prog;
 		}
+
+		/* struct_ops BPF prog can be re-used between multiple
+		 * .struct_ops as long as it's the same struct_ops struct
+		 * definition and the same function pointer field
+		 */
+		if (prog->attach_btf_id != st_ops->type_id ||
+		    prog->expected_attach_type != member_idx) {
+			pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
+				map->name, prog->name, prog->sec_name, prog->type,
+				prog->attach_btf_id, prog->expected_attach_type, name);
+			return -EINVAL;
+		}
+
 		st_ops->progs[member_idx] = prog;
 	}
 
 	return 0;
-
-invalid_prog:
-	pr_warn("struct_ops reloc %s: cannot use prog %s in sec %s with type %u attach_btf_id %u expected_attach_type %u for func ptr %s\n",
-		map->name, prog->name, prog->sec_name, prog->type,
-		prog->attach_btf_id, prog->expected_attach_type, name);
-	return -EINVAL;
 }
 
 #define BTF_TRACE_PREFIX "btf_trace_"

From 5532dfd42e4846e84d346a6dfe01e477e35baa65 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 13 Sep 2021 18:47:32 -0700
Subject: [PATCH 38/63] libbpf: Simplify BPF program auto-attach code

Remove the need to explicitly pass bpf_sec_def for auto-attachable BPF
programs, as it is already recorded at bpf_object__open() time for all
recognized type of BPF programs. This further reduces number of explicit
calls to find_sec_def(), simplifying further refactorings.

No functional changes are done by this patch.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210914014733.2768-4-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c | 61 +++++++++++++++---------------------------
 1 file changed, 22 insertions(+), 39 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 6abfbf283092..e473fbd2b361 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -218,8 +218,7 @@ struct reloc_desc {
 
 struct bpf_sec_def;
 
-typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec,
-					struct bpf_program *prog);
+typedef struct bpf_link *(*attach_fn_t)(struct bpf_program *prog);
 
 struct bpf_sec_def {
 	const char *sec;
@@ -7920,18 +7919,12 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog,
 	__VA_ARGS__							    \
 }
 
-static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
-				      struct bpf_program *prog);
-static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
-				  struct bpf_program *prog);
-static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
-				      struct bpf_program *prog);
-static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
-				     struct bpf_program *prog);
-static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
-				   struct bpf_program *prog);
-static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
-				    struct bpf_program *prog);
+static struct bpf_link *attach_kprobe(struct bpf_program *prog);
+static struct bpf_link *attach_tp(struct bpf_program *prog);
+static struct bpf_link *attach_raw_tp(struct bpf_program *prog);
+static struct bpf_link *attach_trace(struct bpf_program *prog);
+static struct bpf_link *attach_lsm(struct bpf_program *prog);
+static struct bpf_link *attach_iter(struct bpf_program *prog);
 
 static const struct bpf_sec_def section_defs[] = {
 	BPF_PROG_SEC("socket",			BPF_PROG_TYPE_SOCKET_FILTER),
@@ -9391,8 +9384,7 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
 	return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
 }
 
-static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
-				      struct bpf_program *prog)
+static struct bpf_link *attach_kprobe(struct bpf_program *prog)
 {
 	DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
 	unsigned long offset = 0;
@@ -9401,8 +9393,8 @@ static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
 	char *func;
 	int n, err;
 
-	func_name = prog->sec_name + sec->len;
-	opts.retprobe = strcmp(sec->sec, "kretprobe/") == 0;
+	func_name = prog->sec_name + prog->sec_def->len;
+	opts.retprobe = strcmp(prog->sec_def->sec, "kretprobe/") == 0;
 
 	n = sscanf(func_name, "%m[a-zA-Z0-9_.]+%li", &func, &offset);
 	if (n < 1) {
@@ -9565,8 +9557,7 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
 	return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL);
 }
 
-static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
-				  struct bpf_program *prog)
+static struct bpf_link *attach_tp(struct bpf_program *prog)
 {
 	char *sec_name, *tp_cat, *tp_name;
 	struct bpf_link *link;
@@ -9576,7 +9567,7 @@ static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
 		return libbpf_err_ptr(-ENOMEM);
 
 	/* extract "tp/<category>/<name>" */
-	tp_cat = sec_name + sec->len;
+	tp_cat = sec_name + prog->sec_def->len;
 	tp_name = strchr(tp_cat, '/');
 	if (!tp_name) {
 		free(sec_name);
@@ -9620,10 +9611,9 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
 	return link;
 }
 
-static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
-				      struct bpf_program *prog)
+static struct bpf_link *attach_raw_tp(struct bpf_program *prog)
 {
-	const char *tp_name = prog->sec_name + sec->len;
+	const char *tp_name = prog->sec_name + prog->sec_def->len;
 
 	return bpf_program__attach_raw_tracepoint(prog, tp_name);
 }
@@ -9668,14 +9658,12 @@ struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog)
 	return bpf_program__attach_btf_id(prog);
 }
 
-static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
-				     struct bpf_program *prog)
+static struct bpf_link *attach_trace(struct bpf_program *prog)
 {
 	return bpf_program__attach_trace(prog);
 }
 
-static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
-				   struct bpf_program *prog)
+static struct bpf_link *attach_lsm(struct bpf_program *prog)
 {
 	return bpf_program__attach_lsm(prog);
 }
@@ -9806,21 +9794,17 @@ bpf_program__attach_iter(struct bpf_program *prog,
 	return link;
 }
 
-static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
-				    struct bpf_program *prog)
+static struct bpf_link *attach_iter(struct bpf_program *prog)
 {
 	return bpf_program__attach_iter(prog, NULL);
 }
 
 struct bpf_link *bpf_program__attach(struct bpf_program *prog)
 {
-	const struct bpf_sec_def *sec_def;
-
-	sec_def = find_sec_def(prog->sec_name);
-	if (!sec_def || !sec_def->attach_fn)
+	if (!prog->sec_def || !prog->sec_def->attach_fn)
 		return libbpf_err_ptr(-ESRCH);
 
-	return sec_def->attach_fn(sec_def, prog);
+	return prog->sec_def->attach_fn(prog);
 }
 
 static int bpf_link__detach_struct_ops(struct bpf_link *link)
@@ -10899,16 +10883,15 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
 	for (i = 0; i < s->prog_cnt; i++) {
 		struct bpf_program *prog = *s->progs[i].prog;
 		struct bpf_link **link = s->progs[i].link;
-		const struct bpf_sec_def *sec_def;
 
 		if (!prog->load)
 			continue;
 
-		sec_def = find_sec_def(prog->sec_name);
-		if (!sec_def || !sec_def->attach_fn)
+		/* auto-attaching not supported for this program */
+		if (!prog->sec_def || !prog->sec_def->attach_fn)
 			continue;
 
-		*link = sec_def->attach_fn(sec_def, prog);
+		*link = prog->sec_def->attach_fn(prog);
 		err = libbpf_get_error(*link);
 		if (err) {
 			pr_warn("failed to auto-attach program '%s': %d\n",

From b6291a6f30d35bd4459dc35aac2f30669a4356ac Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 13 Sep 2021 18:47:33 -0700
Subject: [PATCH 39/63] libbpf: Minimize explicit iterator of section
 definition array

Remove almost all the code that explicitly iterated BPF program section
definitions in favor of using find_sec_def(). The only remaining user of
section_defs is libbpf_get_type_names that has to iterate all of them to
construct its result.

Having one internal API entry point for section definitions will
simplify further refactorings around libbpf's program section
definitions parsing.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20210914014733.2768-5-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c | 46 +++++++++++++++++-------------------------
 1 file changed, 19 insertions(+), 27 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e473fbd2b361..62ed073ea944 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8438,22 +8438,13 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd,
 	__u32 attach_prog_fd = prog->attach_prog_fd;
 	const char *name = prog->sec_name, *attach_name;
 	const struct bpf_sec_def *sec = NULL;
-	int i, err = 0;
+	int err = 0;
 
 	if (!name)
 		return -EINVAL;
 
-	for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
-		if (!section_defs[i].is_attach_btf)
-			continue;
-		if (strncmp(name, section_defs[i].sec, section_defs[i].len))
-			continue;
-
-		sec = &section_defs[i];
-		break;
-	}
-
-	if (!sec) {
+	sec = find_sec_def(name);
+	if (!sec || !sec->is_attach_btf) {
 		pr_warn("failed to identify BTF ID based on ELF section name '%s'\n", name);
 		return -ESRCH;
 	}
@@ -8491,27 +8482,28 @@ int libbpf_attach_type_by_name(const char *name,
 			       enum bpf_attach_type *attach_type)
 {
 	char *type_names;
-	int i;
+	const struct bpf_sec_def *sec_def;
 
 	if (!name)
 		return libbpf_err(-EINVAL);
 
-	for (i = 0; i < ARRAY_SIZE(section_defs); i++) {
-		if (strncmp(name, section_defs[i].sec, section_defs[i].len))
-			continue;
-		if (!section_defs[i].is_attachable)
-			return libbpf_err(-EINVAL);
-		*attach_type = section_defs[i].expected_attach_type;
-		return 0;
-	}
-	pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
-	type_names = libbpf_get_type_names(true);
-	if (type_names != NULL) {
-		pr_debug("attachable section(type) names are:%s\n", type_names);
-		free(type_names);
+	sec_def = find_sec_def(name);
+	if (!sec_def) {
+		pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
+		type_names = libbpf_get_type_names(true);
+		if (type_names != NULL) {
+			pr_debug("attachable section(type) names are:%s\n", type_names);
+			free(type_names);
+		}
+
+		return libbpf_err(-EINVAL);
 	}
 
-	return libbpf_err(-EINVAL);
+	if (!sec_def->is_attachable)
+		return libbpf_err(-EINVAL);
+
+	*attach_type = sec_def->expected_attach_type;
+	return 0;
 }
 
 int bpf_map__fd(const struct bpf_map *map)

From c0354077439bc9adcc9f2c96d6bbaf8b13748317 Mon Sep 17 00:00:00 2001
From: Jie Meng <jmeng@fb.com>
Date: Mon, 13 Sep 2021 14:13:37 -0700
Subject: [PATCH 40/63] bpf,x64 Emit IMUL instead of MUL for x86-64

IMUL allows for multiple operands and saving and storing rax/rdx is no
longer needed. Signedness of the operands doesn't matter here because
the we only keep the lower 32/64 bit of the product for 32/64 bit
multiplications.

Signed-off-by: Jie Meng <jmeng@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210913211337.1564014-1-jmeng@fb.com
---
 arch/x86/net/bpf_jit_comp.c                | 55 ++++++++++------------
 tools/testing/selftests/bpf/verifier/jit.c | 22 +++++++--
 2 files changed, 43 insertions(+), 34 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 0fe6aacef3db..20d2d6a1f9de 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -1070,41 +1070,34 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 			break;
 
 		case BPF_ALU | BPF_MUL | BPF_K:
-		case BPF_ALU | BPF_MUL | BPF_X:
 		case BPF_ALU64 | BPF_MUL | BPF_K:
-		case BPF_ALU64 | BPF_MUL | BPF_X:
-		{
-			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
+			if (BPF_CLASS(insn->code) == BPF_ALU64)
+				EMIT1(add_2mod(0x48, dst_reg, dst_reg));
+			else if (is_ereg(dst_reg))
+				EMIT1(add_2mod(0x40, dst_reg, dst_reg));
 
-			if (dst_reg != BPF_REG_0)
-				EMIT1(0x50); /* push rax */
-			if (dst_reg != BPF_REG_3)
-				EMIT1(0x52); /* push rdx */
-
-			/* mov r11, dst_reg */
-			EMIT_mov(AUX_REG, dst_reg);
-
-			if (BPF_SRC(insn->code) == BPF_X)
-				emit_mov_reg(&prog, is64, BPF_REG_0, src_reg);
+			if (is_imm8(imm32))
+				/* imul dst_reg, dst_reg, imm8 */
+				EMIT3(0x6B, add_2reg(0xC0, dst_reg, dst_reg),
+				      imm32);
 			else
-				emit_mov_imm32(&prog, is64, BPF_REG_0, imm32);
-
-			if (is64)
-				EMIT1(add_1mod(0x48, AUX_REG));
-			else if (is_ereg(AUX_REG))
-				EMIT1(add_1mod(0x40, AUX_REG));
-			/* mul(q) r11 */
-			EMIT2(0xF7, add_1reg(0xE0, AUX_REG));
-
-			if (dst_reg != BPF_REG_3)
-				EMIT1(0x5A); /* pop rdx */
-			if (dst_reg != BPF_REG_0) {
-				/* mov dst_reg, rax */
-				EMIT_mov(dst_reg, BPF_REG_0);
-				EMIT1(0x58); /* pop rax */
-			}
+				/* imul dst_reg, dst_reg, imm32 */
+				EMIT2_off32(0x69,
+					    add_2reg(0xC0, dst_reg, dst_reg),
+					    imm32);
 			break;
-		}
+
+		case BPF_ALU | BPF_MUL | BPF_X:
+		case BPF_ALU64 | BPF_MUL | BPF_X:
+			if (BPF_CLASS(insn->code) == BPF_ALU64)
+				EMIT1(add_2mod(0x48, src_reg, dst_reg));
+			else if (is_ereg(dst_reg) || is_ereg(src_reg))
+				EMIT1(add_2mod(0x40, src_reg, dst_reg));
+
+			/* imul dst_reg, src_reg */
+			EMIT3(0x0F, 0xAF, add_2reg(0xC0, src_reg, dst_reg));
+			break;
+
 			/* Shifts */
 		case BPF_ALU | BPF_LSH | BPF_K:
 		case BPF_ALU | BPF_RSH | BPF_K:
diff --git a/tools/testing/selftests/bpf/verifier/jit.c b/tools/testing/selftests/bpf/verifier/jit.c
index df215e004566..eedcb752bf70 100644
--- a/tools/testing/selftests/bpf/verifier/jit.c
+++ b/tools/testing/selftests/bpf/verifier/jit.c
@@ -62,6 +62,11 @@
 	BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_0, 1),
 	BPF_EXIT_INSN(),
+	BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+	BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 0xefefef),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
 	BPF_MOV32_REG(BPF_REG_2, BPF_REG_2),
 	BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
 	BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
@@ -73,11 +78,22 @@
 	BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_0, 1),
 	BPF_EXIT_INSN(),
+	BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+	BPF_ALU32_IMM(BPF_MUL, BPF_REG_3, 0xefefef),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+	BPF_LD_IMM64(BPF_REG_2, 0x2ad4d4aaULL),
+	BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, 0x2b),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
 	BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL),
 	BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL),
-	BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
-	BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1),
-	BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2),
+	BPF_LD_IMM64(BPF_REG_5, 0xeeff0d413122ULL),
+	BPF_ALU32_REG(BPF_MUL, BPF_REG_5, BPF_REG_1),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_5, BPF_REG_0, 2),
 	BPF_MOV64_IMM(BPF_REG_0, 1),
 	BPF_EXIT_INSN(),
 	BPF_MOV64_IMM(BPF_REG_0, 2),

From 8987ede3ed276f669eb88d952184e1d2bf42ea6b Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Tue, 14 Sep 2021 09:22:28 -0700
Subject: [PATCH 41/63] selftests/bpf: Fix .gitignore to not ignore
 test_progs.c

List all possible test_progs flavors explicitly to avoid accidentally
ignoring valid source code files. In this case, test_progs.c was still
ignored after recent 809ed84de8b3 ("selftests/bpf: Whitelist test_progs.h
from .gitignore") fix that added exception only for test_progs.h.

Fixes: 74b5a5968fe8 ("selftests/bpf: Replace test_progs and test_maps w/ general rule")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210914162228.3995740-1-andrii@kernel.org
---
 tools/testing/selftests/bpf/.gitignore | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 433f8bef261e..1dad8d617da8 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -9,8 +9,9 @@ test_tag
 FEATURE-DUMP.libbpf
 fixdep
 test_dev_cgroup
-/test_progs*
-!test_progs.h
+/test_progs
+/test_progs-no_alu32
+/test_progs-bpf_gcc
 test_verifier_log
 feature
 test_sock

From 41ced4cd88020c9d4b71ff7c50d020f081efa4a0 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 14 Sep 2021 15:30:09 -0700
Subject: [PATCH 42/63] btf: Change BTF_KIND_* macros to enums

Change BTF_KIND_* macros to enums so they are encoded in dwarf and
appear in vmlinux.h. This will make it easier for bpf programs
to use these constants without macro definitions.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210914223009.245307-1-yhs@fb.com
---
 include/uapi/linux/btf.h       | 41 ++++++++++++++++++----------------
 tools/include/uapi/linux/btf.h | 41 ++++++++++++++++++----------------
 2 files changed, 44 insertions(+), 38 deletions(-)

diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index d27b1708efe9..10e401073dd1 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -56,25 +56,28 @@ struct btf_type {
 #define BTF_INFO_VLEN(info)	((info) & 0xffff)
 #define BTF_INFO_KFLAG(info)	((info) >> 31)
 
-#define BTF_KIND_UNKN		0	/* Unknown	*/
-#define BTF_KIND_INT		1	/* Integer	*/
-#define BTF_KIND_PTR		2	/* Pointer	*/
-#define BTF_KIND_ARRAY		3	/* Array	*/
-#define BTF_KIND_STRUCT		4	/* Struct	*/
-#define BTF_KIND_UNION		5	/* Union	*/
-#define BTF_KIND_ENUM		6	/* Enumeration	*/
-#define BTF_KIND_FWD		7	/* Forward	*/
-#define BTF_KIND_TYPEDEF	8	/* Typedef	*/
-#define BTF_KIND_VOLATILE	9	/* Volatile	*/
-#define BTF_KIND_CONST		10	/* Const	*/
-#define BTF_KIND_RESTRICT	11	/* Restrict	*/
-#define BTF_KIND_FUNC		12	/* Function	*/
-#define BTF_KIND_FUNC_PROTO	13	/* Function Proto	*/
-#define BTF_KIND_VAR		14	/* Variable	*/
-#define BTF_KIND_DATASEC	15	/* Section	*/
-#define BTF_KIND_FLOAT		16	/* Floating point	*/
-#define BTF_KIND_MAX		BTF_KIND_FLOAT
-#define NR_BTF_KINDS		(BTF_KIND_MAX + 1)
+enum {
+	BTF_KIND_UNKN		= 0,	/* Unknown	*/
+	BTF_KIND_INT		= 1,	/* Integer	*/
+	BTF_KIND_PTR		= 2,	/* Pointer	*/
+	BTF_KIND_ARRAY		= 3,	/* Array	*/
+	BTF_KIND_STRUCT		= 4,	/* Struct	*/
+	BTF_KIND_UNION		= 5,	/* Union	*/
+	BTF_KIND_ENUM		= 6,	/* Enumeration	*/
+	BTF_KIND_FWD		= 7,	/* Forward	*/
+	BTF_KIND_TYPEDEF	= 8,	/* Typedef	*/
+	BTF_KIND_VOLATILE	= 9,	/* Volatile	*/
+	BTF_KIND_CONST		= 10,	/* Const	*/
+	BTF_KIND_RESTRICT	= 11,	/* Restrict	*/
+	BTF_KIND_FUNC		= 12,	/* Function	*/
+	BTF_KIND_FUNC_PROTO	= 13,	/* Function Proto	*/
+	BTF_KIND_VAR		= 14,	/* Variable	*/
+	BTF_KIND_DATASEC	= 15,	/* Section	*/
+	BTF_KIND_FLOAT		= 16,	/* Floating point	*/
+
+	NR_BTF_KINDS,
+	BTF_KIND_MAX		= NR_BTF_KINDS - 1,
+};
 
 /* For some specific BTF_KIND, "struct btf_type" is immediately
  * followed by extra data.
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index d27b1708efe9..10e401073dd1 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -56,25 +56,28 @@ struct btf_type {
 #define BTF_INFO_VLEN(info)	((info) & 0xffff)
 #define BTF_INFO_KFLAG(info)	((info) >> 31)
 
-#define BTF_KIND_UNKN		0	/* Unknown	*/
-#define BTF_KIND_INT		1	/* Integer	*/
-#define BTF_KIND_PTR		2	/* Pointer	*/
-#define BTF_KIND_ARRAY		3	/* Array	*/
-#define BTF_KIND_STRUCT		4	/* Struct	*/
-#define BTF_KIND_UNION		5	/* Union	*/
-#define BTF_KIND_ENUM		6	/* Enumeration	*/
-#define BTF_KIND_FWD		7	/* Forward	*/
-#define BTF_KIND_TYPEDEF	8	/* Typedef	*/
-#define BTF_KIND_VOLATILE	9	/* Volatile	*/
-#define BTF_KIND_CONST		10	/* Const	*/
-#define BTF_KIND_RESTRICT	11	/* Restrict	*/
-#define BTF_KIND_FUNC		12	/* Function	*/
-#define BTF_KIND_FUNC_PROTO	13	/* Function Proto	*/
-#define BTF_KIND_VAR		14	/* Variable	*/
-#define BTF_KIND_DATASEC	15	/* Section	*/
-#define BTF_KIND_FLOAT		16	/* Floating point	*/
-#define BTF_KIND_MAX		BTF_KIND_FLOAT
-#define NR_BTF_KINDS		(BTF_KIND_MAX + 1)
+enum {
+	BTF_KIND_UNKN		= 0,	/* Unknown	*/
+	BTF_KIND_INT		= 1,	/* Integer	*/
+	BTF_KIND_PTR		= 2,	/* Pointer	*/
+	BTF_KIND_ARRAY		= 3,	/* Array	*/
+	BTF_KIND_STRUCT		= 4,	/* Struct	*/
+	BTF_KIND_UNION		= 5,	/* Union	*/
+	BTF_KIND_ENUM		= 6,	/* Enumeration	*/
+	BTF_KIND_FWD		= 7,	/* Forward	*/
+	BTF_KIND_TYPEDEF	= 8,	/* Typedef	*/
+	BTF_KIND_VOLATILE	= 9,	/* Volatile	*/
+	BTF_KIND_CONST		= 10,	/* Const	*/
+	BTF_KIND_RESTRICT	= 11,	/* Restrict	*/
+	BTF_KIND_FUNC		= 12,	/* Function	*/
+	BTF_KIND_FUNC_PROTO	= 13,	/* Function Proto	*/
+	BTF_KIND_VAR		= 14,	/* Variable	*/
+	BTF_KIND_DATASEC	= 15,	/* Section	*/
+	BTF_KIND_FLOAT		= 16,	/* Floating point	*/
+
+	NR_BTF_KINDS,
+	BTF_KIND_MAX		= NR_BTF_KINDS - 1,
+};
 
 /* For some specific BTF_KIND, "struct btf_type" is immediately
  * followed by extra data.

From b5ea834dde6b6e7f75e51d5f66dac8cd7c97b5ef Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 14 Sep 2021 15:30:15 -0700
Subject: [PATCH 43/63] bpf: Support for new btf kind BTF_KIND_TAG

LLVM14 added support for a new C attribute ([1])
  __attribute__((btf_tag("arbitrary_str")))
This attribute will be emitted to dwarf ([2]) and pahole
will convert it to BTF. Or for bpf target, this
attribute will be emitted to BTF directly ([3], [4]).
The attribute is intended to provide additional
information for
  - struct/union type or struct/union member
  - static/global variables
  - static/global function or function parameter.

For linux kernel, the btf_tag can be applied
in various places to specify user pointer,
function pre- or post- condition, function
allow/deny in certain context, etc. Such information
will be encoded in vmlinux BTF and can be used
by verifier.

The btf_tag can also be applied to bpf programs
to help global verifiable functions, e.g.,
specifying preconditions, etc.

This patch added basic parsing and checking support
in kernel for new BTF_KIND_TAG kind.

 [1] https://reviews.llvm.org/D106614
 [2] https://reviews.llvm.org/D106621
 [3] https://reviews.llvm.org/D106622
 [4] https://reviews.llvm.org/D109560

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210914223015.245546-1-yhs@fb.com
---
 include/uapi/linux/btf.h       |  14 +++-
 kernel/bpf/btf.c               | 128 +++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/btf.h |  14 +++-
 3 files changed, 154 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index 10e401073dd1..642b6ecb37d7 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -43,7 +43,7 @@ struct btf_type {
 	 * "size" tells the size of the type it is describing.
 	 *
 	 * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
-	 * FUNC, FUNC_PROTO and VAR.
+	 * FUNC, FUNC_PROTO, VAR and TAG.
 	 * "type" is a type_id referring to another type.
 	 */
 	union {
@@ -74,6 +74,7 @@ enum {
 	BTF_KIND_VAR		= 14,	/* Variable	*/
 	BTF_KIND_DATASEC	= 15,	/* Section	*/
 	BTF_KIND_FLOAT		= 16,	/* Floating point	*/
+	BTF_KIND_TAG		= 17,	/* Tag */
 
 	NR_BTF_KINDS,
 	BTF_KIND_MAX		= NR_BTF_KINDS - 1,
@@ -173,4 +174,15 @@ struct btf_var_secinfo {
 	__u32	size;
 };
 
+/* BTF_KIND_TAG is followed by a single "struct btf_tag" to describe
+ * additional information related to the tag applied location.
+ * If component_idx == -1, the tag is applied to a struct, union,
+ * variable or function. Otherwise, it is applied to a struct/union
+ * member or a func argument, and component_idx indicates which member
+ * or argument (0 ... vlen-1).
+ */
+struct btf_tag {
+       __s32   component_idx;
+};
+
 #endif /* _UAPI__LINUX_BTF_H__ */
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index dfe61df4f974..c3d605b22473 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -281,6 +281,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
 	[BTF_KIND_VAR]		= "VAR",
 	[BTF_KIND_DATASEC]	= "DATASEC",
 	[BTF_KIND_FLOAT]	= "FLOAT",
+	[BTF_KIND_TAG]		= "TAG",
 };
 
 const char *btf_type_str(const struct btf_type *t)
@@ -459,6 +460,17 @@ static bool btf_type_is_datasec(const struct btf_type *t)
 	return BTF_INFO_KIND(t->info) == BTF_KIND_DATASEC;
 }
 
+static bool btf_type_is_tag(const struct btf_type *t)
+{
+	return BTF_INFO_KIND(t->info) == BTF_KIND_TAG;
+}
+
+static bool btf_type_is_tag_target(const struct btf_type *t)
+{
+	return btf_type_is_func(t) || btf_type_is_struct(t) ||
+	       btf_type_is_var(t);
+}
+
 u32 btf_nr_types(const struct btf *btf)
 {
 	u32 total = 0;
@@ -537,6 +549,7 @@ const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf,
 static bool btf_type_is_resolve_source_only(const struct btf_type *t)
 {
 	return btf_type_is_var(t) ||
+	       btf_type_is_tag(t) ||
 	       btf_type_is_datasec(t);
 }
 
@@ -563,6 +576,7 @@ static bool btf_type_needs_resolve(const struct btf_type *t)
 	       btf_type_is_struct(t) ||
 	       btf_type_is_array(t) ||
 	       btf_type_is_var(t) ||
+	       btf_type_is_tag(t) ||
 	       btf_type_is_datasec(t);
 }
 
@@ -616,6 +630,11 @@ static const struct btf_var *btf_type_var(const struct btf_type *t)
 	return (const struct btf_var *)(t + 1);
 }
 
+static const struct btf_tag *btf_type_tag(const struct btf_type *t)
+{
+	return (const struct btf_tag *)(t + 1);
+}
+
 static const struct btf_kind_operations *btf_type_ops(const struct btf_type *t)
 {
 	return kind_ops[BTF_INFO_KIND(t->info)];
@@ -3801,6 +3820,110 @@ static const struct btf_kind_operations float_ops = {
 	.show = btf_df_show,
 };
 
+static s32 btf_tag_check_meta(struct btf_verifier_env *env,
+			      const struct btf_type *t,
+			      u32 meta_left)
+{
+	const struct btf_tag *tag;
+	u32 meta_needed = sizeof(*tag);
+	s32 component_idx;
+	const char *value;
+
+	if (meta_left < meta_needed) {
+		btf_verifier_log_basic(env, t,
+				       "meta_left:%u meta_needed:%u",
+				       meta_left, meta_needed);
+		return -EINVAL;
+	}
+
+	value = btf_name_by_offset(env->btf, t->name_off);
+	if (!value || !value[0]) {
+		btf_verifier_log_type(env, t, "Invalid value");
+		return -EINVAL;
+	}
+
+	if (btf_type_vlen(t)) {
+		btf_verifier_log_type(env, t, "vlen != 0");
+		return -EINVAL;
+	}
+
+	if (btf_type_kflag(t)) {
+		btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
+		return -EINVAL;
+	}
+
+	component_idx = btf_type_tag(t)->component_idx;
+	if (component_idx < -1) {
+		btf_verifier_log_type(env, t, "Invalid component_idx");
+		return -EINVAL;
+	}
+
+	btf_verifier_log_type(env, t, NULL);
+
+	return meta_needed;
+}
+
+static int btf_tag_resolve(struct btf_verifier_env *env,
+			   const struct resolve_vertex *v)
+{
+	const struct btf_type *next_type;
+	const struct btf_type *t = v->t;
+	u32 next_type_id = t->type;
+	struct btf *btf = env->btf;
+	s32 component_idx;
+	u32 vlen;
+
+	next_type = btf_type_by_id(btf, next_type_id);
+	if (!next_type || !btf_type_is_tag_target(next_type)) {
+		btf_verifier_log_type(env, v->t, "Invalid type_id");
+		return -EINVAL;
+	}
+
+	if (!env_type_is_resolve_sink(env, next_type) &&
+	    !env_type_is_resolved(env, next_type_id))
+		return env_stack_push(env, next_type, next_type_id);
+
+	component_idx = btf_type_tag(t)->component_idx;
+	if (component_idx != -1) {
+		if (btf_type_is_var(next_type)) {
+			btf_verifier_log_type(env, v->t, "Invalid component_idx");
+			return -EINVAL;
+		}
+
+		if (btf_type_is_struct(next_type)) {
+			vlen = btf_type_vlen(next_type);
+		} else {
+			/* next_type should be a function */
+			next_type = btf_type_by_id(btf, next_type->type);
+			vlen = btf_type_vlen(next_type);
+		}
+
+		if ((u32)component_idx >= vlen) {
+			btf_verifier_log_type(env, v->t, "Invalid component_idx");
+			return -EINVAL;
+		}
+	}
+
+	env_stack_pop_resolved(env, next_type_id, 0);
+
+	return 0;
+}
+
+static void btf_tag_log(struct btf_verifier_env *env, const struct btf_type *t)
+{
+	btf_verifier_log(env, "type=%u component_idx=%d", t->type,
+			 btf_type_tag(t)->component_idx);
+}
+
+static const struct btf_kind_operations tag_ops = {
+	.check_meta = btf_tag_check_meta,
+	.resolve = btf_tag_resolve,
+	.check_member = btf_df_check_member,
+	.check_kflag_member = btf_df_check_kflag_member,
+	.log_details = btf_tag_log,
+	.show = btf_df_show,
+};
+
 static int btf_func_proto_check(struct btf_verifier_env *env,
 				const struct btf_type *t)
 {
@@ -3935,6 +4058,7 @@ static const struct btf_kind_operations * const kind_ops[NR_BTF_KINDS] = {
 	[BTF_KIND_VAR] = &var_ops,
 	[BTF_KIND_DATASEC] = &datasec_ops,
 	[BTF_KIND_FLOAT] = &float_ops,
+	[BTF_KIND_TAG] = &tag_ops,
 };
 
 static s32 btf_check_meta(struct btf_verifier_env *env,
@@ -4019,6 +4143,10 @@ static bool btf_resolve_valid(struct btf_verifier_env *env,
 		return !btf_resolved_type_id(btf, type_id) &&
 		       !btf_resolved_type_size(btf, type_id);
 
+	if (btf_type_is_tag(t))
+		return btf_resolved_type_id(btf, type_id) &&
+		       !btf_resolved_type_size(btf, type_id);
+
 	if (btf_type_is_modifier(t) || btf_type_is_ptr(t) ||
 	    btf_type_is_var(t)) {
 		t = btf_type_id_resolve(btf, &type_id);
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index 10e401073dd1..642b6ecb37d7 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -43,7 +43,7 @@ struct btf_type {
 	 * "size" tells the size of the type it is describing.
 	 *
 	 * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
-	 * FUNC, FUNC_PROTO and VAR.
+	 * FUNC, FUNC_PROTO, VAR and TAG.
 	 * "type" is a type_id referring to another type.
 	 */
 	union {
@@ -74,6 +74,7 @@ enum {
 	BTF_KIND_VAR		= 14,	/* Variable	*/
 	BTF_KIND_DATASEC	= 15,	/* Section	*/
 	BTF_KIND_FLOAT		= 16,	/* Floating point	*/
+	BTF_KIND_TAG		= 17,	/* Tag */
 
 	NR_BTF_KINDS,
 	BTF_KIND_MAX		= NR_BTF_KINDS - 1,
@@ -173,4 +174,15 @@ struct btf_var_secinfo {
 	__u32	size;
 };
 
+/* BTF_KIND_TAG is followed by a single "struct btf_tag" to describe
+ * additional information related to the tag applied location.
+ * If component_idx == -1, the tag is applied to a struct, union,
+ * variable or function. Otherwise, it is applied to a struct/union
+ * member or a func argument, and component_idx indicates which member
+ * or argument (0 ... vlen-1).
+ */
+struct btf_tag {
+       __s32   component_idx;
+};
+
 #endif /* _UAPI__LINUX_BTF_H__ */

From 30025e8bd80fdc5a4159ec7f9511121ea561f456 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 14 Sep 2021 15:30:20 -0700
Subject: [PATCH 44/63] libbpf: Rename btf_{hash,equal}_int to
 btf_{hash,equal}_int_tag

This patch renames functions btf_{hash,equal}_int() to
btf_{hash,equal}_int_tag() so they can be reused for
BTF_KIND_TAG support. There is no functionality change for
this patch.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210914223020.245829-1-yhs@fb.com
---
 tools/lib/bpf/btf.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 77dc24d58302..7cb6ebf1be37 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -3256,8 +3256,8 @@ static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2)
 	       t1->size == t2->size;
 }
 
-/* Calculate type signature hash of INT. */
-static long btf_hash_int(struct btf_type *t)
+/* Calculate type signature hash of INT or TAG. */
+static long btf_hash_int_tag(struct btf_type *t)
 {
 	__u32 info = *(__u32 *)(t + 1);
 	long h;
@@ -3267,8 +3267,8 @@ static long btf_hash_int(struct btf_type *t)
 	return h;
 }
 
-/* Check structural equality of two INTs. */
-static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2)
+/* Check structural equality of two INTs or TAGs. */
+static bool btf_equal_int_tag(struct btf_type *t1, struct btf_type *t2)
 {
 	__u32 info1, info2;
 
@@ -3535,7 +3535,7 @@ static int btf_dedup_prep(struct btf_dedup *d)
 			h = btf_hash_common(t);
 			break;
 		case BTF_KIND_INT:
-			h = btf_hash_int(t);
+			h = btf_hash_int_tag(t);
 			break;
 		case BTF_KIND_ENUM:
 			h = btf_hash_enum(t);
@@ -3593,11 +3593,11 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
 		return 0;
 
 	case BTF_KIND_INT:
-		h = btf_hash_int(t);
+		h = btf_hash_int_tag(t);
 		for_each_dedup_cand(d, hash_entry, h) {
 			cand_id = (__u32)(long)hash_entry->value;
 			cand = btf_type_by_id(d->btf, cand_id);
-			if (btf_equal_int(t, cand)) {
+			if (btf_equal_int_tag(t, cand)) {
 				new_id = cand_id;
 				break;
 			}
@@ -3881,7 +3881,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
 
 	switch (cand_kind) {
 	case BTF_KIND_INT:
-		return btf_equal_int(cand_type, canon_type);
+		return btf_equal_int_tag(cand_type, canon_type);
 
 	case BTF_KIND_ENUM:
 		if (d->opts.dont_resolve_fwds)

From 5b84bd10363e36ceb7c4c1ae749a3fc8adf8df45 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 14 Sep 2021 15:30:25 -0700
Subject: [PATCH 45/63] libbpf: Add support for BTF_KIND_TAG

Add BTF_KIND_TAG support for parsing and dedup.
Also added sanitization for BTF_KIND_TAG. If BTF_KIND_TAG is not
supported in the kernel, sanitize it to INTs.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210914223025.246687-1-yhs@fb.com
---
 tools/lib/bpf/btf.c             | 68 +++++++++++++++++++++++++++++++++
 tools/lib/bpf/btf.h             | 15 ++++++++
 tools/lib/bpf/btf_dump.c        |  3 ++
 tools/lib/bpf/libbpf.c          | 31 +++++++++++++--
 tools/lib/bpf/libbpf.map        |  2 +
 tools/lib/bpf/libbpf_internal.h |  2 +
 6 files changed, 118 insertions(+), 3 deletions(-)

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 7cb6ebf1be37..6ad63e4d418a 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -304,6 +304,8 @@ static int btf_type_size(const struct btf_type *t)
 		return base_size + sizeof(struct btf_var);
 	case BTF_KIND_DATASEC:
 		return base_size + vlen * sizeof(struct btf_var_secinfo);
+	case BTF_KIND_TAG:
+		return base_size + sizeof(struct btf_tag);
 	default:
 		pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t));
 		return -EINVAL;
@@ -376,6 +378,9 @@ static int btf_bswap_type_rest(struct btf_type *t)
 			v->size = bswap_32(v->size);
 		}
 		return 0;
+	case BTF_KIND_TAG:
+		btf_tag(t)->component_idx = bswap_32(btf_tag(t)->component_idx);
+		return 0;
 	default:
 		pr_debug("Unsupported BTF_KIND:%u\n", btf_kind(t));
 		return -EINVAL;
@@ -586,6 +591,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
 		case BTF_KIND_CONST:
 		case BTF_KIND_RESTRICT:
 		case BTF_KIND_VAR:
+		case BTF_KIND_TAG:
 			type_id = t->type;
 			break;
 		case BTF_KIND_ARRAY:
@@ -2440,6 +2446,48 @@ int btf__add_datasec_var_info(struct btf *btf, int var_type_id, __u32 offset, __
 	return 0;
 }
 
+/*
+ * Append new BTF_KIND_TAG type with:
+ *   - *value* - non-empty/non-NULL string;
+ *   - *ref_type_id* - referenced type ID, it might not exist yet;
+ *   - *component_idx* - -1 for tagging reference type, otherwise struct/union
+ *     member or function argument index;
+ * Returns:
+ *   - >0, type ID of newly added BTF type;
+ *   - <0, on error.
+ */
+int btf__add_tag(struct btf *btf, const char *value, int ref_type_id,
+		 int component_idx)
+{
+	struct btf_type *t;
+	int sz, value_off;
+
+	if (!value || !value[0] || component_idx < -1)
+		return libbpf_err(-EINVAL);
+
+	if (validate_type_id(ref_type_id))
+		return libbpf_err(-EINVAL);
+
+	if (btf_ensure_modifiable(btf))
+		return libbpf_err(-ENOMEM);
+
+	sz = sizeof(struct btf_type) + sizeof(struct btf_tag);
+	t = btf_add_type_mem(btf, sz);
+	if (!t)
+		return libbpf_err(-ENOMEM);
+
+	value_off = btf__add_str(btf, value);
+	if (value_off < 0)
+		return value_off;
+
+	t->name_off = value_off;
+	t->info = btf_type_info(BTF_KIND_TAG, 0, false);
+	t->type = ref_type_id;
+	btf_tag(t)->component_idx = component_idx;
+
+	return btf_commit_type(btf, sz);
+}
+
 struct btf_ext_sec_setup_param {
 	__u32 off;
 	__u32 len;
@@ -3535,6 +3583,7 @@ static int btf_dedup_prep(struct btf_dedup *d)
 			h = btf_hash_common(t);
 			break;
 		case BTF_KIND_INT:
+		case BTF_KIND_TAG:
 			h = btf_hash_int_tag(t);
 			break;
 		case BTF_KIND_ENUM:
@@ -3590,6 +3639,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
 	case BTF_KIND_FUNC_PROTO:
 	case BTF_KIND_VAR:
 	case BTF_KIND_DATASEC:
+	case BTF_KIND_TAG:
 		return 0;
 
 	case BTF_KIND_INT:
@@ -4210,6 +4260,23 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
 		}
 		break;
 
+	case BTF_KIND_TAG:
+		ref_type_id = btf_dedup_ref_type(d, t->type);
+		if (ref_type_id < 0)
+			return ref_type_id;
+		t->type = ref_type_id;
+
+		h = btf_hash_int_tag(t);
+		for_each_dedup_cand(d, hash_entry, h) {
+			cand_id = (__u32)(long)hash_entry->value;
+			cand = btf_type_by_id(d->btf, cand_id);
+			if (btf_equal_int_tag(t, cand)) {
+				new_id = cand_id;
+				break;
+			}
+		}
+		break;
+
 	case BTF_KIND_ARRAY: {
 		struct btf_array *info = btf_array(t);
 
@@ -4482,6 +4549,7 @@ int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ct
 	case BTF_KIND_TYPEDEF:
 	case BTF_KIND_FUNC:
 	case BTF_KIND_VAR:
+	case BTF_KIND_TAG:
 		return visit(&t->type, ctx);
 
 	case BTF_KIND_ARRAY: {
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index f2e2fab950b7..659ea8a2769b 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -143,6 +143,10 @@ LIBBPF_API int btf__add_datasec(struct btf *btf, const char *name, __u32 byte_sz
 LIBBPF_API int btf__add_datasec_var_info(struct btf *btf, int var_type_id,
 					 __u32 offset, __u32 byte_sz);
 
+/* tag construction API */
+LIBBPF_API int btf__add_tag(struct btf *btf, const char *value, int ref_type_id,
+			    int component_idx);
+
 struct btf_dedup_opts {
 	unsigned int dedup_table_size;
 	bool dont_resolve_fwds;
@@ -330,6 +334,11 @@ static inline bool btf_is_float(const struct btf_type *t)
 	return btf_kind(t) == BTF_KIND_FLOAT;
 }
 
+static inline bool btf_is_tag(const struct btf_type *t)
+{
+	return btf_kind(t) == BTF_KIND_TAG;
+}
+
 static inline __u8 btf_int_encoding(const struct btf_type *t)
 {
 	return BTF_INT_ENCODING(*(__u32 *)(t + 1));
@@ -398,6 +407,12 @@ btf_var_secinfos(const struct btf_type *t)
 	return (struct btf_var_secinfo *)(t + 1);
 }
 
+struct btf_tag;
+static inline struct btf_tag *btf_tag(const struct btf_type *t)
+{
+	return (struct btf_tag *)(t + 1);
+}
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index e4b483f15fb9..ad6df97295ae 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -316,6 +316,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
 		case BTF_KIND_TYPEDEF:
 		case BTF_KIND_FUNC:
 		case BTF_KIND_VAR:
+		case BTF_KIND_TAG:
 			d->type_states[t->type].referenced = 1;
 			break;
 
@@ -583,6 +584,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
 	case BTF_KIND_FUNC:
 	case BTF_KIND_VAR:
 	case BTF_KIND_DATASEC:
+	case BTF_KIND_TAG:
 		d->type_states[id].order_state = ORDERED;
 		return 0;
 
@@ -2215,6 +2217,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d,
 	case BTF_KIND_FWD:
 	case BTF_KIND_FUNC:
 	case BTF_KIND_FUNC_PROTO:
+	case BTF_KIND_TAG:
 		err = btf_dump_unsupported_data(d, t, id);
 		break;
 	case BTF_KIND_INT:
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 62ed073ea944..62a43c408d73 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -195,6 +195,8 @@ enum kern_feature_id {
 	FEAT_BTF_FLOAT,
 	/* BPF perf link support */
 	FEAT_PERF_LINK,
+	/* BTF_KIND_TAG support */
+	FEAT_BTF_TAG,
 	__FEAT_CNT,
 };
 
@@ -1986,6 +1988,7 @@ static const char *__btf_kind_str(__u16 kind)
 	case BTF_KIND_VAR: return "var";
 	case BTF_KIND_DATASEC: return "datasec";
 	case BTF_KIND_FLOAT: return "float";
+	case BTF_KIND_TAG: return "tag";
 	default: return "unknown";
 	}
 }
@@ -2485,8 +2488,9 @@ static bool btf_needs_sanitization(struct bpf_object *obj)
 	bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
 	bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
 	bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
+	bool has_tag = kernel_supports(obj, FEAT_BTF_TAG);
 
-	return !has_func || !has_datasec || !has_func_global || !has_float;
+	return !has_func || !has_datasec || !has_func_global || !has_float || !has_tag;
 }
 
 static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
@@ -2495,14 +2499,15 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
 	bool has_datasec = kernel_supports(obj, FEAT_BTF_DATASEC);
 	bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);
 	bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);
+	bool has_tag = kernel_supports(obj, FEAT_BTF_TAG);
 	struct btf_type *t;
 	int i, j, vlen;
 
 	for (i = 1; i <= btf__get_nr_types(btf); i++) {
 		t = (struct btf_type *)btf__type_by_id(btf, i);
 
-		if (!has_datasec && btf_is_var(t)) {
-			/* replace VAR with INT */
+		if ((!has_datasec && btf_is_var(t)) || (!has_tag && btf_is_tag(t))) {
+			/* replace VAR/TAG with INT */
 			t->info = BTF_INFO_ENC(BTF_KIND_INT, 0, 0);
 			/*
 			 * using size = 1 is the safest choice, 4 will be too
@@ -4212,6 +4217,23 @@ static int probe_kern_btf_float(void)
 					     strs, sizeof(strs)));
 }
 
+static int probe_kern_btf_tag(void)
+{
+	static const char strs[] = "\0tag";
+	__u32 types[] = {
+		/* int */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
+		/* VAR x */                                     /* [2] */
+		BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+		BTF_VAR_STATIC,
+		/* attr */
+		BTF_TYPE_TAG_ENC(1, 2, -1),
+	};
+
+	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+					     strs, sizeof(strs)));
+}
+
 static int probe_kern_array_mmap(void)
 {
 	struct bpf_create_map_attr attr = {
@@ -4428,6 +4450,9 @@ static struct kern_feature_desc {
 	[FEAT_PERF_LINK] = {
 		"BPF perf link support", probe_perf_link,
 	},
+	[FEAT_BTF_TAG] = {
+		"BTF_KIND_TAG support", probe_kern_btf_tag,
+	},
 };
 
 static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 78ea62c9346f..9e649cf9e771 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -388,4 +388,6 @@ LIBBPF_0.5.0 {
 } LIBBPF_0.4.0;
 
 LIBBPF_0.6.0 {
+	global:
+		btf__add_tag;
 } LIBBPF_0.5.0;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 4f6ff5c23695..ceb0c98979bc 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -69,6 +69,8 @@
 #define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size)
 #define BTF_TYPE_FLOAT_ENC(name, sz) \
 	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)
+#define BTF_TYPE_TAG_ENC(value, type, component_idx) \
+	BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TAG, 0, 0), type), (component_idx)
 
 #ifndef likely
 #define likely(x) __builtin_expect(!!(x), 1)

From 5c07f2fec00361fb5e8cff8ba7fdede7b29f38bd Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 14 Sep 2021 15:30:31 -0700
Subject: [PATCH 46/63] bpftool: Add support for BTF_KIND_TAG

Added bpftool support to dump BTF_KIND_TAG information.
The new bpftool will be used in later patches to dump
btf in the test bpf program object file.

Currently, the tags are not emitted with
  bpftool btf dump file <path> format c
and they are silently ignored.  The tag information is
mostly used in the kernel for verification purpose and the kernel
uses its own btf to check. With adding these tags
to vmlinux.h, tags will be encoded in program's btf but
they will not be used by the kernel, at least for now.
So let us delay adding these tags to format C header files
until there is a real need.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210914223031.246951-1-yhs@fb.com
---
 tools/bpf/bpftool/btf.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index f7e5ff3586c9..49743ad96851 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -37,6 +37,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
 	[BTF_KIND_VAR]		= "VAR",
 	[BTF_KIND_DATASEC]	= "DATASEC",
 	[BTF_KIND_FLOAT]	= "FLOAT",
+	[BTF_KIND_TAG]		= "TAG",
 };
 
 struct btf_attach_table {
@@ -347,6 +348,17 @@ static int dump_btf_type(const struct btf *btf, __u32 id,
 			printf(" size=%u", t->size);
 		break;
 	}
+	case BTF_KIND_TAG: {
+		const struct btf_tag *tag = (const void *)(t + 1);
+
+		if (json_output) {
+			jsonw_uint_field(w, "type_id", t->type);
+			jsonw_int_field(w, "component_idx", tag->component_idx);
+		} else {
+			printf(" type_id=%u component_idx=%d", t->type, tag->component_idx);
+		}
+		break;
+	}
 	default:
 		break;
 	}

From 71d29c2d47d112404fe23e31cf33f7cccde75a8c Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 14 Sep 2021 15:30:36 -0700
Subject: [PATCH 47/63] selftests/bpf: Test libbpf API function btf__add_tag()

Add btf_write tests with btf__add_tag() function.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210914223036.247560-1-yhs@fb.com
---
 tools/testing/selftests/bpf/btf_helpers.c     |  7 ++++++-
 .../selftests/bpf/prog_tests/btf_write.c      | 21 +++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c
index b692e6ead9b5..ce103fb0ad1b 100644
--- a/tools/testing/selftests/bpf/btf_helpers.c
+++ b/tools/testing/selftests/bpf/btf_helpers.c
@@ -24,11 +24,12 @@ static const char * const btf_kind_str_mapping[] = {
 	[BTF_KIND_VAR]		= "VAR",
 	[BTF_KIND_DATASEC]	= "DATASEC",
 	[BTF_KIND_FLOAT]	= "FLOAT",
+	[BTF_KIND_TAG]		= "TAG",
 };
 
 static const char *btf_kind_str(__u16 kind)
 {
-	if (kind > BTF_KIND_DATASEC)
+	if (kind > BTF_KIND_TAG)
 		return "UNKNOWN";
 	return btf_kind_str_mapping[kind];
 }
@@ -177,6 +178,10 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id)
 	case BTF_KIND_FLOAT:
 		fprintf(out, " size=%u", t->size);
 		break;
+	case BTF_KIND_TAG:
+		fprintf(out, " type_id=%u component_idx=%d",
+			t->type, btf_tag(t)->component_idx);
+		break;
 	default:
 		break;
 	}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c
index 022c7d89d6f4..76548eecce2c 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_write.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c
@@ -281,5 +281,26 @@ void test_btf_write() {
 		     "[17] DATASEC 'datasec1' size=12 vlen=1\n"
 		     "\ttype_id=1 offset=4 size=8", "raw_dump");
 
+	/* TAG */
+	id = btf__add_tag(btf, "tag1", 16, -1);
+	ASSERT_EQ(id, 18, "tag_id");
+	t = btf__type_by_id(btf, 18);
+	ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "tag1", "tag_value");
+	ASSERT_EQ(btf_kind(t), BTF_KIND_TAG, "tag_kind");
+	ASSERT_EQ(t->type, 16, "tag_type");
+	ASSERT_EQ(btf_tag(t)->component_idx, -1, "tag_component_idx");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 18),
+		     "[18] TAG 'tag1' type_id=16 component_idx=-1", "raw_dump");
+
+	id = btf__add_tag(btf, "tag2", 14, 1);
+	ASSERT_EQ(id, 19, "tag_id");
+	t = btf__type_by_id(btf, 19);
+	ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "tag2", "tag_value");
+	ASSERT_EQ(btf_kind(t), BTF_KIND_TAG, "tag_kind");
+	ASSERT_EQ(t->type, 14, "tag_type");
+	ASSERT_EQ(btf_tag(t)->component_idx, 1, "tag_component_idx");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 19),
+		     "[19] TAG 'tag2' type_id=14 component_idx=1", "raw_dump");
+
 	btf__free(btf);
 }

From 3df3bd68d4811bccc74adc04d4d84512957a1a07 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 14 Sep 2021 15:30:41 -0700
Subject: [PATCH 48/63] selftests/bpf: Change NAME_NTH/IS_NAME_NTH for
 BTF_KIND_TAG format

BTF_KIND_TAG ELF format has a component_idx which might have value -1.
test_btf may confuse it with common_type.name as NAME_NTH checkes
high 16bit to be 0xffff. Change NAME_NTH high 16bit check to be
0xfffe so it won't confuse with component_idx.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210914223041.248009-1-yhs@fb.com
---
 tools/testing/selftests/bpf/prog_tests/btf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 649f87382c8d..ad39f4d588d0 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -39,8 +39,8 @@ static bool always_log;
 #define BTF_END_RAW 0xdeadbeef
 #define NAME_TBD 0xdeadb33f
 
-#define NAME_NTH(N) (0xffff0000 | N)
-#define IS_NAME_NTH(X) ((X & 0xffff0000) == 0xffff0000)
+#define NAME_NTH(N) (0xfffe0000 | N)
+#define IS_NAME_NTH(X) ((X & 0xffff0000) == 0xfffe0000)
 #define GET_NAME_NTH_IDX(X) (X & 0x0000ffff)
 
 #define MAX_NR_RAW_U32 1024

From 35baba7a832fa466b4aa2e0d00473d795cec8f20 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 14 Sep 2021 15:30:47 -0700
Subject: [PATCH 49/63] selftests/bpf: Add BTF_KIND_TAG unit tests

Test good and bad variants of BTF_KIND_TAG encoding.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210914223047.248223-1-yhs@fb.com
---
 tools/testing/selftests/bpf/prog_tests/btf.c | 245 +++++++++++++++++++
 tools/testing/selftests/bpf/test_btf.h       |   3 +
 2 files changed, 248 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index ad39f4d588d0..b4e86a8423cb 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -3661,6 +3661,249 @@ static struct btf_raw_test raw_tests[] = {
 	.err_str = "Invalid type_size",
 },
 
+{
+	.descr = "tag test #1, struct/member, well-formed",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_STRUCT_ENC(0, 2, 8),			/* [2] */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 32),
+		BTF_TAG_ENC(NAME_TBD, 2, -1),
+		BTF_TAG_ENC(NAME_TBD, 2, 0),
+		BTF_TAG_ENC(NAME_TBD, 2, 1),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0m1\0m2\0tag1\0tag2\0tag3"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 8,
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 1,
+},
+{
+	.descr = "tag test #2, union/member, well-formed",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_UNION_ENC(NAME_TBD, 2, 4),			/* [2] */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+		BTF_TAG_ENC(NAME_TBD, 2, -1),
+		BTF_TAG_ENC(NAME_TBD, 2, 0),
+		BTF_TAG_ENC(NAME_TBD, 2, 1),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 1,
+},
+{
+	.descr = "tag test #3, variable, well-formed",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [2] */
+		BTF_VAR_ENC(NAME_TBD, 1, 1),			/* [3] */
+		BTF_TAG_ENC(NAME_TBD, 2, -1),
+		BTF_TAG_ENC(NAME_TBD, 3, -1),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0local\0global\0tag1\0tag2"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+},
+{
+	.descr = "tag test #4, func/parameter, well-formed",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_FUNC_PROTO_ENC(0, 2),			/* [2] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+		BTF_FUNC_ENC(NAME_TBD, 2),			/* [3] */
+		BTF_TAG_ENC(NAME_TBD, 3, -1),
+		BTF_TAG_ENC(NAME_TBD, 3, 0),
+		BTF_TAG_ENC(NAME_TBD, 3, 1),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0arg1\0arg2\0f\0tag1\0tag2\0tag3"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+},
+{
+	.descr = "tag test #5, invalid value",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [2] */
+		BTF_TAG_ENC(0, 2, -1),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0local\0tag"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid value",
+},
+{
+	.descr = "tag test #6, invalid target type",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TAG_ENC(NAME_TBD, 1, -1),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0tag1"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid type",
+},
+{
+	.descr = "tag test #7, invalid vlen",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [2] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_TAG, 0, 1), 2), (0),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0local\0tag1"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "vlen != 0",
+},
+{
+	.descr = "tag test #8, invalid kflag",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [2] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_TAG, 1, 0), 2), (-1),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0local\0tag1"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid btf_info kind_flag",
+},
+{
+	.descr = "tag test #9, var, invalid component_idx",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [2] */
+		BTF_TAG_ENC(NAME_TBD, 2, 0),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0local\0tag"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid component_idx",
+},
+{
+	.descr = "tag test #10, struct member, invalid component_idx",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_STRUCT_ENC(0, 2, 8),			/* [2] */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 32),
+		BTF_TAG_ENC(NAME_TBD, 2, 2),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0m1\0m2\0tag"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 8,
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid component_idx",
+},
+{
+	.descr = "tag test #11, func parameter, invalid component_idx",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_FUNC_PROTO_ENC(0, 2),			/* [2] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+		BTF_FUNC_ENC(NAME_TBD, 2),			/* [3] */
+		BTF_TAG_ENC(NAME_TBD, 3, 2),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0arg1\0arg2\0f\0tag"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid component_idx",
+},
+{
+	.descr = "tag test #12, < -1 component_idx",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_FUNC_PROTO_ENC(0, 2),			/* [2] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+		BTF_FUNC_ENC(NAME_TBD, 2),			/* [3] */
+		BTF_TAG_ENC(NAME_TBD, 3, -2),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0arg1\0arg2\0f\0tag"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid component_idx",
+},
+
 }; /* struct btf_raw_test raw_tests[] */
 
 static const char *get_next_str(const char *start, const char *end)
@@ -6801,6 +7044,8 @@ static int btf_type_size(const struct btf_type *t)
 		return base_size + sizeof(struct btf_var);
 	case BTF_KIND_DATASEC:
 		return base_size + vlen * sizeof(struct btf_var_secinfo);
+	case BTF_KIND_TAG:
+		return base_size + sizeof(struct btf_tag);
 	default:
 		fprintf(stderr, "Unsupported BTF_KIND:%u\n", kind);
 		return -EINVAL;
diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h
index e2394eea4b7f..0619e06d745e 100644
--- a/tools/testing/selftests/bpf/test_btf.h
+++ b/tools/testing/selftests/bpf/test_btf.h
@@ -69,4 +69,7 @@
 #define BTF_TYPE_FLOAT_ENC(name, sz) \
 	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)
 
+#define BTF_TAG_ENC(value, type, component_idx)	\
+	BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TAG, 0, 0), type), (component_idx)
+
 #endif /* _TEST_BTF_H */

From ad526474aec1d9abb9838e3fb1330bb991715c22 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 14 Sep 2021 15:30:52 -0700
Subject: [PATCH 50/63] selftests/bpf: Test BTF_KIND_TAG for deduplication

Add unit tests for BTF_KIND_TAG deduplication for
  - struct and struct member
  - variable
  - func and func argument

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210914223052.248535-1-yhs@fb.com
---
 tools/testing/selftests/bpf/prog_tests/btf.c | 192 +++++++++++++++++--
 1 file changed, 175 insertions(+), 17 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index b4e86a8423cb..9c85d7d27409 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -6664,27 +6664,33 @@ const struct btf_dedup_test dedup_tests[] = {
 				BTF_MEMBER_ENC(NAME_NTH(4), 5, 64),	/* const int *a;	*/
 				BTF_MEMBER_ENC(NAME_NTH(5), 2, 128),	/* int b[16];		*/
 				BTF_MEMBER_ENC(NAME_NTH(6), 1, 640),	/* int c;		*/
-				BTF_MEMBER_ENC(NAME_NTH(8), 13, 672),	/* float d;		*/
+				BTF_MEMBER_ENC(NAME_NTH(8), 15, 672),	/* float d;		*/
 			/* ptr -> [3] struct s */
 			BTF_PTR_ENC(3),							/* [4] */
 			/* ptr -> [6] const int */
 			BTF_PTR_ENC(6),							/* [5] */
 			/* const -> [1] int */
 			BTF_CONST_ENC(1),						/* [6] */
+			/* tag -> [3] struct s */
+			BTF_TAG_ENC(NAME_NTH(2), 3, -1),				/* [7] */
+			/* tag -> [3] struct s, member 1 */
+			BTF_TAG_ENC(NAME_NTH(2), 3, 1),					/* [8] */
 
 			/* full copy of the above */
-			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),	/* [7] */
-			BTF_TYPE_ARRAY_ENC(7, 7, 16),					/* [8] */
-			BTF_STRUCT_ENC(NAME_NTH(2), 5, 88),				/* [9] */
-				BTF_MEMBER_ENC(NAME_NTH(3), 10, 0),
-				BTF_MEMBER_ENC(NAME_NTH(4), 11, 64),
-				BTF_MEMBER_ENC(NAME_NTH(5), 8, 128),
-				BTF_MEMBER_ENC(NAME_NTH(6), 7, 640),
-				BTF_MEMBER_ENC(NAME_NTH(8), 13, 672),
-			BTF_PTR_ENC(9),							/* [10] */
-			BTF_PTR_ENC(12),						/* [11] */
-			BTF_CONST_ENC(7),						/* [12] */
-			BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4),				/* [13] */
+			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),	/* [9] */
+			BTF_TYPE_ARRAY_ENC(9, 9, 16),					/* [10] */
+			BTF_STRUCT_ENC(NAME_NTH(2), 5, 88),				/* [11] */
+				BTF_MEMBER_ENC(NAME_NTH(3), 12, 0),
+				BTF_MEMBER_ENC(NAME_NTH(4), 13, 64),
+				BTF_MEMBER_ENC(NAME_NTH(5), 10, 128),
+				BTF_MEMBER_ENC(NAME_NTH(6), 9, 640),
+				BTF_MEMBER_ENC(NAME_NTH(8), 15, 672),
+			BTF_PTR_ENC(11),						/* [12] */
+			BTF_PTR_ENC(14),						/* [13] */
+			BTF_CONST_ENC(9),						/* [14] */
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4),				/* [15] */
+			BTF_TAG_ENC(NAME_NTH(2), 11, -1),				/* [16] */
+			BTF_TAG_ENC(NAME_NTH(2), 11, 1),				/* [17] */
 			BTF_END_RAW,
 		},
 		BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0float\0d"),
@@ -6701,14 +6707,16 @@ const struct btf_dedup_test dedup_tests[] = {
 				BTF_MEMBER_ENC(NAME_NTH(1), 5, 64),	/* const int *a;	*/
 				BTF_MEMBER_ENC(NAME_NTH(2), 2, 128),	/* int b[16];		*/
 				BTF_MEMBER_ENC(NAME_NTH(3), 1, 640),	/* int c;		*/
-				BTF_MEMBER_ENC(NAME_NTH(4), 7, 672),	/* float d;		*/
+				BTF_MEMBER_ENC(NAME_NTH(4), 9, 672),	/* float d;		*/
 			/* ptr -> [3] struct s */
 			BTF_PTR_ENC(3),							/* [4] */
 			/* ptr -> [6] const int */
 			BTF_PTR_ENC(6),							/* [5] */
 			/* const -> [1] int */
 			BTF_CONST_ENC(1),						/* [6] */
-			BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4),				/* [7] */
+			BTF_TAG_ENC(NAME_NTH(2), 3, -1),				/* [7] */
+			BTF_TAG_ENC(NAME_NTH(2), 3, 1),					/* [8] */
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4),				/* [9] */
 			BTF_END_RAW,
 		},
 		BTF_STR_SEC("\0a\0b\0c\0d\0int\0float\0next\0s"),
@@ -6833,9 +6841,11 @@ const struct btf_dedup_test dedup_tests[] = {
 				BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
 			BTF_FUNC_ENC(NAME_TBD, 12),					/* [13] func */
 			BTF_TYPE_FLOAT_ENC(NAME_TBD, 2),				/* [14] float */
+			BTF_TAG_ENC(NAME_TBD, 13, -1),					/* [15] tag */
+			BTF_TAG_ENC(NAME_TBD, 13, 1),					/* [16] tag */
 			BTF_END_RAW,
 		},
-		BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N"),
+		BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"),
 	},
 	.expect = {
 		.raw_types = {
@@ -6859,9 +6869,11 @@ const struct btf_dedup_test dedup_tests[] = {
 				BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
 			BTF_FUNC_ENC(NAME_TBD, 12),					/* [13] func */
 			BTF_TYPE_FLOAT_ENC(NAME_TBD, 2),				/* [14] float */
+			BTF_TAG_ENC(NAME_TBD, 13, -1),					/* [15] tag */
+			BTF_TAG_ENC(NAME_TBD, 13, 1),					/* [16] tag */
 			BTF_END_RAW,
 		},
-		BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N"),
+		BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P"),
 	},
 	.opts = {
 		.dont_resolve_fwds = false,
@@ -7010,6 +7022,152 @@ const struct btf_dedup_test dedup_tests[] = {
 		.dedup_table_size = 1
 	},
 },
+{
+	.descr = "dedup: func/func_arg/var tags",
+	.input = {
+		.raw_types = {
+			/* int */
+			BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+			/* static int t */
+			BTF_VAR_ENC(NAME_NTH(1), 1, 0),			/* [2] */
+			/* void f(int a1, int a2) */
+			BTF_FUNC_PROTO_ENC(0, 2),			/* [3] */
+				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
+				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(3), 1),
+			BTF_FUNC_ENC(NAME_NTH(4), 2),			/* [4] */
+			/* tag -> t */
+			BTF_TAG_ENC(NAME_NTH(5), 2, -1),		/* [5] */
+			BTF_TAG_ENC(NAME_NTH(5), 2, -1),		/* [6] */
+			/* tag -> func */
+			BTF_TAG_ENC(NAME_NTH(5), 4, -1),		/* [7] */
+			BTF_TAG_ENC(NAME_NTH(5), 4, -1),		/* [8] */
+			/* tag -> func arg a1 */
+			BTF_TAG_ENC(NAME_NTH(5), 4, 1),			/* [9] */
+			BTF_TAG_ENC(NAME_NTH(5), 4, 1),			/* [10] */
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0t\0a1\0a2\0f\0tag"),
+	},
+	.expect = {
+		.raw_types = {
+			BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+			BTF_VAR_ENC(NAME_NTH(1), 1, 0),			/* [2] */
+			BTF_FUNC_PROTO_ENC(0, 2),			/* [3] */
+				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
+				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(3), 1),
+			BTF_FUNC_ENC(NAME_NTH(4), 2),			/* [4] */
+			BTF_TAG_ENC(NAME_NTH(5), 2, -1),		/* [5] */
+			BTF_TAG_ENC(NAME_NTH(5), 4, -1),		/* [6] */
+			BTF_TAG_ENC(NAME_NTH(5), 4, 1),			/* [7] */
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0t\0a1\0a2\0f\0tag"),
+	},
+	.opts = {
+		.dont_resolve_fwds = false,
+	},
+},
+{
+	.descr = "dedup: func/func_param tags",
+	.input = {
+		.raw_types = {
+			/* int */
+			BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+			/* void f(int a1, int a2) */
+			BTF_FUNC_PROTO_ENC(0, 2),			/* [2] */
+				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(1), 1),
+				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
+			BTF_FUNC_ENC(NAME_NTH(3), 2),			/* [3] */
+			/* void f(int a1, int a2) */
+			BTF_FUNC_PROTO_ENC(0, 2),			/* [4] */
+				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(1), 1),
+				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
+			BTF_FUNC_ENC(NAME_NTH(3), 4),			/* [5] */
+			/* tag -> f: tag1, tag2 */
+			BTF_TAG_ENC(NAME_NTH(4), 3, -1),		/* [6] */
+			BTF_TAG_ENC(NAME_NTH(5), 3, -1),		/* [7] */
+			/* tag -> f/a2: tag1, tag2 */
+			BTF_TAG_ENC(NAME_NTH(4), 3, 1),			/* [8] */
+			BTF_TAG_ENC(NAME_NTH(5), 3, 1),			/* [9] */
+			/* tag -> f: tag1, tag3 */
+			BTF_TAG_ENC(NAME_NTH(4), 5, -1),		/* [10] */
+			BTF_TAG_ENC(NAME_NTH(6), 5, -1),		/* [11] */
+			/* tag -> f/a2: tag1, tag3 */
+			BTF_TAG_ENC(NAME_NTH(4), 5, 1),			/* [12] */
+			BTF_TAG_ENC(NAME_NTH(6), 5, 1),			/* [13] */
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0a1\0a2\0f\0tag1\0tag2\0tag3"),
+	},
+	.expect = {
+		.raw_types = {
+			BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+			BTF_FUNC_PROTO_ENC(0, 2),			/* [2] */
+				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(1), 1),
+				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
+			BTF_FUNC_ENC(NAME_NTH(3), 2),			/* [3] */
+			BTF_TAG_ENC(NAME_NTH(4), 3, -1),		/* [4] */
+			BTF_TAG_ENC(NAME_NTH(5), 3, -1),		/* [5] */
+			BTF_TAG_ENC(NAME_NTH(6), 3, -1),		/* [6] */
+			BTF_TAG_ENC(NAME_NTH(4), 3, 1),			/* [7] */
+			BTF_TAG_ENC(NAME_NTH(5), 3, 1),			/* [8] */
+			BTF_TAG_ENC(NAME_NTH(6), 3, 1),			/* [9] */
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0a1\0a2\0f\0tag1\0tag2\0tag3"),
+	},
+	.opts = {
+		.dont_resolve_fwds = false,
+	},
+},
+{
+	.descr = "dedup: struct/struct_member tags",
+	.input = {
+		.raw_types = {
+			/* int */
+			BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+			BTF_STRUCT_ENC(NAME_NTH(1), 2, 8),		/* [2] */
+				BTF_MEMBER_ENC(NAME_NTH(2), 1, 0),
+				BTF_MEMBER_ENC(NAME_NTH(3), 1, 32),
+			BTF_STRUCT_ENC(NAME_NTH(1), 2, 8),		/* [3] */
+				BTF_MEMBER_ENC(NAME_NTH(2), 1, 0),
+				BTF_MEMBER_ENC(NAME_NTH(3), 1, 32),
+			/* tag -> t: tag1, tag2 */
+			BTF_TAG_ENC(NAME_NTH(4), 2, -1),		/* [4] */
+			BTF_TAG_ENC(NAME_NTH(5), 2, -1),		/* [5] */
+			/* tag -> t/m2: tag1, tag2 */
+			BTF_TAG_ENC(NAME_NTH(4), 2, 1),			/* [6] */
+			BTF_TAG_ENC(NAME_NTH(5), 2, 1),			/* [7] */
+			/* tag -> t: tag1, tag3 */
+			BTF_TAG_ENC(NAME_NTH(4), 3, -1),		/* [8] */
+			BTF_TAG_ENC(NAME_NTH(6), 3, -1),		/* [9] */
+			/* tag -> t/m2: tag1, tag3 */
+			BTF_TAG_ENC(NAME_NTH(4), 3, 1),			/* [10] */
+			BTF_TAG_ENC(NAME_NTH(6), 3, 1),			/* [11] */
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"),
+	},
+	.expect = {
+		.raw_types = {
+			BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+			BTF_STRUCT_ENC(NAME_NTH(1), 2, 8),		/* [2] */
+				BTF_MEMBER_ENC(NAME_NTH(2), 1, 0),
+				BTF_MEMBER_ENC(NAME_NTH(3), 1, 32),
+			BTF_TAG_ENC(NAME_NTH(4), 2, -1),		/* [3] */
+			BTF_TAG_ENC(NAME_NTH(5), 2, -1),		/* [4] */
+			BTF_TAG_ENC(NAME_NTH(6), 2, -1),		/* [5] */
+			BTF_TAG_ENC(NAME_NTH(4), 2, 1),			/* [6] */
+			BTF_TAG_ENC(NAME_NTH(5), 2, 1),			/* [7] */
+			BTF_TAG_ENC(NAME_NTH(6), 2, 1),			/* [8] */
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"),
+	},
+	.opts = {
+		.dont_resolve_fwds = false,
+	},
+},
 
 };
 

From c240ba28789063690077d282f8f89e03f31037d0 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 14 Sep 2021 15:30:58 -0700
Subject: [PATCH 51/63] selftests/bpf: Add a test with a bpf program with
 btf_tag attributes

Add a bpf program with btf_tag attributes. The program is
loaded successfully with the kernel. With the command
  bpftool btf dump file ./tag.o
the following dump shows that tags are properly encoded:
  [8] STRUCT 'key_t' size=12 vlen=3
          'a' type_id=2 bits_offset=0
          'b' type_id=2 bits_offset=32
          'c' type_id=2 bits_offset=64
  [9] TAG 'tag1' type_id=8 component_id=-1
  [10] TAG 'tag2' type_id=8 component_id=-1
  [11] TAG 'tag1' type_id=8 component_id=1
  [12] TAG 'tag2' type_id=8 component_id=1
  ...
  [21] FUNC_PROTO '(anon)' ret_type_id=2 vlen=1
          'x' type_id=2
  [22] FUNC 'foo' type_id=21 linkage=static
  [23] TAG 'tag1' type_id=22 component_id=0
  [24] TAG 'tag2' type_id=22 component_id=0
  [25] TAG 'tag1' type_id=22 component_id=-1
  [26] TAG 'tag2' type_id=22 component_id=-1
  ...
  [29] VAR 'total' type_id=27, linkage=global
  [30] TAG 'tag1' type_id=29 component_id=-1
  [31] TAG 'tag2' type_id=29 component_id=-1

If an old clang compiler, which does not support btf_tag attribute,
is used, these btf_tag attributes will be silently ignored.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210914223058.248949-1-yhs@fb.com
---
 .../selftests/bpf/prog_tests/btf_tag.c        | 14 +++++++
 tools/testing/selftests/bpf/progs/tag.c       | 39 +++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/btf_tag.c
 create mode 100644 tools/testing/selftests/bpf/progs/tag.c

diff --git a/tools/testing/selftests/bpf/prog_tests/btf_tag.c b/tools/testing/selftests/bpf/prog_tests/btf_tag.c
new file mode 100644
index 000000000000..f939527ede77
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_tag.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "tag.skel.h"
+
+void test_btf_tag(void)
+{
+	struct tag *skel;
+
+	skel = tag__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "btf_tag"))
+		return;
+	tag__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/tag.c b/tools/testing/selftests/bpf/progs/tag.c
new file mode 100644
index 000000000000..17f88c58a6c5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tag.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define __tag1 __attribute__((btf_tag("tag1")))
+#define __tag2 __attribute__((btf_tag("tag2")))
+
+struct key_t {
+	int a;
+	int b __tag1 __tag2;
+	int c;
+} __tag1 __tag2;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 3);
+	__type(key, struct key_t);
+	__type(value, __u64);
+} hashmap1 SEC(".maps");
+
+__u32 total __tag1 __tag2 = 0;
+
+static __noinline int foo(int x __tag1 __tag2) __tag1 __tag2
+{
+	struct key_t key;
+	__u64 val = 1;
+
+	key.a = key.b = key.c = x;
+	bpf_map_update_elem(&hashmap1, &key, &val, 0);
+	return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(sub, int x)
+{
+	return foo(x);
+}

From 48f5a6c4162706f94523d7a0d828e4aee77d17e7 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 14 Sep 2021 15:31:03 -0700
Subject: [PATCH 52/63] docs/bpf: Add documentation for BTF_KIND_TAG

Add BTF_KIND_TAG documentation in btf.rst.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210914223103.249100-1-yhs@fb.com
---
 Documentation/bpf/btf.rst | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/Documentation/bpf/btf.rst b/Documentation/bpf/btf.rst
index 846354cd2d69..1bfe4072f5fc 100644
--- a/Documentation/bpf/btf.rst
+++ b/Documentation/bpf/btf.rst
@@ -85,6 +85,7 @@ sequentially and type id is assigned to each recognized type starting from id
     #define BTF_KIND_VAR            14      /* Variable     */
     #define BTF_KIND_DATASEC        15      /* Section      */
     #define BTF_KIND_FLOAT          16      /* Floating point       */
+    #define BTF_KIND_TAG            17      /* Tag          */
 
 Note that the type section encodes debug info, not just pure types.
 ``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram.
@@ -106,7 +107,7 @@ Each type contains the following common data::
          * "size" tells the size of the type it is describing.
          *
          * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
-         * FUNC and FUNC_PROTO.
+         * FUNC, FUNC_PROTO and TAG.
          * "type" is a type_id referring to another type.
          */
         union {
@@ -465,6 +466,32 @@ map definition.
 
 No additional type data follow ``btf_type``.
 
+2.2.17 BTF_KIND_TAG
+~~~~~~~~~~~~~~~~~~~
+
+``struct btf_type`` encoding requirement:
+ * ``name_off``: offset to a non-empty string
+ * ``info.kind_flag``: 0
+ * ``info.kind``: BTF_KIND_TAG
+ * ``info.vlen``: 0
+ * ``type``: ``struct``, ``union``, ``func`` or ``var``
+
+``btf_type`` is followed by ``struct btf_tag``.::
+
+    struct btf_tag {
+        __u32   component_idx;
+    };
+
+The ``name_off`` encodes btf_tag attribute string.
+The ``type`` should be ``struct``, ``union``, ``func`` or ``var``.
+For ``var`` type, ``btf_tag.component_idx`` must be ``-1``.
+For the other three types, if the btf_tag attribute is
+applied to the ``struct``, ``union`` or ``func`` itself,
+``btf_tag.component_idx`` must be ``-1``. Otherwise,
+the attribute is applied to a ``struct``/``union`` member or
+a ``func`` argument, and ``btf_tag.component_idx`` should be a
+valid index (starting from 0) pointing to a member or an argument.
+
 3. BTF Kernel API
 *****************
 

From 2220ecf55c1b7aa36e99b00a6b964f4e5333f9bf Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Tue, 14 Sep 2021 23:10:36 -0700
Subject: [PATCH 53/63] selftests/bpf: Skip btf_tag test if btf_tag attribute
 not supported

Commit c240ba287890 ("selftests/bpf: Add a test with a bpf
program with btf_tag attributes") added btf_tag selftest
to test BTF_KIND_TAG generation from C source code, and to
test kernel validation of generated BTF types.
But if an old clang (clang 13 or earlier) is used, the
following compiler warning may be seen:
  progs/tag.c:23:20: warning: unknown attribute 'btf_tag' ignored
and the test itself is marked OK. The compiler warning is bad
and the test itself shouldn't be marked OK.

This patch added the check for btf_tag attribute support.
If btf_tag is not supported by the clang, the attribute will
not be used in the code and the test will be marked as skipped.
For example, with clang 13:
  ./test_progs -t btf_tag
  #21 btf_tag:SKIP
  Summary: 1/0 PASSED, 1 SKIPPED, 0 FAILED

The selftests/README.rst is updated to clarify when the btf_tag
test may be skipped.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210915061036.2577971-1-yhs@fb.com
---
 tools/testing/selftests/bpf/README.rst           | 14 ++++++++++++++
 tools/testing/selftests/bpf/prog_tests/btf_tag.c |  6 ++++++
 tools/testing/selftests/bpf/progs/tag.c          | 12 +++++++++++-
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index 9b17f2867488..8200c0da2769 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -201,6 +201,20 @@ Without it, the error from compiling bpf selftests looks like:
 
 __ https://reviews.llvm.org/D93563
 
+btf_tag test and Clang version
+==============================
+
+The btf_tag selftest require LLVM support to recognize the btf_tag attribute.
+It was introduced in `Clang 14`__.
+
+Without it, the btf_tag selftest will be skipped and you will observe:
+
+.. code-block:: console
+
+  #<test_num> btf_tag:SKIP
+
+__ https://reviews.llvm.org/D106614
+
 Clang dependencies for static linking tests
 ===========================================
 
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_tag.c b/tools/testing/selftests/bpf/prog_tests/btf_tag.c
index f939527ede77..91821f42714d 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_tag.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_tag.c
@@ -10,5 +10,11 @@ void test_btf_tag(void)
 	skel = tag__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "btf_tag"))
 		return;
+
+	if (skel->rodata->skip_tests) {
+		printf("%s:SKIP: btf_tag attribute not supported", __func__);
+		test__skip();
+	}
+
 	tag__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/tag.c b/tools/testing/selftests/bpf/progs/tag.c
index 17f88c58a6c5..b46b1bfac7da 100644
--- a/tools/testing/selftests/bpf/progs/tag.c
+++ b/tools/testing/selftests/bpf/progs/tag.c
@@ -4,8 +4,19 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
+#ifndef __has_attribute
+#define __has_attribute(x) 0
+#endif
+
+#if __has_attribute(btf_tag)
 #define __tag1 __attribute__((btf_tag("tag1")))
 #define __tag2 __attribute__((btf_tag("tag2")))
+volatile const bool skip_tests __tag1 __tag2 = false;
+#else
+#define __tag1
+#define __tag2
+volatile const bool skip_tests = true;
+#endif
 
 struct key_t {
 	int a;
@@ -20,7 +31,6 @@ struct {
 	__type(value, __u64);
 } hashmap1 SEC(".maps");
 
-__u32 total __tag1 __tag2 = 0;
 
 static __noinline int foo(int x __tag1 __tag2) __tag1 __tag2
 {

From 69cd823956ba8ce266a901170b1060db8073bddd Mon Sep 17 00:00:00 2001
From: Grant Seltzer <grantseltzer@gmail.com>
Date: Tue, 14 Sep 2021 22:19:52 -0400
Subject: [PATCH 54/63] libbpf: Add sphinx code documentation comments

This adds comments above five functions in btf.h which document
their uses. These comments are of a format that doxygen and sphinx
can pick up and render. These are rendered by libbpf.readthedocs.org

Signed-off-by: Grant Seltzer <grantseltzer@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210915021951.117186-1-grantseltzer@gmail.com
---
 tools/lib/bpf/btf.h | 70 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 659ea8a2769b..2cfe31327920 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -1,5 +1,6 @@
 /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
 /* Copyright (c) 2018 Facebook */
+/*! \file */
 
 #ifndef __LIBBPF_BTF_H
 #define __LIBBPF_BTF_H
@@ -30,11 +31,80 @@ enum btf_endianness {
 	BTF_BIG_ENDIAN = 1,
 };
 
+/**
+ * @brief **btf__free()** frees all data of a BTF object
+ * @param btf BTF object to free
+ */
 LIBBPF_API void btf__free(struct btf *btf);
 
+/**
+ * @brief **btf__new()** creates a new instance of a BTF object from the raw
+ * bytes of an ELF's BTF section
+ * @param data raw bytes
+ * @param size number of bytes passed in `data`
+ * @return new BTF object instance which has to be eventually freed with
+ * **btf__free()**
+ *
+ * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract
+ * error code from such a pointer `libbpf_get_error()` should be used. If
+ * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is
+ * returned on error instead. In both cases thread-local `errno` variable is
+ * always set to error code as well.
+ */
 LIBBPF_API struct btf *btf__new(const void *data, __u32 size);
+
+/**
+ * @brief **btf__new_split()** create a new instance of a BTF object from the
+ * provided raw data bytes. It takes another BTF instance, **base_btf**, which
+ * serves as a base BTF, which is extended by types in a newly created BTF
+ * instance
+ * @param data raw bytes
+ * @param size length of raw bytes
+ * @param base_btf the base BTF object
+ * @return new BTF object instance which has to be eventually freed with
+ * **btf__free()**
+ *
+ * If *base_btf* is NULL, `btf__new_split()` is equivalent to `btf__new()` and
+ * creates non-split BTF.
+ *
+ * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract
+ * error code from such a pointer `libbpf_get_error()` should be used. If
+ * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is
+ * returned on error instead. In both cases thread-local `errno` variable is
+ * always set to error code as well.
+ */
 LIBBPF_API struct btf *btf__new_split(const void *data, __u32 size, struct btf *base_btf);
+
+/**
+ * @brief **btf__new_empty()** creates an empty BTF object.  Use
+ * `btf__add_*()` to populate such BTF object.
+ * @return new BTF object instance which has to be eventually freed with
+ * **btf__free()**
+ *
+ * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract
+ * error code from such a pointer `libbpf_get_error()` should be used. If
+ * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is
+ * returned on error instead. In both cases thread-local `errno` variable is
+ * always set to error code as well.
+ */
 LIBBPF_API struct btf *btf__new_empty(void);
+
+/**
+ * @brief **btf__new_empty_split()** creates an unpopulated BTF object from an
+ * ELF BTF section except with a base BTF on top of which split BTF should be
+ * based
+ * @return new BTF object instance which has to be eventually freed with
+ * **btf__free()**
+ *
+ * If *base_btf* is NULL, `btf__new_empty_split()` is equivalent to
+ * `btf__new_empty()` and creates non-split BTF.
+ *
+ * On error, error-code-encoded-as-pointer is returned, not a NULL. To extract
+ * error code from such a pointer `libbpf_get_error()` should be used. If
+ * `libbpf_set_strict_mode(LIBBPF_STRICT_CLEAN_PTRS)` is enabled, NULL is
+ * returned on error instead. In both cases thread-local `errno` variable is
+ * always set to error code as well.
+ */
 LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf);
 
 LIBBPF_API struct btf *btf__parse(const char *path, struct btf_ext **btf_ext);

From 336562752acc1a723f9a24b5b8129ae22e0478c6 Mon Sep 17 00:00:00 2001
From: Matteo Croce <mcroce@microsoft.com>
Date: Wed, 15 Sep 2021 01:54:00 +0200
Subject: [PATCH 55/63] bpf: Update bpf_get_smp_processor_id() documentation

BPF programs run with migration disabled regardless of preemption, as
they are protected by migrate_disable(). Update the uapi documentation
accordingly.

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210914235400.59427-1-mcroce@linux.microsoft.com
---
 include/uapi/linux/bpf.h       | 2 +-
 tools/include/uapi/linux/bpf.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d21326558d42..3e9785f1064a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1629,7 +1629,7 @@ union bpf_attr {
  * u32 bpf_get_smp_processor_id(void)
  * 	Description
  * 		Get the SMP (symmetric multiprocessing) processor id. Note that
- * 		all programs run with preemption disabled, which means that the
+ * 		all programs run with migration disabled, which means that the
  * 		SMP processor id is stable during all the execution of the
  * 		program.
  * 	Return
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index d21326558d42..3e9785f1064a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1629,7 +1629,7 @@ union bpf_attr {
  * u32 bpf_get_smp_processor_id(void)
  * 	Description
  * 		Get the SMP (symmetric multiprocessing) processor id. Note that
- * 		all programs run with preemption disabled, which means that the
+ * 		all programs run with migration disabled, which means that the
  * 		SMP processor id is stable during all the execution of the
  * 		program.
  * 	Return

From f11f86a3931b5d533aed1be1720fbd55bd63174d Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 15 Sep 2021 18:58:30 -0700
Subject: [PATCH 56/63] libbpf: Use pre-setup sec_def in
 libbpf_find_attach_btf_id()

Don't perform another search for sec_def inside
libbpf_find_attach_btf_id(), as each recognized bpf_program already has
prog->sec_def set.

Also remove unnecessary NULL check for prog->sec_name, as it can never
be NULL.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210916015836.1248906-2-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 62a43c408d73..5ba11b249e9b 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8461,19 +8461,15 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, int *btf_obj_fd,
 {
 	enum bpf_attach_type attach_type = prog->expected_attach_type;
 	__u32 attach_prog_fd = prog->attach_prog_fd;
-	const char *name = prog->sec_name, *attach_name;
-	const struct bpf_sec_def *sec = NULL;
+	const char *attach_name;
 	int err = 0;
 
-	if (!name)
-		return -EINVAL;
-
-	sec = find_sec_def(name);
-	if (!sec || !sec->is_attach_btf) {
-		pr_warn("failed to identify BTF ID based on ELF section name '%s'\n", name);
+	if (!prog->sec_def || !prog->sec_def->is_attach_btf) {
+		pr_warn("failed to identify BTF ID based on ELF section name '%s'\n",
+			prog->sec_name);
 		return -ESRCH;
 	}
-	attach_name = name + sec->len;
+	attach_name = prog->sec_name + prog->sec_def->len;
 
 	/* BPF program's BTF ID */
 	if (attach_prog_fd) {

From 23a7baaa93882c241ad3464cdeeb8ef0d1d40a12 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 15 Sep 2021 18:58:31 -0700
Subject: [PATCH 57/63] selftests/bpf: Stop using relaxed_core_relocs which has
 no effect

relaxed_core_relocs option hasn't had any effect for a while now, stop
specifying it. Next patch marks it as deprecated.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210916015836.1248906-3-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/core_reloc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 15d355af8d1d..763302e63a29 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -249,8 +249,7 @@ static int duration = 0;
 #define SIZE_CASE_COMMON(name)						\
 	.case_name = #name,						\
 	.bpf_obj_file = "test_core_reloc_size.o",			\
-	.btf_src_file = "btf__core_reloc_" #name ".o",			\
-	.relaxed_core_relocs = true
+	.btf_src_file = "btf__core_reloc_" #name ".o"
 
 #define SIZE_OUTPUT_DATA(type)						\
 	STRUCT_TO_CHAR_PTR(core_reloc_size_output) {			\

From 277641859e833549722eb82c97cbc2d55421df7c Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 15 Sep 2021 18:58:32 -0700
Subject: [PATCH 58/63] libbpf: Deprecated
 bpf_object_open_opts.relaxed_core_relocs

It's relevant and hasn't been doing anything for a long while now.
Deprecated it.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210916015836.1248906-4-andrii@kernel.org
---
 tools/lib/bpf/libbpf.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 2f6f0e15d1e7..7111e8d651de 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -83,6 +83,7 @@ struct bpf_object_open_opts {
 	 * Non-relocatable instructions are replaced with invalid ones to
 	 * prevent accidental errors.
 	 * */
+	LIBBPF_DEPRECATED_SINCE(0, 6, "field has no effect")
 	bool relaxed_core_relocs;
 	/* maps that set the 'pinning' attribute in their definition will have
 	 * their pin_path attribute set to a file in this directory, and be

From 2d5ec1c66e25f0b4dd895a211e651a12dec2ef4f Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 15 Sep 2021 18:58:33 -0700
Subject: [PATCH 59/63] libbpf: Allow skipping attach_func_name in
 bpf_program__set_attach_target()

Allow to use bpf_program__set_attach_target to only set target attach
program FD, while letting libbpf to use target attach function name from
SEC() definition. This might be useful for some scenarios where
bpf_object contains multiple related freplace BPF programs intended to
replace different sub-programs in target BPF program. In such case all
programs will have the same attach_prog_fd, but different
attach_func_name. It's convenient to specify such target function names
declaratively in SEC() definitions, but attach_prog_fd is a dynamic
runtime setting.

To simplify such scenario, allow bpf_program__set_attach_target() to
delay BTF ID resolution till the BPF program load time by providing NULL
attach_func_name. In that case the behavior will be similar to using
bpf_object_open_opts.attach_prog_fd (which is marked deprecated since
v0.7), but has the benefit of allowing more control by user in what is
attached to what. Such setup allows having BPF programs attached to
different target attach_prog_fd with target functions still declaratively
recorded in BPF source code in SEC() definitions.

Selftests changes in the next patch should make this more obvious.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210916015836.1248906-5-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 5ba11b249e9b..552d05a85cbb 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -10643,18 +10643,29 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
 {
 	int btf_obj_fd = 0, btf_id = 0, err;
 
-	if (!prog || attach_prog_fd < 0 || !attach_func_name)
+	if (!prog || attach_prog_fd < 0)
 		return libbpf_err(-EINVAL);
 
 	if (prog->obj->loaded)
 		return libbpf_err(-EINVAL);
 
+	if (attach_prog_fd && !attach_func_name) {
+		/* remember attach_prog_fd and let bpf_program__load() find
+		 * BTF ID during the program load
+		 */
+		prog->attach_prog_fd = attach_prog_fd;
+		return 0;
+	}
+
 	if (attach_prog_fd) {
 		btf_id = libbpf_find_prog_btf_id(attach_func_name,
 						 attach_prog_fd);
 		if (btf_id < 0)
 			return libbpf_err(btf_id);
 	} else {
+		if (!attach_func_name)
+			return libbpf_err(-EINVAL);
+
 		/* load btf_vmlinux, if not yet */
 		err = bpf_object__load_vmlinux_btf(prog->obj, true);
 		if (err)

From 60aed22076b0d0ec2b7c7f9dba3ccd642520e1f3 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 15 Sep 2021 18:58:34 -0700
Subject: [PATCH 60/63] selftests/bpf: Switch fexit_bpf2bpf selftest to
 set_attach_target() API

Switch fexit_bpf2bpf selftest to bpf_program__set_attach_target()
instead of using bpf_object_open_opts.attach_prog_fd, which is going to
be deprecated. These changes also demonstrate the new mode of
set_attach_target() in which it allows NULL when the target is BPF
program (attach_prog_fd != 0).

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210916015836.1248906-6-andrii@kernel.org
---
 .../selftests/bpf/prog_tests/fexit_bpf2bpf.c  | 43 +++++++++++--------
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index 73b4c76e6b86..c7c1816899bf 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -60,7 +60,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
 	struct bpf_object *obj = NULL, *tgt_obj;
 	__u32 retval, tgt_prog_id, info_len;
 	struct bpf_prog_info prog_info = {};
-	struct bpf_program **prog = NULL;
+	struct bpf_program **prog = NULL, *p;
 	struct bpf_link **link = NULL;
 	int err, tgt_fd, i;
 	struct btf *btf;
@@ -69,9 +69,6 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
 			    &tgt_obj, &tgt_fd);
 	if (!ASSERT_OK(err, "tgt_prog_load"))
 		return;
-	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
-			    .attach_prog_fd = tgt_fd,
-			   );
 
 	info_len = sizeof(prog_info);
 	err = bpf_obj_get_info_by_fd(tgt_fd, &prog_info, &info_len);
@@ -89,10 +86,15 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
 	if (!ASSERT_OK_PTR(prog, "prog_ptr"))
 		goto close_prog;
 
-	obj = bpf_object__open_file(obj_file, &opts);
+	obj = bpf_object__open_file(obj_file, NULL);
 	if (!ASSERT_OK_PTR(obj, "obj_open"))
 		goto close_prog;
 
+	bpf_object__for_each_program(p, obj) {
+		err = bpf_program__set_attach_target(p, tgt_fd, NULL);
+		ASSERT_OK(err, "set_attach_target");
+	}
+
 	err = bpf_object__load(obj);
 	if (!ASSERT_OK(err, "obj_load"))
 		goto close_prog;
@@ -270,7 +272,7 @@ static void test_fmod_ret_freplace(void)
 	struct bpf_link *freplace_link = NULL;
 	struct bpf_program *prog;
 	__u32 duration = 0;
-	int err, pkt_fd;
+	int err, pkt_fd, attach_prog_fd;
 
 	err = bpf_prog_load(tgt_name, BPF_PROG_TYPE_UNSPEC,
 			    &pkt_obj, &pkt_fd);
@@ -278,26 +280,32 @@ static void test_fmod_ret_freplace(void)
 	if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
 		  tgt_name, err, errno))
 		return;
-	opts.attach_prog_fd = pkt_fd;
 
-	freplace_obj = bpf_object__open_file(freplace_name, &opts);
+	freplace_obj = bpf_object__open_file(freplace_name, NULL);
 	if (!ASSERT_OK_PTR(freplace_obj, "freplace_obj_open"))
 		goto out;
 
+	prog = bpf_program__next(NULL, freplace_obj);
+	err = bpf_program__set_attach_target(prog, pkt_fd, NULL);
+	ASSERT_OK(err, "freplace__set_attach_target");
+
 	err = bpf_object__load(freplace_obj);
 	if (CHECK(err, "freplace_obj_load", "err %d\n", err))
 		goto out;
 
-	prog = bpf_program__next(NULL, freplace_obj);
 	freplace_link = bpf_program__attach_trace(prog);
 	if (!ASSERT_OK_PTR(freplace_link, "freplace_attach_trace"))
 		goto out;
 
-	opts.attach_prog_fd = bpf_program__fd(prog);
-	fmod_obj = bpf_object__open_file(fmod_ret_name, &opts);
+	fmod_obj = bpf_object__open_file(fmod_ret_name, NULL);
 	if (!ASSERT_OK_PTR(fmod_obj, "fmod_obj_open"))
 		goto out;
 
+	attach_prog_fd = bpf_program__fd(prog);
+	prog = bpf_program__next(NULL, fmod_obj);
+	err = bpf_program__set_attach_target(prog, attach_prog_fd, NULL);
+	ASSERT_OK(err, "fmod_ret_set_attach_target");
+
 	err = bpf_object__load(fmod_obj);
 	if (CHECK(!err, "fmod_obj_load", "loading fmod_ret should fail\n"))
 		goto out;
@@ -322,14 +330,14 @@ static void test_func_sockmap_update(void)
 }
 
 static void test_obj_load_failure_common(const char *obj_file,
-					  const char *target_obj_file)
-
+					 const char *target_obj_file)
 {
 	/*
 	 * standalone test that asserts failure to load freplace prog
 	 * because of invalid return code.
 	 */
 	struct bpf_object *obj = NULL, *pkt_obj;
+	struct bpf_program *prog;
 	int err, pkt_fd;
 	__u32 duration = 0;
 
@@ -339,14 +347,15 @@ static void test_obj_load_failure_common(const char *obj_file,
 	if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
 		  target_obj_file, err, errno))
 		return;
-	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
-			    .attach_prog_fd = pkt_fd,
-			   );
 
-	obj = bpf_object__open_file(obj_file, &opts);
+	obj = bpf_object__open_file(obj_file, NULL);
 	if (!ASSERT_OK_PTR(obj, "obj_open"))
 		goto close_prog;
 
+	prog = bpf_program__next(NULL, obj);
+	err = bpf_program__set_attach_target(prog, pkt_fd, NULL);
+	ASSERT_OK(err, "set_attach_target");
+
 	/* It should fail to load the program */
 	err = bpf_object__load(obj);
 	if (CHECK(!err, "bpf_obj_load should fail", "err %d\n", err))

From 91b555d73e53879fc6d4cf82c8c0e14c00ce212d Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 15 Sep 2021 18:58:35 -0700
Subject: [PATCH 61/63] libbpf: Schedule open_opts.attach_prog_fd deprecation
 since v0.7

bpf_object_open_opts.attach_prog_fd makes a pretty strong assumption
that bpf_object contains either only single freplace BPF program or all
of BPF programs in BPF object are freplaces intended to replace
different subprograms of the same target BPF program. This seems both
a bit confusing, too assuming, and limiting.

We've had bpf_program__set_attach_target() API which allows more
fine-grained control over this, on a per-program level. As such, mark
open_opts.attach_prog_fd as deprecated starting from v0.7, so that we
have one more universal way of setting freplace targets. With previous
change to allow NULL attach_func_name argument, and especially combined
with BPF skeleton, arguable bpf_program__set_attach_target() is a more
convenient and explicit API as well.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210916015836.1248906-7-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c        | 3 +++
 tools/lib/bpf/libbpf.h        | 2 ++
 tools/lib/bpf/libbpf_common.h | 5 +++++
 3 files changed, 10 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 552d05a85cbb..6aeeb0e82acc 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6415,9 +6415,12 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object
 		bpf_program__set_type(prog, prog->sec_def->prog_type);
 		bpf_program__set_expected_attach_type(prog, prog->sec_def->expected_attach_type);
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 		if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING ||
 		    prog->sec_def->prog_type == BPF_PROG_TYPE_EXT)
 			prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
+#pragma GCC diagnostic pop
 	}
 
 	return 0;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 7111e8d651de..52b7ee090037 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -90,6 +90,8 @@ struct bpf_object_open_opts {
 	 * auto-pinned to that path on load; defaults to "/sys/fs/bpf".
 	 */
 	const char *pin_root_path;
+
+	LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__set_attach_target() on each individual bpf_program")
 	__u32 attach_prog_fd;
 	/* Additional kernel config content that augments and overrides
 	 * system Kconfig for CONFIG_xxx externs.
diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h
index 36ac77f2bea2..aaa1efbf6f51 100644
--- a/tools/lib/bpf/libbpf_common.h
+++ b/tools/lib/bpf/libbpf_common.h
@@ -35,6 +35,11 @@
 #else
 #define __LIBBPF_MARK_DEPRECATED_0_6(X)
 #endif
+#if __LIBBPF_CURRENT_VERSION_GEQ(0, 7)
+#define __LIBBPF_MARK_DEPRECATED_0_7(X) X
+#else
+#define __LIBBPF_MARK_DEPRECATED_0_7(X)
+#endif
 
 /* Helper macro to declare and initialize libbpf options struct
  *

From 942025c9f37ee45e69eb5f39a2877afab66d9555 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 15 Sep 2021 18:58:36 -0700
Subject: [PATCH 62/63] libbpf: Constify all high-level program attach APIs

Attach APIs shouldn't need to modify bpf_program/bpf_map structs, so
change all struct bpf_program and struct bpf_map pointers to const
pointers. This is completely backwards compatible with no functional
change.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20210916015836.1248906-8-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c | 68 +++++++++++++++++++++---------------------
 tools/lib/bpf/libbpf.h | 36 +++++++++++-----------
 2 files changed, 52 insertions(+), 52 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 6aeeb0e82acc..da65a1666a5e 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -220,7 +220,7 @@ struct reloc_desc {
 
 struct bpf_sec_def;
 
-typedef struct bpf_link *(*attach_fn_t)(struct bpf_program *prog);
+typedef struct bpf_link *(*attach_fn_t)(const struct bpf_program *prog);
 
 struct bpf_sec_def {
 	const char *sec;
@@ -7947,12 +7947,12 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog,
 	__VA_ARGS__							    \
 }
 
-static struct bpf_link *attach_kprobe(struct bpf_program *prog);
-static struct bpf_link *attach_tp(struct bpf_program *prog);
-static struct bpf_link *attach_raw_tp(struct bpf_program *prog);
-static struct bpf_link *attach_trace(struct bpf_program *prog);
-static struct bpf_link *attach_lsm(struct bpf_program *prog);
-static struct bpf_link *attach_iter(struct bpf_program *prog);
+static struct bpf_link *attach_kprobe(const struct bpf_program *prog);
+static struct bpf_link *attach_tp(const struct bpf_program *prog);
+static struct bpf_link *attach_raw_tp(const struct bpf_program *prog);
+static struct bpf_link *attach_trace(const struct bpf_program *prog);
+static struct bpf_link *attach_lsm(const struct bpf_program *prog);
+static struct bpf_link *attach_iter(const struct bpf_program *prog);
 
 static const struct bpf_sec_def section_defs[] = {
 	BPF_PROG_SEC("socket",			BPF_PROG_TYPE_SOCKET_FILTER),
@@ -9092,7 +9092,7 @@ static void bpf_link_perf_dealloc(struct bpf_link *link)
 	free(perf_link);
 }
 
-struct bpf_link *bpf_program__attach_perf_event_opts(struct bpf_program *prog, int pfd,
+struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
 						     const struct bpf_perf_event_opts *opts)
 {
 	char errmsg[STRERR_BUFSIZE];
@@ -9167,7 +9167,7 @@ err_out:
 	return libbpf_err_ptr(err);
 }
 
-struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, int pfd)
+struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd)
 {
 	return bpf_program__attach_perf_event_opts(prog, pfd, NULL);
 }
@@ -9332,7 +9332,7 @@ static int perf_event_kprobe_open_legacy(bool retprobe, const char *name, uint64
 }
 
 struct bpf_link *
-bpf_program__attach_kprobe_opts(struct bpf_program *prog,
+bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
 				const char *func_name,
 				const struct bpf_kprobe_opts *opts)
 {
@@ -9389,7 +9389,7 @@ bpf_program__attach_kprobe_opts(struct bpf_program *prog,
 	return link;
 }
 
-struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
+struct bpf_link *bpf_program__attach_kprobe(const struct bpf_program *prog,
 					    bool retprobe,
 					    const char *func_name)
 {
@@ -9400,7 +9400,7 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,
 	return bpf_program__attach_kprobe_opts(prog, func_name, &opts);
 }
 
-static struct bpf_link *attach_kprobe(struct bpf_program *prog)
+static struct bpf_link *attach_kprobe(const struct bpf_program *prog)
 {
 	DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
 	unsigned long offset = 0;
@@ -9432,7 +9432,7 @@ static struct bpf_link *attach_kprobe(struct bpf_program *prog)
 }
 
 LIBBPF_API struct bpf_link *
-bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid,
+bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
 				const char *binary_path, size_t func_offset,
 				const struct bpf_uprobe_opts *opts)
 {
@@ -9472,7 +9472,7 @@ bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid,
 	return link;
 }
 
-struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,
+struct bpf_link *bpf_program__attach_uprobe(const struct bpf_program *prog,
 					    bool retprobe, pid_t pid,
 					    const char *binary_path,
 					    size_t func_offset)
@@ -9532,7 +9532,7 @@ static int perf_event_open_tracepoint(const char *tp_category,
 	return pfd;
 }
 
-struct bpf_link *bpf_program__attach_tracepoint_opts(struct bpf_program *prog,
+struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *prog,
 						     const char *tp_category,
 						     const char *tp_name,
 						     const struct bpf_tracepoint_opts *opts)
@@ -9566,14 +9566,14 @@ struct bpf_link *bpf_program__attach_tracepoint_opts(struct bpf_program *prog,
 	return link;
 }
 
-struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,
+struct bpf_link *bpf_program__attach_tracepoint(const struct bpf_program *prog,
 						const char *tp_category,
 						const char *tp_name)
 {
 	return bpf_program__attach_tracepoint_opts(prog, tp_category, tp_name, NULL);
 }
 
-static struct bpf_link *attach_tp(struct bpf_program *prog)
+static struct bpf_link *attach_tp(const struct bpf_program *prog)
 {
 	char *sec_name, *tp_cat, *tp_name;
 	struct bpf_link *link;
@@ -9597,7 +9597,7 @@ static struct bpf_link *attach_tp(struct bpf_program *prog)
 	return link;
 }
 
-struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
+struct bpf_link *bpf_program__attach_raw_tracepoint(const struct bpf_program *prog,
 						    const char *tp_name)
 {
 	char errmsg[STRERR_BUFSIZE];
@@ -9627,7 +9627,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
 	return link;
 }
 
-static struct bpf_link *attach_raw_tp(struct bpf_program *prog)
+static struct bpf_link *attach_raw_tp(const struct bpf_program *prog)
 {
 	const char *tp_name = prog->sec_name + prog->sec_def->len;
 
@@ -9635,7 +9635,7 @@ static struct bpf_link *attach_raw_tp(struct bpf_program *prog)
 }
 
 /* Common logic for all BPF program types that attach to a btf_id */
-static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
+static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *prog)
 {
 	char errmsg[STRERR_BUFSIZE];
 	struct bpf_link *link;
@@ -9664,28 +9664,28 @@ static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
 	return (struct bpf_link *)link;
 }
 
-struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
+struct bpf_link *bpf_program__attach_trace(const struct bpf_program *prog)
 {
 	return bpf_program__attach_btf_id(prog);
 }
 
-struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog)
+struct bpf_link *bpf_program__attach_lsm(const struct bpf_program *prog)
 {
 	return bpf_program__attach_btf_id(prog);
 }
 
-static struct bpf_link *attach_trace(struct bpf_program *prog)
+static struct bpf_link *attach_trace(const struct bpf_program *prog)
 {
 	return bpf_program__attach_trace(prog);
 }
 
-static struct bpf_link *attach_lsm(struct bpf_program *prog)
+static struct bpf_link *attach_lsm(const struct bpf_program *prog)
 {
 	return bpf_program__attach_lsm(prog);
 }
 
 static struct bpf_link *
-bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
+bpf_program__attach_fd(const struct bpf_program *prog, int target_fd, int btf_id,
 		       const char *target_name)
 {
 	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
@@ -9721,24 +9721,24 @@ bpf_program__attach_fd(struct bpf_program *prog, int target_fd, int btf_id,
 }
 
 struct bpf_link *
-bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
+bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd)
 {
 	return bpf_program__attach_fd(prog, cgroup_fd, 0, "cgroup");
 }
 
 struct bpf_link *
-bpf_program__attach_netns(struct bpf_program *prog, int netns_fd)
+bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd)
 {
 	return bpf_program__attach_fd(prog, netns_fd, 0, "netns");
 }
 
-struct bpf_link *bpf_program__attach_xdp(struct bpf_program *prog, int ifindex)
+struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex)
 {
 	/* target_fd/target_ifindex use the same field in LINK_CREATE */
 	return bpf_program__attach_fd(prog, ifindex, 0, "xdp");
 }
 
-struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog,
+struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog,
 					      int target_fd,
 					      const char *attach_func_name)
 {
@@ -9771,7 +9771,7 @@ struct bpf_link *bpf_program__attach_freplace(struct bpf_program *prog,
 }
 
 struct bpf_link *
-bpf_program__attach_iter(struct bpf_program *prog,
+bpf_program__attach_iter(const struct bpf_program *prog,
 			 const struct bpf_iter_attach_opts *opts)
 {
 	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts);
@@ -9810,12 +9810,12 @@ bpf_program__attach_iter(struct bpf_program *prog,
 	return link;
 }
 
-static struct bpf_link *attach_iter(struct bpf_program *prog)
+static struct bpf_link *attach_iter(const struct bpf_program *prog)
 {
 	return bpf_program__attach_iter(prog, NULL);
 }
 
-struct bpf_link *bpf_program__attach(struct bpf_program *prog)
+struct bpf_link *bpf_program__attach(const struct bpf_program *prog)
 {
 	if (!prog->sec_def || !prog->sec_def->attach_fn)
 		return libbpf_err_ptr(-ESRCH);
@@ -9833,7 +9833,7 @@ static int bpf_link__detach_struct_ops(struct bpf_link *link)
 	return 0;
 }
 
-struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
+struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)
 {
 	struct bpf_struct_ops *st_ops;
 	struct bpf_link *link;
@@ -10918,7 +10918,7 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
 		if (!prog->sec_def || !prog->sec_def->attach_fn)
 			continue;
 
-		*link = prog->sec_def->attach_fn(prog);
+		*link = bpf_program__attach(prog);
 		err = libbpf_get_error(*link);
 		if (err) {
 			pr_warn("failed to auto-attach program '%s': %d\n",
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 52b7ee090037..c90e3d79e72c 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -246,7 +246,7 @@ LIBBPF_API int bpf_link__detach(struct bpf_link *link);
 LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
 
 LIBBPF_API struct bpf_link *
-bpf_program__attach(struct bpf_program *prog);
+bpf_program__attach(const struct bpf_program *prog);
 
 struct bpf_perf_event_opts {
 	/* size of this struct, for forward/backward compatiblity */
@@ -257,10 +257,10 @@ struct bpf_perf_event_opts {
 #define bpf_perf_event_opts__last_field bpf_cookie
 
 LIBBPF_API struct bpf_link *
-bpf_program__attach_perf_event(struct bpf_program *prog, int pfd);
+bpf_program__attach_perf_event(const struct bpf_program *prog, int pfd);
 
 LIBBPF_API struct bpf_link *
-bpf_program__attach_perf_event_opts(struct bpf_program *prog, int pfd,
+bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd,
 				    const struct bpf_perf_event_opts *opts);
 
 struct bpf_kprobe_opts {
@@ -277,10 +277,10 @@ struct bpf_kprobe_opts {
 #define bpf_kprobe_opts__last_field retprobe
 
 LIBBPF_API struct bpf_link *
-bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe,
+bpf_program__attach_kprobe(const struct bpf_program *prog, bool retprobe,
 			   const char *func_name);
 LIBBPF_API struct bpf_link *
-bpf_program__attach_kprobe_opts(struct bpf_program *prog,
+bpf_program__attach_kprobe_opts(const struct bpf_program *prog,
                                 const char *func_name,
                                 const struct bpf_kprobe_opts *opts);
 
@@ -300,11 +300,11 @@ struct bpf_uprobe_opts {
 #define bpf_uprobe_opts__last_field retprobe
 
 LIBBPF_API struct bpf_link *
-bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe,
+bpf_program__attach_uprobe(const struct bpf_program *prog, bool retprobe,
 			   pid_t pid, const char *binary_path,
 			   size_t func_offset);
 LIBBPF_API struct bpf_link *
-bpf_program__attach_uprobe_opts(struct bpf_program *prog, pid_t pid,
+bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,
 				const char *binary_path, size_t func_offset,
 				const struct bpf_uprobe_opts *opts);
 
@@ -317,35 +317,35 @@ struct bpf_tracepoint_opts {
 #define bpf_tracepoint_opts__last_field bpf_cookie
 
 LIBBPF_API struct bpf_link *
-bpf_program__attach_tracepoint(struct bpf_program *prog,
+bpf_program__attach_tracepoint(const struct bpf_program *prog,
 			       const char *tp_category,
 			       const char *tp_name);
 LIBBPF_API struct bpf_link *
-bpf_program__attach_tracepoint_opts(struct bpf_program *prog,
+bpf_program__attach_tracepoint_opts(const struct bpf_program *prog,
 				    const char *tp_category,
 				    const char *tp_name,
 				    const struct bpf_tracepoint_opts *opts);
 
 LIBBPF_API struct bpf_link *
-bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
+bpf_program__attach_raw_tracepoint(const struct bpf_program *prog,
 				   const char *tp_name);
 LIBBPF_API struct bpf_link *
-bpf_program__attach_trace(struct bpf_program *prog);
+bpf_program__attach_trace(const struct bpf_program *prog);
 LIBBPF_API struct bpf_link *
-bpf_program__attach_lsm(struct bpf_program *prog);
+bpf_program__attach_lsm(const struct bpf_program *prog);
 LIBBPF_API struct bpf_link *
-bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd);
+bpf_program__attach_cgroup(const struct bpf_program *prog, int cgroup_fd);
 LIBBPF_API struct bpf_link *
-bpf_program__attach_netns(struct bpf_program *prog, int netns_fd);
+bpf_program__attach_netns(const struct bpf_program *prog, int netns_fd);
 LIBBPF_API struct bpf_link *
-bpf_program__attach_xdp(struct bpf_program *prog, int ifindex);
+bpf_program__attach_xdp(const struct bpf_program *prog, int ifindex);
 LIBBPF_API struct bpf_link *
-bpf_program__attach_freplace(struct bpf_program *prog,
+bpf_program__attach_freplace(const struct bpf_program *prog,
 			     int target_fd, const char *attach_func_name);
 
 struct bpf_map;
 
-LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map);
+LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map);
 
 struct bpf_iter_attach_opts {
 	size_t sz; /* size of this struct for forward/backward compatibility */
@@ -355,7 +355,7 @@ struct bpf_iter_attach_opts {
 #define bpf_iter_attach_opts__last_field link_info_len
 
 LIBBPF_API struct bpf_link *
-bpf_program__attach_iter(struct bpf_program *prog,
+bpf_program__attach_iter(const struct bpf_program *prog,
 			 const struct bpf_iter_attach_opts *opts);
 
 struct bpf_insn;

From ca21a3e5edfd47c90141724557f9d6f5000e46f3 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 16 Sep 2021 21:33:43 -0700
Subject: [PATCH 63/63] selftests/bpf: Fix a few compiler warnings

With clang building selftests/bpf, I hit a few warnings like below:

  .../bpf_iter.c:592:48: warning: variable 'expected_key_c' set but not used [-Wunused-but-set-variable]
  __u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0;
                                                ^

  .../bpf_iter.c:688:48: warning: variable 'expected_key_c' set but not used [-Wunused-but-set-variable]
  __u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0;
                                                ^

  .../tc_redirect.c:657:6: warning: variable 'target_fd' is used uninitialized whenever 'if' condition is true [-Wsometimes-uninitialized]
  if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
      ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  .../tc_redirect.c:743:6: note: uninitialized use occurs here
  if (target_fd >= 0)
      ^~~~~~~~~

Removing unused variables and initializing the previously-uninitialized variable
to ensure these warnings are gone.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210917043343.3711917-1-yhs@fb.com
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c    | 6 ++----
 tools/testing/selftests/bpf/prog_tests/tc_redirect.c | 2 +-
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 77ac24b191d4..9454331aaf85 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -589,7 +589,7 @@ out:
 
 static void test_bpf_hash_map(void)
 {
-	__u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0;
+	__u32 expected_key_a = 0, expected_key_b = 0;
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 	struct bpf_iter_bpf_hash_map *skel;
 	int err, i, len, map_fd, iter_fd;
@@ -638,7 +638,6 @@ static void test_bpf_hash_map(void)
 		val = i + 4;
 		expected_key_a += key.a;
 		expected_key_b += key.b;
-		expected_key_c += key.c;
 		expected_val += val;
 
 		err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
@@ -685,7 +684,7 @@ out:
 
 static void test_bpf_percpu_hash_map(void)
 {
-	__u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0;
+	__u32 expected_key_a = 0, expected_key_b = 0;
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 	struct bpf_iter_bpf_percpu_hash_map *skel;
 	int err, i, j, len, map_fd, iter_fd;
@@ -722,7 +721,6 @@ static void test_bpf_percpu_hash_map(void)
 		key.c = i + 3;
 		expected_key_a += key.a;
 		expected_key_b += key.b;
-		expected_key_c += key.c;
 
 		for (j = 0; j < bpf_num_possible_cpus(); j++) {
 			*(__u32 *)(val + j * 8) = i + j;
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index e7201ba29ccd..e87bc4466d9a 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -633,7 +633,7 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
 	struct nstoken *nstoken = NULL;
 	int err;
 	int tunnel_pid = -1;
-	int src_fd, target_fd;
+	int src_fd, target_fd = -1;
 	int ifindex;
 
 	/* Start a L3 TUN/TAP tunnel between the src and dst namespaces.