bpf-next-for-netdev
-----BEGIN PGP SIGNATURE----- iQIzBAABCAAdFiEE+soXsSLHKoYyzcli6rmadz2vbToFAmOWgtsACgkQ6rmadz2v bTpT2g//WzQRsODtPVVmg87fEo1GSTXvoXq/fhg95OKNZrVKgx1N6EVlFSLSqEjL TAmOuv5cZT28ZpMPMNjnU/c/lFf/6/UWbbTusA+F3MtSCBSbP5DPsWDD0yvNT9DL EZbGoQDSyt1M+BakZLzwOV6HPn9oDhj5p/4lMw+gptTY+3IeYUbS50DinM8eLz+Q 067aF01p3ROF6LNUx9Az0cLPdU05oHzL2MvRsj/F7h/sWoSW5B/1Kx/m1vsT9lwn T2vbm6r4Jo0m0ZvpEMeRyKNZgVKIc64C7NH9CV7V66giJaONmxvLwkc0zWFwbXJ2 V9aPQbbBUx/CZXoC72LEsvVcoAFl7LAL1IALm2HVt1iQjpj1yDlWw3WV0PMQ9Rn7 xRVDOfQNGZ6jnkv6LB2j7V1z7hVENWQQwM48dgO2pAnJwYmUW9wZaAGE5kadUrZf eCD4c1U+qcZkSk4vwvpr8ubJ0PWPMUZqI0FrHUxfPxqkdy78c1h3qNQufZvAHWff Ca9NZqraFACTx58ZBsN1V5Xzv7azoK8Zgr9+JwVNahpFxclrbL8xuceThkC4smBl fiZJC9fClD9ATquIdj177jNMVC8F4B5yrKF/ehJDcNQhcqUdWx9Sbj461enf+3HI nfTP+77ZzyIJ76iRXJBV/jr9wkaPWhAZVeBGxmw5clTvB9/RBbU= =fzwv -----END PGP SIGNATURE----- Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next Alexei Starovoitov says: ==================== pull-request: bpf-next 2022-12-11 We've added 74 non-merge commits during the last 11 day(s) which contain a total of 88 files changed, 3362 insertions(+), 789 deletions(-). The main changes are: 1) Decouple prune and jump points handling in the verifier, from Andrii. 2) Do not rely on ALLOW_ERROR_INJECTION for fmod_ret, from Benjamin. Merged from hid tree. 3) Do not zero-extend kfunc return values. Necessary fix for 32-bit archs, from Björn. 4) Don't use rcu_users to refcount in task kfuncs, from David. 5) Three reg_state->id fixes in the verifier, from Eduard. 6) Optimize bpf_mem_alloc by reusing elements from free_by_rcu, from Hou. 7) Refactor dynptr handling in the verifier, from Kumar. 8) Remove the "/sys" mount and umount dance in {open,close}_netns in bpf selftests, from Martin. 9) Enable sleepable support for cgrp local storage, from Yonghong. 
* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (74 commits) selftests/bpf: test case for relaxed prunning of active_lock.id selftests/bpf: Add pruning test case for bpf_spin_lock bpf: use check_ids() for active_lock comparison selftests/bpf: verify states_equal() maintains idmap across all frames bpf: states_equal() must build idmap for all function frames selftests/bpf: test cases for regsafe() bug skipping check_id() bpf: regsafe() must not skip check_ids() docs/bpf: Add documentation for BPF_MAP_TYPE_SK_STORAGE selftests/bpf: Add test for dynptr reinit in user_ringbuf callback bpf: Use memmove for bpf_dynptr_{read,write} bpf: Move PTR_TO_STACK alignment check to process_dynptr_func bpf: Rework check_func_arg_reg_off bpf: Rework process_dynptr_func bpf: Propagate errors from process_* checks in check_func_arg bpf: Refactor ARG_PTR_TO_DYNPTR checks into process_dynptr_func bpf: Skip rcu_barrier() if rcu_trace_implies_rcu_gp() is true bpf: Reuse freed element in free_by_rcu during allocation selftests/bpf: Bring test_offload.py back to life bpf: Fix comment error in fixup_kfunc_call function bpf: Do not zero-extend kfunc return values ... ==================== Link: https://lore.kernel.org/r/20221212024701.73809-1-alexei.starovoitov@gmail.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
26f708a284
Documentation/bpf
arch/riscv/net
include
linux
net
uapi/linux
kernel/bpf
net
bpf
core
ipv4
tls
xfrm
scripts
tools
bpf/bpftool
include/uapi/linux
lib/bpf
testing/selftests/bpf
DENYLIST.aarch64DENYLIST.s390xMakefilebpf_legacy.hconfignetwork_helpers.c
prog_tests
btf_skc_cls_ingress.ccgrp_local_storage.cdynptr.cempty_skb.ckfunc_dynptr_param.cmap_kptr.ctask_kfunc.ctc_redirect.ctest_tunnel.cuser_ringbuf.cxdp_do_redirect.cxdp_synproxy.cxfrm_info.c
progs
bpf_iter_ksym.cbpf_misc.hbpf_tracing_net.hbtf_type_tag_percpu.ccgrp_ls_sleepable.cdynptr_fail.cdynptr_success.clinked_list.cmap_kptr_fail.crcu_read_lock.ctask_kfunc_failure.ctask_kfunc_success.ctest_kfunc_dynptr_param.cuser_ringbuf_fail.cxfrm_info.c
test_cpp.cpptest_loader.ctest_offload.pytest_progs.htest_sockmap.cverifier
485
Documentation/bpf/bpf_iterators.rst
Normal file
485
Documentation/bpf/bpf_iterators.rst
Normal file
@ -0,0 +1,485 @@
|
||||
=============
|
||||
BPF Iterators
|
||||
=============
|
||||
|
||||
|
||||
----------
|
||||
Motivation
|
||||
----------
|
||||
|
||||
There are a few existing ways to dump kernel data into user space. The most
|
||||
popular one is the ``/proc`` system. For example, ``cat /proc/net/tcp6`` dumps
|
||||
all tcp6 sockets in the system, and ``cat /proc/net/netlink`` dumps all netlink
|
||||
sockets in the system. However, their output format tends to be fixed, and if
|
||||
users want more information about these sockets, they have to patch the kernel,
|
||||
which often takes time to publish upstream and release. The same is true for popular
|
||||
tools like `ss <https://man7.org/linux/man-pages/man8/ss.8.html>`_ where any
|
||||
additional information needs a kernel patch.
|
||||
|
||||
To solve this problem, the `drgn
|
||||
<https://www.kernel.org/doc/html/latest/bpf/drgn.html>`_ tool is often used to
|
||||
dig out the kernel data with no kernel change. However, the main drawback for
|
||||
drgn is performance, as it cannot do pointer tracing inside the kernel. In
|
||||
addition, drgn cannot validate a pointer value and may read invalid data if the
|
||||
pointer becomes invalid inside the kernel.
|
||||
|
||||
The BPF iterator solves the above problem by providing flexibility on what data
|
||||
(e.g., tasks, bpf_maps, etc.) to collect by calling BPF programs for each kernel
|
||||
data object.
|
||||
|
||||
----------------------
|
||||
How BPF Iterators Work
|
||||
----------------------
|
||||
|
||||
A BPF iterator is a type of BPF program that allows users to iterate over
|
||||
specific types of kernel objects. Unlike traditional BPF tracing programs that
|
||||
allow users to define callbacks that are invoked at particular points of
|
||||
execution in the kernel, BPF iterators allow users to define callbacks that
|
||||
should be executed for every entry in a variety of kernel data structures.
|
||||
|
||||
For example, users can define a BPF iterator that iterates over every task on
|
||||
the system and dumps the total amount of CPU runtime currently used by each of
|
||||
them. Another BPF task iterator may instead dump the cgroup information for each
|
||||
task. Such flexibility is the core value of BPF iterators.
|
||||
|
||||
A BPF program is always loaded into the kernel at the behest of a user space
|
||||
process. A user space process loads a BPF program by opening and initializing
|
||||
the program skeleton as required and then invoking a syscall to have the BPF
|
||||
program verified and loaded by the kernel.
|
||||
|
||||
In traditional tracing programs, a program is activated by having user space
|
||||
obtain a ``bpf_link`` to the program with ``bpf_program__attach()``. Once
|
||||
activated, the program callback will be invoked whenever the tracepoint is
|
||||
triggered in the main kernel. For BPF iterator programs, a ``bpf_link`` to the
|
||||
program is obtained using ``bpf_link_create()``, and the program callback is
|
||||
invoked by issuing system calls from user space.
|
||||
|
||||
Next, let us see how you can use the iterators to iterate on kernel objects and
|
||||
read data.
|
||||
|
||||
------------------------
|
||||
How to Use BPF Iterators
|
||||
------------------------
|
||||
|
||||
BPF selftests are a great resource to illustrate how to use the iterators. In
|
||||
this section, we’ll walk through a BPF selftest which shows how to load and use
|
||||
a BPF iterator program. To begin, we’ll look at `bpf_iter.c
|
||||
<https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/prog_tests/bpf_iter.c>`_,
|
||||
which illustrates how to load and trigger BPF iterators on the user space side.
|
||||
Later, we’ll look at a BPF program that runs in kernel space.
|
||||
|
||||
Loading a BPF iterator in the kernel from user space typically involves the
|
||||
following steps:
|
||||
|
||||
* The BPF program is loaded into the kernel through ``libbpf``. Once the kernel
|
||||
has verified and loaded the program, it returns a file descriptor (fd) to user
|
||||
space.
|
||||
* Obtain a ``link_fd`` to the BPF program by calling ``bpf_link_create()``
|
||||
with the BPF program file descriptor received from the kernel.
|
||||
* Next, obtain a BPF iterator file descriptor (``bpf_iter_fd``) by calling
|
||||
``bpf_iter_create()`` with the ``link_fd`` obtained in the previous step.
|
||||
* Trigger the iteration by calling ``read(bpf_iter_fd)`` until no data is
|
||||
available.
|
||||
* Close the iterator fd using ``close(bpf_iter_fd)``.
|
||||
* To reread the data, get a new ``bpf_iter_fd`` and do the read again.
|
||||
|
||||
The following are a few examples of selftest BPF iterator programs:
|
||||
|
||||
* `bpf_iter_tcp4.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c>`_
|
||||
* `bpf_iter_task_vma.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c>`_
|
||||
* `bpf_iter_task_file.c <https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c>`_
|
||||
|
||||
Let us look at ``bpf_iter_task_file.c``, which runs in kernel space:
|
||||
|
||||
Here is the definition of ``bpf_iter__task_file`` in `vmlinux.h
|
||||
<https://facebookmicrosites.github.io/bpf/blog/2020/02/19/bpf-portability-and-co-re.html#btf>`_.
|
||||
Any struct name in ``vmlinux.h`` in the format ``bpf_iter__<iter_name>``
|
||||
represents a BPF iterator. The suffix ``<iter_name>`` represents the type of
|
||||
iterator.
|
||||
|
||||
::
|
||||
|
||||
struct bpf_iter__task_file {
|
||||
union {
|
||||
struct bpf_iter_meta *meta;
|
||||
};
|
||||
union {
|
||||
struct task_struct *task;
|
||||
};
|
||||
u32 fd;
|
||||
union {
|
||||
struct file *file;
|
||||
};
|
||||
};
|
||||
|
||||
In the above code, the field 'meta' contains the metadata, which is the same for
|
||||
all BPF iterator programs. The rest of the fields are specific to different
|
||||
iterators. For example, for task_file iterators, the kernel layer provides the
|
||||
'task', 'fd' and 'file' field values. The 'task' and 'file' are `reference
|
||||
counted
|
||||
<https://facebookmicrosites.github.io/bpf/blog/2018/08/31/object-lifetime.html#file-descriptors-and-reference-counters>`_,
|
||||
so they won't go away when the BPF program runs.
|
||||
|
||||
Here is a snippet from the ``bpf_iter_task_file.c`` file:
|
||||
|
||||
::
|
||||
|
||||
SEC("iter/task_file")
|
||||
int dump_task_file(struct bpf_iter__task_file *ctx)
|
||||
{
|
||||
struct seq_file *seq = ctx->meta->seq;
|
||||
struct task_struct *task = ctx->task;
|
||||
struct file *file = ctx->file;
|
||||
__u32 fd = ctx->fd;
|
||||
|
||||
if (task == NULL || file == NULL)
|
||||
return 0;
|
||||
|
||||
if (ctx->meta->seq_num == 0) {
|
||||
count = 0;
|
||||
BPF_SEQ_PRINTF(seq, " tgid gid fd file\n");
|
||||
}
|
||||
|
||||
if (tgid == task->tgid && task->tgid != task->pid)
|
||||
count++;
|
||||
|
||||
if (last_tgid != task->tgid) {
|
||||
last_tgid = task->tgid;
|
||||
unique_tgid_count++;
|
||||
}
|
||||
|
||||
BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
|
||||
(long)file->f_op);
|
||||
return 0;
|
||||
}
|
||||
|
||||
In the above example, the section name ``SEC(iter/task_file)`` indicates that
|
||||
the program is a BPF iterator program to iterate all files from all tasks. The
|
||||
context of the program is ``bpf_iter__task_file`` struct.
|
||||
|
||||
The user space program invokes the BPF iterator program running in the kernel
|
||||
by issuing a ``read()`` syscall. Once invoked, the BPF
|
||||
program can export data to user space using a variety of BPF helper functions.
|
||||
You can use either ``bpf_seq_printf()`` (and BPF_SEQ_PRINTF helper macro) or
|
||||
``bpf_seq_write()`` function based on whether you need formatted output or just
|
||||
binary data, respectively. For binary-encoded data, the user space applications
|
||||
can process the data from ``bpf_seq_write()`` as needed. For the formatted data,
|
||||
you can use ``cat <path>`` to print the results similar to ``cat
|
||||
/proc/net/netlink`` after pinning the BPF iterator to the bpffs mount. Later,
|
||||
use ``rm -f <path>`` to remove the pinned iterator.
|
||||
|
||||
For example, you can use the following command to create a BPF iterator from the
|
||||
``bpf_iter_ipv6_route.o`` object file and pin it to the ``/sys/fs/bpf/my_route``
|
||||
path:
|
||||
|
||||
::
|
||||
|
||||
$ bpftool iter pin ./bpf_iter_ipv6_route.o /sys/fs/bpf/my_route
|
||||
|
||||
And then print out the results using the following command:
|
||||
|
||||
::
|
||||
|
||||
$ cat /sys/fs/bpf/my_route
|
||||
|
||||
|
||||
-------------------------------------------------------
|
||||
Implement Kernel Support for BPF Iterator Program Types
|
||||
-------------------------------------------------------
|
||||
|
||||
To implement a BPF iterator in the kernel, the developer must make a one-time
|
||||
change to the following key data structure defined in the `bpf.h
|
||||
<https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/tree/include/linux/bpf.h>`_
|
||||
file.
|
||||
|
||||
::
|
||||
|
||||
struct bpf_iter_reg {
|
||||
const char *target;
|
||||
bpf_iter_attach_target_t attach_target;
|
||||
bpf_iter_detach_target_t detach_target;
|
||||
bpf_iter_show_fdinfo_t show_fdinfo;
|
||||
bpf_iter_fill_link_info_t fill_link_info;
|
||||
bpf_iter_get_func_proto_t get_func_proto;
|
||||
u32 ctx_arg_info_size;
|
||||
u32 feature;
|
||||
struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
|
||||
const struct bpf_iter_seq_info *seq_info;
|
||||
};
|
||||
|
||||
After filling the data structure fields, call ``bpf_iter_reg_target()`` to
|
||||
register the iterator to the main BPF iterator subsystem.
|
||||
|
||||
The following is the breakdown for each field in struct ``bpf_iter_reg``.
|
||||
|
||||
.. list-table::
|
||||
:widths: 25 50
|
||||
:header-rows: 1
|
||||
|
||||
* - Fields
|
||||
- Description
|
||||
* - target
|
||||
- Specifies the name of the BPF iterator. For example: ``bpf_map``,
|
||||
``bpf_map_elem``. The name should be different from other ``bpf_iter`` target names in the kernel.
|
||||
* - attach_target and detach_target
|
||||
- Allows for target specific ``link_create`` action since some targets
|
||||
may need special processing. Called during the user space link_create stage.
|
||||
* - show_fdinfo and fill_link_info
|
||||
- Called to fill target specific information when user tries to get link
|
||||
info associated with the iterator.
|
||||
* - get_func_proto
|
||||
- Permits a BPF iterator to access BPF helpers specific to the iterator.
|
||||
* - ctx_arg_info_size and ctx_arg_info
|
||||
- Specifies the verifier states for BPF program arguments associated with
|
||||
the bpf iterator.
|
||||
* - feature
|
||||
- Specifies certain action requests in the kernel BPF iterator
|
||||
infrastructure. Currently, only BPF_ITER_RESCHED is supported. This means
|
||||
that the kernel function cond_resched() is called to avoid other kernel
|
||||
subsystems (e.g., rcu) misbehaving.
|
||||
* - seq_info
|
||||
- Specifies the set of seq operations for the BPF iterator and helpers to
|
||||
initialize/free the private data for the corresponding ``seq_file``.
|
||||
|
||||
|
||||
`Click here
|
||||
<https://lore.kernel.org/bpf/20210212183107.50963-2-songliubraving@fb.com/>`_
|
||||
to see an implementation of the ``task_vma`` BPF iterator in the kernel.
|
||||
|
||||
---------------------------------
|
||||
Parameterizing BPF Task Iterators
|
||||
---------------------------------
|
||||
|
||||
By default, BPF iterators walk through all the objects of the specified types
|
||||
(processes, cgroups, maps, etc.) across the entire system to read relevant
|
||||
kernel data. But often, there are cases where we only care about a much smaller
|
||||
subset of iterable kernel objects, such as only iterating tasks within a
|
||||
specific process. Therefore, BPF iterator programs support filtering out objects
|
||||
from iteration by allowing user space to configure the iterator program when it
|
||||
is attached.
|
||||
|
||||
--------------------------
|
||||
BPF Task Iterator Program
|
||||
--------------------------
|
||||
|
||||
The following code is a BPF iterator program to print files and task information
|
||||
through the ``seq_file`` of the iterator. It is a standard BPF iterator program
|
||||
that visits every file of an iterator. We will use this BPF program in our
|
||||
example later.
|
||||
|
||||
::
|
||||
|
||||
#include <vmlinux.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
||||
SEC("iter/task_file")
|
||||
int dump_task_file(struct bpf_iter__task_file *ctx)
|
||||
{
|
||||
struct seq_file *seq = ctx->meta->seq;
|
||||
struct task_struct *task = ctx->task;
|
||||
struct file *file = ctx->file;
|
||||
__u32 fd = ctx->fd;
|
||||
if (task == NULL || file == NULL)
|
||||
return 0;
|
||||
if (ctx->meta->seq_num == 0) {
|
||||
BPF_SEQ_PRINTF(seq, " tgid pid fd file\n");
|
||||
}
|
||||
BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
|
||||
(long)file->f_op);
|
||||
return 0;
|
||||
}
|
||||
|
||||
----------------------------------------
|
||||
Creating a File Iterator with Parameters
|
||||
----------------------------------------
|
||||
|
||||
Now, let us look at how to create an iterator that includes only files of a
|
||||
process.
|
||||
|
||||
First, fill the ``bpf_iter_attach_opts`` struct as shown below:
|
||||
|
||||
::
|
||||
|
||||
LIBBPF_OPTS(bpf_iter_attach_opts, opts);
|
||||
union bpf_iter_link_info linfo;
|
||||
memset(&linfo, 0, sizeof(linfo));
|
||||
linfo.task.pid = getpid();
|
||||
opts.link_info = &linfo;
|
||||
opts.link_info_len = sizeof(linfo);
|
||||
|
||||
``linfo.task.pid``, if it is non-zero, directs the kernel to create an iterator
|
||||
that only includes opened files for the process with the specified ``pid``. In
|
||||
this example, we will only be iterating files for our process. If
|
||||
``linfo.task.pid`` is zero, the iterator will visit every opened file of every
|
||||
process. Similarly, ``linfo.task.tid`` directs the kernel to create an iterator
|
||||
that visits opened files of a specific thread, not a process. In this example,
|
||||
``linfo.task.tid`` is different from ``linfo.task.pid`` only if the thread has a
|
||||
separate file descriptor table. In most circumstances, all process threads share
|
||||
a single file descriptor table.
|
||||
|
||||
Now, in the userspace program, pass a pointer to the struct to
|
||||
``bpf_program__attach_iter()``.
|
||||
|
||||
::
|
||||
|
||||
link = bpf_program__attach_iter(prog, &opts);
|
||||
iter_fd = bpf_iter_create(bpf_link__fd(link));
|
||||
|
||||
If both *tid* and *pid* are zero, an iterator created from this struct
|
||||
``bpf_iter_attach_opts`` will include every opened file of every task in the
|
||||
system (in the namespace, actually.) It is the same as passing a NULL as the
|
||||
second argument to ``bpf_program__attach_iter()``.
|
||||
|
||||
The whole program looks like the following code:
|
||||
|
||||
::
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <bpf/bpf.h>
|
||||
#include <bpf/libbpf.h>
|
||||
#include "bpf_iter_task_ex.skel.h"
|
||||
|
||||
static int do_read_opts(struct bpf_program *prog, struct bpf_iter_attach_opts *opts)
|
||||
{
|
||||
struct bpf_link *link;
|
||||
char buf[16] = {};
|
||||
int iter_fd = -1, len;
|
||||
int ret = 0;
|
||||
|
||||
link = bpf_program__attach_iter(prog, opts);
|
||||
if (!link) {
|
||||
fprintf(stderr, "bpf_program__attach_iter() fails\n");
|
||||
return -1;
|
||||
}
|
||||
iter_fd = bpf_iter_create(bpf_link__fd(link));
|
||||
if (iter_fd < 0) {
|
||||
fprintf(stderr, "bpf_iter_create() fails\n");
|
||||
ret = -1;
|
||||
goto free_link;
|
||||
}
|
||||
/* not check contents, but ensure read() ends without error */
|
||||
while ((len = read(iter_fd, buf, sizeof(buf) - 1)) > 0) {
|
||||
buf[len] = 0;
|
||||
printf("%s", buf);
|
||||
}
|
||||
printf("\n");
|
||||
free_link:
|
||||
if (iter_fd >= 0)
|
||||
close(iter_fd);
|
||||
bpf_link__destroy(link);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void test_task_file(void)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_iter_attach_opts, opts);
|
||||
struct bpf_iter_task_ex *skel;
|
||||
union bpf_iter_link_info linfo;
|
||||
skel = bpf_iter_task_ex__open_and_load();
|
||||
if (skel == NULL)
|
||||
return;
|
||||
memset(&linfo, 0, sizeof(linfo));
|
||||
linfo.task.pid = getpid();
|
||||
opts.link_info = &linfo;
|
||||
opts.link_info_len = sizeof(linfo);
|
||||
printf("PID %d\n", getpid());
|
||||
do_read_opts(skel->progs.dump_task_file, &opts);
|
||||
bpf_iter_task_ex__destroy(skel);
|
||||
}
|
||||
|
||||
int main(int argc, const char * const * argv)
|
||||
{
|
||||
test_task_file();
|
||||
return 0;
|
||||
}
|
||||
|
||||
The following lines are the output of the program.
|
||||
::
|
||||
|
||||
PID 1859
|
||||
|
||||
tgid pid fd file
|
||||
1859 1859 0 ffffffff82270aa0
|
||||
1859 1859 1 ffffffff82270aa0
|
||||
1859 1859 2 ffffffff82270aa0
|
||||
1859 1859 3 ffffffff82272980
|
||||
1859 1859 4 ffffffff8225e120
|
||||
1859 1859 5 ffffffff82255120
|
||||
1859 1859 6 ffffffff82254f00
|
||||
1859 1859 7 ffffffff82254d80
|
||||
1859 1859 8 ffffffff8225abe0
|
||||
|
||||
------------------
|
||||
Without Parameters
|
||||
------------------
|
||||
|
||||
Let us look at how a BPF iterator without parameters skips files of other
|
||||
processes in the system. In this case, the BPF program has to check the pid or
|
||||
the tid of tasks, or it will receive every opened file in the system (in the
|
||||
current *pid* namespace, actually). So, we usually add a global variable in the
|
||||
BPF program to pass a *pid* to the BPF program.
|
||||
|
||||
The BPF program would look like the following block.
|
||||
|
||||
::
|
||||
|
||||
......
|
||||
int target_pid = 0;
|
||||
|
||||
SEC("iter/task_file")
|
||||
int dump_task_file(struct bpf_iter__task_file *ctx)
|
||||
{
|
||||
......
|
||||
if (task->tgid != target_pid) /* Check task->pid instead to check thread IDs */
|
||||
return 0;
|
||||
BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
|
||||
(long)file->f_op);
|
||||
return 0;
|
||||
}
|
||||
|
||||
The user space program would look like the following block:
|
||||
|
||||
::
|
||||
|
||||
......
|
||||
static void test_task_file(void)
|
||||
{
|
||||
......
|
||||
skel = bpf_iter_task_ex__open_and_load();
|
||||
if (skel == NULL)
|
||||
return;
|
||||
skel->bss->target_pid = getpid(); /* process ID. For thread id, use gettid() */
|
||||
memset(&linfo, 0, sizeof(linfo));
|
||||
linfo.task.pid = getpid();
|
||||
opts.link_info = &linfo;
|
||||
opts.link_info_len = sizeof(linfo);
|
||||
......
|
||||
}
|
||||
|
||||
``target_pid`` is a global variable in the BPF program. The user space program
|
||||
should initialize the variable with a process ID to skip opened files of other
|
||||
processes in the BPF program. When you parametrize a BPF iterator, the iterator
|
||||
calls the BPF program fewer times, which can save significant resources.
|
||||
|
||||
---------------------------
|
||||
Parametrizing VMA Iterators
|
||||
---------------------------
|
||||
|
||||
By default, a BPF VMA iterator includes every VMA in every process. However,
|
||||
you can still specify a process or a thread to include only its VMAs. Unlike
|
||||
files, a thread cannot have a separate address space (since Linux 2.6.0-test6).
|
||||
Here, using *tid* makes no difference from using *pid*.
|
||||
|
||||
----------------------------
|
||||
Parametrizing Task Iterators
|
||||
----------------------------
|
||||
|
||||
A BPF task iterator with *pid* includes all tasks (threads) of a process. The
|
||||
BPF program receives these tasks one after another. You can specify a BPF task
|
||||
iterator with *tid* parameter to include only the tasks that match the given
|
||||
*tid*.
|
@ -24,6 +24,7 @@ that goes into great technical depth about the BPF Architecture.
|
||||
maps
|
||||
bpf_prog_run
|
||||
classic_vs_extended.rst
|
||||
bpf_iterators
|
||||
bpf_licensing
|
||||
test_debug
|
||||
clang-notes
|
||||
|
@ -122,11 +122,11 @@ BPF_END 0xd0 byte swap operations (see `Byte swap instructions`_ below)
|
||||
|
||||
``BPF_XOR | BPF_K | BPF_ALU`` means::
|
||||
|
||||
src_reg = (u32) src_reg ^ (u32) imm32
|
||||
dst_reg = (u32) dst_reg ^ (u32) imm32
|
||||
|
||||
``BPF_XOR | BPF_K | BPF_ALU64`` means::
|
||||
|
||||
src_reg = src_reg ^ imm32
|
||||
dst_reg = dst_reg ^ imm32
|
||||
|
||||
|
||||
Byte swap instructions
|
||||
|
@ -191,6 +191,15 @@ rebooting or panicking. Due to this additional restrictions apply to these
|
||||
calls. At the moment they only require CAP_SYS_BOOT capability, but more can be
|
||||
added later.
|
||||
|
||||
2.4.8 KF_RCU flag
|
||||
-----------------
|
||||
|
||||
The KF_RCU flag is used for kfuncs which have an RCU pointer as their argument.
|
||||
When used together with KF_ACQUIRE, it indicates the kfunc should have a
|
||||
single argument which must be a trusted argument or a MEM_RCU pointer.
|
||||
The argument may have a reference count of 0 and the kfunc must take this
|
||||
into consideration.
|
||||
|
||||
2.5 Registering the kfuncs
|
||||
--------------------------
|
||||
|
||||
@ -213,3 +222,201 @@ type. An example is shown below::
|
||||
return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_task_kfunc_set);
|
||||
}
|
||||
late_initcall(init_subsystem);
|
||||
|
||||
3. Core kfuncs
|
||||
==============
|
||||
|
||||
The BPF subsystem provides a number of "core" kfuncs that are potentially
|
||||
applicable to a wide variety of different possible use cases and programs.
|
||||
Those kfuncs are documented here.
|
||||
|
||||
3.1 struct task_struct * kfuncs
|
||||
-------------------------------
|
||||
|
||||
There are a number of kfuncs that allow ``struct task_struct *`` objects to be
|
||||
used as kptrs:
|
||||
|
||||
.. kernel-doc:: kernel/bpf/helpers.c
|
||||
:identifiers: bpf_task_acquire bpf_task_release
|
||||
|
||||
These kfuncs are useful when you want to acquire or release a reference to a
|
||||
``struct task_struct *`` that was passed as e.g. a tracepoint arg, or a
|
||||
struct_ops callback arg. For example:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
/**
|
||||
* A trivial example tracepoint program that shows how to
|
||||
* acquire and release a struct task_struct * pointer.
|
||||
*/
|
||||
SEC("tp_btf/task_newtask")
|
||||
int BPF_PROG(task_acquire_release_example, struct task_struct *task, u64 clone_flags)
|
||||
{
|
||||
struct task_struct *acquired;
|
||||
|
||||
acquired = bpf_task_acquire(task);
|
||||
|
||||
/*
|
||||
* In a typical program you'd do something like store
|
||||
* the task in a map, and the map will automatically
|
||||
* release it later. Here, we release it manually.
|
||||
*/
|
||||
bpf_task_release(acquired);
|
||||
return 0;
|
||||
}
|
||||
|
||||
----
|
||||
|
||||
A BPF program can also look up a task from a pid. This can be useful if the
|
||||
caller doesn't have a trusted pointer to a ``struct task_struct *`` object that
|
||||
it can acquire a reference on with bpf_task_acquire().
|
||||
|
||||
.. kernel-doc:: kernel/bpf/helpers.c
|
||||
:identifiers: bpf_task_from_pid
|
||||
|
||||
Here is an example of it being used:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
SEC("tp_btf/task_newtask")
|
||||
int BPF_PROG(task_get_pid_example, struct task_struct *task, u64 clone_flags)
|
||||
{
|
||||
struct task_struct *lookup;
|
||||
|
||||
lookup = bpf_task_from_pid(task->pid);
|
||||
if (!lookup)
|
||||
/* A task should always be found, as %task is a tracepoint arg. */
|
||||
return -ENOENT;
|
||||
|
||||
if (lookup->pid != task->pid) {
|
||||
/* bpf_task_from_pid() looks up the task via its
|
||||
* globally-unique pid from the init_pid_ns. Thus,
|
||||
* the pid of the lookup task should always be the
|
||||
* same as the input task.
|
||||
*/
|
||||
bpf_task_release(lookup);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* bpf_task_from_pid() returns an acquired reference,
|
||||
* so it must be dropped before returning from the
|
||||
* tracepoint handler.
|
||||
*/
|
||||
bpf_task_release(lookup);
|
||||
return 0;
|
||||
}
|
||||
|
||||
3.2 struct cgroup * kfuncs
|
||||
--------------------------
|
||||
|
||||
``struct cgroup *`` objects also have acquire and release functions:
|
||||
|
||||
.. kernel-doc:: kernel/bpf/helpers.c
|
||||
:identifiers: bpf_cgroup_acquire bpf_cgroup_release
|
||||
|
||||
These kfuncs are used in exactly the same manner as bpf_task_acquire() and
|
||||
bpf_task_release() respectively, so we won't provide examples for them.
|
||||
|
||||
----
|
||||
|
||||
You may also acquire a reference to a ``struct cgroup`` kptr that's already
|
||||
stored in a map using bpf_cgroup_kptr_get():
|
||||
|
||||
.. kernel-doc:: kernel/bpf/helpers.c
|
||||
:identifiers: bpf_cgroup_kptr_get
|
||||
|
||||
Here's an example of how it can be used:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
/* struct containing the struct cgroup kptr which is actually stored in the map. */
|
||||
struct __cgroups_kfunc_map_value {
|
||||
struct cgroup __kptr_ref * cgroup;
|
||||
};
|
||||
|
||||
/* The map containing struct __cgroups_kfunc_map_value entries. */
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_HASH);
|
||||
__type(key, int);
|
||||
__type(value, struct __cgroups_kfunc_map_value);
|
||||
__uint(max_entries, 1);
|
||||
} __cgroups_kfunc_map SEC(".maps");
|
||||
|
||||
/* ... */
|
||||
|
||||
/**
|
||||
* A simple example tracepoint program showing how a
|
||||
* struct cgroup kptr that is stored in a map can
|
||||
* be acquired using the bpf_cgroup_kptr_get() kfunc.
|
||||
*/
|
||||
SEC("tp_btf/cgroup_mkdir")
|
||||
int BPF_PROG(cgroup_kptr_get_example, struct cgroup *cgrp, const char *path)
|
||||
{
|
||||
struct cgroup *kptr;
|
||||
struct __cgroups_kfunc_map_value *v;
|
||||
s32 id = cgrp->self.id;
|
||||
|
||||
/* Assume a cgroup kptr was previously stored in the map. */
|
||||
v = bpf_map_lookup_elem(&__cgroups_kfunc_map, &id);
|
||||
if (!v)
|
||||
return -ENOENT;
|
||||
|
||||
/* Acquire a reference to the cgroup kptr that's already stored in the map. */
|
||||
kptr = bpf_cgroup_kptr_get(&v->cgroup);
|
||||
if (!kptr)
|
||||
/* If no cgroup was present in the map, it's because
|
||||
* we're racing with another CPU that removed it with
|
||||
* bpf_kptr_xchg() between the bpf_map_lookup_elem()
|
||||
* above, and our call to bpf_cgroup_kptr_get().
|
||||
* bpf_cgroup_kptr_get() internally safely handles this
|
||||
* race, and will return NULL if the cgroup is no longer
|
||||
* present in the map by the time we invoke the kfunc.
|
||||
*/
|
||||
return -EBUSY;
|
||||
|
||||
/* Free the reference we just took above. Note that the
|
||||
* original struct cgroup kptr is still in the map. It will
|
||||
* be freed either at a later time if another context deletes
|
||||
* it from the map, or automatically by the BPF subsystem if
|
||||
* it's still present when the map is destroyed.
|
||||
*/
|
||||
bpf_cgroup_release(kptr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
----
|
||||
|
||||
Another kfunc available for interacting with ``struct cgroup *`` objects is
|
||||
bpf_cgroup_ancestor(). This allows callers to access the ancestor of a cgroup,
|
||||
and return it as a cgroup kptr.
|
||||
|
||||
.. kernel-doc:: kernel/bpf/helpers.c
|
||||
:identifiers: bpf_cgroup_ancestor
|
||||
|
||||
Eventually, BPF should be updated to allow this to happen with a normal memory
|
||||
load in the program itself. This is currently not possible without more work in
|
||||
the verifier. bpf_cgroup_ancestor() can be used as follows:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
/**
|
||||
* Simple tracepoint example that illustrates how a cgroup's
|
||||
* ancestor can be accessed using bpf_cgroup_ancestor().
|
||||
*/
|
||||
SEC("tp_btf/cgroup_mkdir")
|
||||
int BPF_PROG(cgrp_ancestor_example, struct cgroup *cgrp, const char *path)
|
||||
{
|
||||
struct cgroup *parent;
|
||||
|
||||
/* The parent cgroup resides at the level before the current cgroup's level. */
|
||||
parent = bpf_cgroup_ancestor(cgrp, cgrp->level - 1);
|
||||
if (!parent)
|
||||
return -ENOENT;
|
||||
|
||||
bpf_printk("Parent id is %d", parent->self.id);
|
||||
|
||||
/* Return the parent cgroup that was acquired above. */
|
||||
bpf_cgroup_release(parent);
|
||||
return 0;
|
||||
}
|
||||
|
155
Documentation/bpf/map_sk_storage.rst
Normal file
155
Documentation/bpf/map_sk_storage.rst
Normal file
@ -0,0 +1,155 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0-only
|
||||
.. Copyright (C) 2022 Red Hat, Inc.
|
||||
|
||||
=======================
|
||||
BPF_MAP_TYPE_SK_STORAGE
|
||||
=======================
|
||||
|
||||
.. note::
|
||||
- ``BPF_MAP_TYPE_SK_STORAGE`` was introduced in kernel version 5.2
|
||||
|
||||
``BPF_MAP_TYPE_SK_STORAGE`` is used to provide socket-local storage for BPF
|
||||
programs. A map of type ``BPF_MAP_TYPE_SK_STORAGE`` declares the type of storage
|
||||
to be provided and acts as the handle for accessing the socket-local
|
||||
storage. The values for maps of type ``BPF_MAP_TYPE_SK_STORAGE`` are stored
|
||||
locally with each socket instead of with the map. The kernel is responsible for
|
||||
allocating storage for a socket when requested and for freeing the storage when
|
||||
either the map or the socket is deleted.
|
||||
|
||||
.. note::
|
||||
- The key type must be ``int`` and ``max_entries`` must be set to ``0``.
|
||||
- The ``BPF_F_NO_PREALLOC`` flag must be used when creating a map for
|
||||
socket-local storage.
|
||||
|
||||
Usage
|
||||
=====
|
||||
|
||||
Kernel BPF
|
||||
----------
|
||||
|
||||
bpf_sk_storage_get()
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
void *bpf_sk_storage_get(struct bpf_map *map, void *sk, void *value, u64 flags)
|
||||
|
||||
Socket-local storage can be retrieved using the ``bpf_sk_storage_get()``
|
||||
helper. The helper gets the storage from ``sk`` that is associated with ``map``.
|
||||
If the ``BPF_LOCAL_STORAGE_GET_F_CREATE`` flag is used then
|
||||
``bpf_sk_storage_get()`` will create the storage for ``sk`` if it does not
|
||||
already exist. ``value`` can be used together with
|
||||
``BPF_LOCAL_STORAGE_GET_F_CREATE`` to initialize the storage value, otherwise it
|
||||
will be zero initialized. Returns a pointer to the storage on success, or
|
||||
``NULL`` in case of failure.
|
||||
|
||||
.. note::
|
||||
- ``sk`` is a kernel ``struct sock`` pointer for LSM or tracing programs.
|
||||
- ``sk`` is a ``struct bpf_sock`` pointer for other program types.
|
||||
|
||||
bpf_sk_storage_delete()
|
||||
~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
long bpf_sk_storage_delete(struct bpf_map *map, void *sk)
|
||||
|
||||
Socket-local storage can be deleted using the ``bpf_sk_storage_delete()``
|
||||
helper. The helper deletes the storage from ``sk`` that is identified by
|
||||
``map``. Returns ``0`` on success, or negative error in case of failure.
|
||||
|
||||
User space
|
||||
----------
|
||||
|
||||
bpf_map_update_elem()
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
int bpf_map_update_elem(int map_fd, const void *key, const void *value, __u64 flags)
|
||||
|
||||
Socket-local storage for the socket identified by ``key`` belonging to
|
||||
``map_fd`` can be added or updated using the ``bpf_map_update_elem()`` libbpf
|
||||
function. ``key`` must be a pointer to a valid ``fd`` in the user space
|
||||
program. The ``flags`` parameter can be used to control the update behaviour:
|
||||
|
||||
- ``BPF_ANY`` will create storage for ``fd`` or update existing storage.
|
||||
- ``BPF_NOEXIST`` will create storage for ``fd`` only if it did not already
|
||||
exist, otherwise the call will fail with ``-EEXIST``.
|
||||
- ``BPF_EXIST`` will update existing storage for ``fd`` if it already exists,
|
||||
otherwise the call will fail with ``-ENOENT``.
|
||||
|
||||
Returns ``0`` on success, or negative error in case of failure.
|
||||
|
||||
bpf_map_lookup_elem()
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
int bpf_map_lookup_elem(int map_fd, const void *key, void *value)
|
||||
|
||||
Socket-local storage for the socket identified by ``key`` belonging to
|
||||
``map_fd`` can be retrieved using the ``bpf_map_lookup_elem()`` libbpf
|
||||
function. ``key`` must be a pointer to a valid ``fd`` in the user space
|
||||
program. Returns ``0`` on success, or negative error in case of failure.
|
||||
|
||||
bpf_map_delete_elem()
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
int bpf_map_delete_elem(int map_fd, const void *key)
|
||||
|
||||
Socket-local storage for the socket identified by ``key`` belonging to
|
||||
``map_fd`` can be deleted using the ``bpf_map_delete_elem()`` libbpf
|
||||
function. Returns ``0`` on success, or negative error in case of failure.
|
||||
|
||||
Examples
|
||||
========
|
||||
|
||||
Kernel BPF
|
||||
----------
|
||||
|
||||
This snippet shows how to declare socket-local storage in a BPF program:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_SK_STORAGE);
|
||||
__uint(map_flags, BPF_F_NO_PREALLOC);
|
||||
__type(key, int);
|
||||
__type(value, struct my_storage);
|
||||
} socket_storage SEC(".maps");
|
||||
|
||||
This snippet shows how to retrieve socket-local storage in a BPF program:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
SEC("sockops")
|
||||
int _sockops(struct bpf_sock_ops *ctx)
|
||||
{
|
||||
struct my_storage *storage;
|
||||
struct bpf_sock *sk;
|
||||
|
||||
sk = ctx->sk;
|
||||
if (!sk)
|
||||
return 1;
|
||||
|
||||
storage = bpf_sk_storage_get(&socket_storage, sk, 0,
|
||||
BPF_LOCAL_STORAGE_GET_F_CREATE);
|
||||
if (!storage)
|
||||
return 1;
|
||||
|
||||
/* Use 'storage' here */
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
Please see the ``tools/testing/selftests/bpf`` directory for functional
|
||||
examples.
|
||||
|
||||
References
|
||||
==========
|
||||
|
||||
https://lwn.net/ml/netdev/20190426171103.61892-1-kafai@fb.com/
|
@ -136,6 +136,25 @@ static bool in_auipc_jalr_range(s64 val)
|
||||
val < ((1L << 31) - (1L << 11));
|
||||
}
|
||||
|
||||
/* Emit fixed-length instructions for address */
|
||||
static int emit_addr(u8 rd, u64 addr, bool extra_pass, struct rv_jit_context *ctx)
|
||||
{
|
||||
u64 ip = (u64)(ctx->insns + ctx->ninsns);
|
||||
s64 off = addr - ip;
|
||||
s64 upper = (off + (1 << 11)) >> 12;
|
||||
s64 lower = off & 0xfff;
|
||||
|
||||
if (extra_pass && !in_auipc_jalr_range(off)) {
|
||||
pr_err("bpf-jit: target offset 0x%llx is out of range\n", off);
|
||||
return -ERANGE;
|
||||
}
|
||||
|
||||
emit(rv_auipc(rd, upper), ctx);
|
||||
emit(rv_addi(rd, rd, lower), ctx);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Emit variable-length instructions for 32-bit and 64-bit imm */
|
||||
static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
|
||||
{
|
||||
/* Note that the immediate from the add is sign-extended,
|
||||
@ -1050,7 +1069,15 @@ out_be:
|
||||
u64 imm64;
|
||||
|
||||
imm64 = (u64)insn1.imm << 32 | (u32)imm;
|
||||
emit_imm(rd, imm64, ctx);
|
||||
if (bpf_pseudo_func(insn)) {
|
||||
/* fixed-length insns for extra jit pass */
|
||||
ret = emit_addr(rd, imm64, extra_pass, ctx);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else {
|
||||
emit_imm(rd, imm64, ctx);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -775,7 +775,7 @@ enum bpf_reg_type {
|
||||
PTR_TO_MEM, /* reg points to valid memory region */
|
||||
PTR_TO_BUF, /* reg points to a read/write buffer */
|
||||
PTR_TO_FUNC, /* reg points to a bpf program function */
|
||||
PTR_TO_DYNPTR, /* reg points to a dynptr */
|
||||
CONST_PTR_TO_DYNPTR, /* reg points to a const struct bpf_dynptr */
|
||||
__BPF_REG_TYPE_MAX,
|
||||
|
||||
/* Extended reg_types. */
|
||||
@ -1909,11 +1909,6 @@ static inline bool bpf_allow_uninit_stack(void)
|
||||
return perfmon_capable();
|
||||
}
|
||||
|
||||
static inline bool bpf_allow_ptr_to_map_access(void)
|
||||
{
|
||||
return perfmon_capable();
|
||||
}
|
||||
|
||||
static inline bool bpf_bypass_spec_v1(void)
|
||||
{
|
||||
return perfmon_capable();
|
||||
@ -2833,7 +2828,7 @@ void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data,
|
||||
enum bpf_dynptr_type type, u32 offset, u32 size);
|
||||
void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
|
||||
int bpf_dynptr_check_size(u32 size);
|
||||
u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr);
|
||||
u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr);
|
||||
|
||||
#ifdef CONFIG_BPF_LSM
|
||||
void bpf_cgroup_atype_get(u32 attach_btf_id, int cgroup_atype);
|
||||
|
@ -28,6 +28,7 @@ int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
|
||||
const struct bpf_prog *prog);
|
||||
|
||||
bool bpf_lsm_is_sleepable_hook(u32 btf_id);
|
||||
bool bpf_lsm_is_trusted(const struct bpf_prog *prog);
|
||||
|
||||
static inline struct bpf_storage_blob *bpf_inode(
|
||||
const struct inode *inode)
|
||||
@ -51,6 +52,11 @@ static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id)
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool bpf_lsm_is_trusted(const struct bpf_prog *prog)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline int bpf_lsm_verify_prog(struct bpf_verifier_log *vlog,
|
||||
const struct bpf_prog *prog)
|
||||
{
|
||||
|
@ -273,9 +273,9 @@ struct bpf_id_pair {
|
||||
u32 cur;
|
||||
};
|
||||
|
||||
/* Maximum number of register states that can exist at once */
|
||||
#define BPF_ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)
|
||||
#define MAX_CALL_FRAMES 8
|
||||
/* Maximum number of register states that can exist at once */
|
||||
#define BPF_ID_MAP_SIZE ((MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE) * MAX_CALL_FRAMES)
|
||||
struct bpf_verifier_state {
|
||||
/* call stack tracking */
|
||||
struct bpf_func_state *frame[MAX_CALL_FRAMES];
|
||||
@ -452,6 +452,7 @@ struct bpf_insn_aux_data {
|
||||
/* below fields are initialized once */
|
||||
unsigned int orig_idx; /* original instruction index */
|
||||
bool prune_point;
|
||||
bool jmp_point;
|
||||
};
|
||||
|
||||
#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */
|
||||
@ -531,7 +532,6 @@ struct bpf_verifier_env {
|
||||
bool explore_alu_limits;
|
||||
bool allow_ptr_leaks;
|
||||
bool allow_uninit_stack;
|
||||
bool allow_ptr_to_map_access;
|
||||
bool bpf_capable;
|
||||
bool bypass_spec_v1;
|
||||
bool bypass_spec_v4;
|
||||
@ -615,11 +615,9 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
|
||||
enum bpf_arg_type arg_type);
|
||||
int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
|
||||
u32 regno, u32 mem_size);
|
||||
bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env,
|
||||
struct bpf_reg_state *reg);
|
||||
bool is_dynptr_type_expected(struct bpf_verifier_env *env,
|
||||
struct bpf_reg_state *reg,
|
||||
enum bpf_arg_type arg_type);
|
||||
struct bpf_call_arg_meta;
|
||||
int process_dynptr_func(struct bpf_verifier_env *env, int regno,
|
||||
enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta);
|
||||
|
||||
/* this lives here instead of in bpf.h because it needs to dereference tgt_prog */
|
||||
static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog,
|
||||
@ -683,7 +681,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
|
||||
}
|
||||
}
|
||||
|
||||
#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | MEM_RCU | PTR_TRUSTED)
|
||||
#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED)
|
||||
|
||||
static inline bool bpf_type_has_unsafe_modifiers(u32 type)
|
||||
{
|
||||
|
@ -70,6 +70,7 @@
|
||||
#define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */
|
||||
#define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */
|
||||
#define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */
|
||||
#define KF_RCU (1 << 7) /* kfunc only takes rcu pointer arguments */
|
||||
|
||||
/*
|
||||
* Return the name of the passed struct, if exists, or halt the build if for
|
||||
@ -477,8 +478,10 @@ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
|
||||
u32 *btf_kfunc_id_set_contains(const struct btf *btf,
|
||||
enum bpf_prog_type prog_type,
|
||||
u32 kfunc_btf_id);
|
||||
u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id);
|
||||
int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
|
||||
const struct btf_kfunc_id_set *s);
|
||||
int register_btf_fmodret_id_set(const struct btf_kfunc_id_set *kset);
|
||||
s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id);
|
||||
int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt,
|
||||
struct module *owner);
|
||||
|
@ -266,5 +266,6 @@ MAX_BTF_TRACING_TYPE,
|
||||
|
||||
extern u32 btf_tracing_ids[];
|
||||
extern u32 bpf_cgroup_btf_id[];
|
||||
extern u32 bpf_local_storage_map_btf_id[];
|
||||
|
||||
#endif
|
||||
|
@ -82,6 +82,7 @@ struct sk_psock {
|
||||
u32 apply_bytes;
|
||||
u32 cork_bytes;
|
||||
u32 eval;
|
||||
bool redir_ingress; /* undefined if sk_redir is null */
|
||||
struct sk_msg *cork;
|
||||
struct sk_psock_progs progs;
|
||||
#if IS_ENABLED(CONFIG_BPF_STREAM_PARSER)
|
||||
|
@ -26,6 +26,7 @@ struct macsec_info {
|
||||
struct xfrm_md_info {
|
||||
u32 if_id;
|
||||
int link;
|
||||
struct dst_entry *dst_orig;
|
||||
};
|
||||
|
||||
struct metadata_dst {
|
||||
|
@ -2,8 +2,8 @@
|
||||
#ifndef __NETNS_XDP_H__
|
||||
#define __NETNS_XDP_H__
|
||||
|
||||
#include <linux/rculist.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
struct netns_xdp {
|
||||
struct mutex lock;
|
||||
|
@ -2323,8 +2323,8 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
|
||||
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
|
||||
#endif /* CONFIG_BPF_SYSCALL */
|
||||
|
||||
int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
|
||||
int flags);
|
||||
int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
|
||||
struct sk_msg *msg, u32 bytes, int flags);
|
||||
#endif /* CONFIG_NET_SOCK_MSG */
|
||||
|
||||
#if !defined(CONFIG_BPF_SYSCALL) || !defined(CONFIG_NET_SOCK_MSG)
|
||||
|
@ -2164,4 +2164,21 @@ static inline bool xfrm6_local_dontfrag(const struct sock *sk)
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (IS_BUILTIN(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
|
||||
(IS_MODULE(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
|
||||
|
||||
extern struct metadata_dst __percpu *xfrm_bpf_md_dst;
|
||||
|
||||
int register_xfrm_interface_bpf(void);
|
||||
|
||||
#else
|
||||
|
||||
static inline int register_xfrm_interface_bpf(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* _NET_XFRM_H */
|
||||
|
@ -5293,7 +5293,7 @@ union bpf_attr {
|
||||
* Return
|
||||
* Nothing. Always succeeds.
|
||||
*
|
||||
* long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags)
|
||||
* long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags)
|
||||
* Description
|
||||
* Read *len* bytes from *src* into *dst*, starting from *offset*
|
||||
* into *src*.
|
||||
@ -5303,7 +5303,7 @@ union bpf_attr {
|
||||
* of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
|
||||
* *flags* is not 0.
|
||||
*
|
||||
* long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
|
||||
* long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
|
||||
* Description
|
||||
* Write *len* bytes from *src* into *dst*, starting from *offset*
|
||||
* into *dst*.
|
||||
@ -5313,7 +5313,7 @@ union bpf_attr {
|
||||
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
|
||||
* is a read-only dynptr or if *flags* is not 0.
|
||||
*
|
||||
* void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len)
|
||||
* void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
|
||||
* Description
|
||||
* Get a pointer to the underlying dynptr data.
|
||||
*
|
||||
@ -5414,7 +5414,7 @@ union bpf_attr {
|
||||
* Drain samples from the specified user ring buffer, and invoke
|
||||
* the provided callback for each such sample:
|
||||
*
|
||||
* long (\*callback_fn)(struct bpf_dynptr \*dynptr, void \*ctx);
|
||||
* long (\*callback_fn)(const struct bpf_dynptr \*dynptr, void \*ctx);
|
||||
*
|
||||
* If **callback_fn** returns 0, the helper will continue to try
|
||||
* and drain the next sample, up to a maximum of
|
||||
|
@ -211,7 +211,6 @@ BPF_CALL_2(bpf_cgrp_storage_delete, struct bpf_map *, map, struct cgroup *, cgro
|
||||
return ret;
|
||||
}
|
||||
|
||||
BTF_ID_LIST_SINGLE(cgroup_storage_map_btf_ids, struct, bpf_local_storage_map)
|
||||
const struct bpf_map_ops cgrp_storage_map_ops = {
|
||||
.map_meta_equal = bpf_map_meta_equal,
|
||||
.map_alloc_check = bpf_local_storage_map_alloc_check,
|
||||
@ -222,7 +221,7 @@ const struct bpf_map_ops cgrp_storage_map_ops = {
|
||||
.map_update_elem = bpf_cgrp_storage_update_elem,
|
||||
.map_delete_elem = bpf_cgrp_storage_delete_elem,
|
||||
.map_check_btf = bpf_local_storage_map_check_btf,
|
||||
.map_btf_id = &cgroup_storage_map_btf_ids[0],
|
||||
.map_btf_id = &bpf_local_storage_map_btf_id[0],
|
||||
.map_owner_storage_ptr = cgroup_storage_ptr,
|
||||
};
|
||||
|
||||
|
@ -213,8 +213,6 @@ static void inode_storage_map_free(struct bpf_map *map)
|
||||
bpf_local_storage_map_free(map, &inode_cache, NULL);
|
||||
}
|
||||
|
||||
BTF_ID_LIST_SINGLE(inode_storage_map_btf_ids, struct,
|
||||
bpf_local_storage_map)
|
||||
const struct bpf_map_ops inode_storage_map_ops = {
|
||||
.map_meta_equal = bpf_map_meta_equal,
|
||||
.map_alloc_check = bpf_local_storage_map_alloc_check,
|
||||
@ -225,7 +223,7 @@ const struct bpf_map_ops inode_storage_map_ops = {
|
||||
.map_update_elem = bpf_fd_inode_storage_update_elem,
|
||||
.map_delete_elem = bpf_fd_inode_storage_delete_elem,
|
||||
.map_check_btf = bpf_local_storage_map_check_btf,
|
||||
.map_btf_id = &inode_storage_map_btf_ids[0],
|
||||
.map_btf_id = &bpf_local_storage_map_btf_id[0],
|
||||
.map_owner_storage_ptr = inode_storage_ptr,
|
||||
};
|
||||
|
||||
|
@ -345,11 +345,27 @@ BTF_ID(func, bpf_lsm_task_to_inode)
|
||||
BTF_ID(func, bpf_lsm_userns_create)
|
||||
BTF_SET_END(sleepable_lsm_hooks)
|
||||
|
||||
BTF_SET_START(untrusted_lsm_hooks)
|
||||
BTF_ID(func, bpf_lsm_bpf_map_free_security)
|
||||
BTF_ID(func, bpf_lsm_bpf_prog_alloc_security)
|
||||
BTF_ID(func, bpf_lsm_bpf_prog_free_security)
|
||||
BTF_ID(func, bpf_lsm_file_alloc_security)
|
||||
BTF_ID(func, bpf_lsm_file_free_security)
|
||||
BTF_ID(func, bpf_lsm_sk_alloc_security)
|
||||
BTF_ID(func, bpf_lsm_sk_free_security)
|
||||
BTF_ID(func, bpf_lsm_task_free)
|
||||
BTF_SET_END(untrusted_lsm_hooks)
|
||||
|
||||
bool bpf_lsm_is_sleepable_hook(u32 btf_id)
|
||||
{
|
||||
return btf_id_set_contains(&sleepable_lsm_hooks, btf_id);
|
||||
}
|
||||
|
||||
bool bpf_lsm_is_trusted(const struct bpf_prog *prog)
|
||||
{
|
||||
return !btf_id_set_contains(&untrusted_lsm_hooks, prog->aux->attach_btf_id);
|
||||
}
|
||||
|
||||
const struct bpf_prog_ops lsm_prog_ops = {
|
||||
};
|
||||
|
||||
|
@ -324,7 +324,7 @@ static void task_storage_map_free(struct bpf_map *map)
|
||||
bpf_local_storage_map_free(map, &task_cache, &bpf_task_storage_busy);
|
||||
}
|
||||
|
||||
BTF_ID_LIST_SINGLE(task_storage_map_btf_ids, struct, bpf_local_storage_map)
|
||||
BTF_ID_LIST_GLOBAL_SINGLE(bpf_local_storage_map_btf_id, struct, bpf_local_storage_map)
|
||||
const struct bpf_map_ops task_storage_map_ops = {
|
||||
.map_meta_equal = bpf_map_meta_equal,
|
||||
.map_alloc_check = bpf_local_storage_map_alloc_check,
|
||||
@ -335,7 +335,7 @@ const struct bpf_map_ops task_storage_map_ops = {
|
||||
.map_update_elem = bpf_pid_task_storage_update_elem,
|
||||
.map_delete_elem = bpf_pid_task_storage_delete_elem,
|
||||
.map_check_btf = bpf_local_storage_map_check_btf,
|
||||
.map_btf_id = &task_storage_map_btf_ids[0],
|
||||
.map_btf_id = &bpf_local_storage_map_btf_id[0],
|
||||
.map_owner_storage_ptr = task_storage_ptr,
|
||||
};
|
||||
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <linux/bpf_verifier.h>
|
||||
#include <linux/btf.h>
|
||||
#include <linux/btf_ids.h>
|
||||
#include <linux/bpf_lsm.h>
|
||||
#include <linux/skmsg.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/bsearch.h>
|
||||
@ -205,6 +206,7 @@ enum btf_kfunc_hook {
|
||||
BTF_KFUNC_HOOK_STRUCT_OPS,
|
||||
BTF_KFUNC_HOOK_TRACING,
|
||||
BTF_KFUNC_HOOK_SYSCALL,
|
||||
BTF_KFUNC_HOOK_FMODRET,
|
||||
BTF_KFUNC_HOOK_MAX,
|
||||
};
|
||||
|
||||
@ -5829,6 +5831,7 @@ static bool prog_args_trusted(const struct bpf_prog *prog)
|
||||
case BPF_PROG_TYPE_TRACING:
|
||||
return atype == BPF_TRACE_RAW_TP || atype == BPF_TRACE_ITER;
|
||||
case BPF_PROG_TYPE_LSM:
|
||||
return bpf_lsm_is_trusted(prog);
|
||||
case BPF_PROG_TYPE_STRUCT_OPS:
|
||||
return true;
|
||||
default:
|
||||
@ -7606,11 +7609,14 @@ u32 *btf_kfunc_id_set_contains(const struct btf *btf,
|
||||
return __btf_kfunc_id_set_contains(btf, hook, kfunc_btf_id);
|
||||
}
|
||||
|
||||
/* This function must be invoked only from initcalls/module init functions */
|
||||
int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
|
||||
const struct btf_kfunc_id_set *kset)
|
||||
u32 *btf_kfunc_is_modify_return(const struct btf *btf, u32 kfunc_btf_id)
|
||||
{
|
||||
return __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_FMODRET, kfunc_btf_id);
|
||||
}
|
||||
|
||||
static int __register_btf_kfunc_id_set(enum btf_kfunc_hook hook,
|
||||
const struct btf_kfunc_id_set *kset)
|
||||
{
|
||||
enum btf_kfunc_hook hook;
|
||||
struct btf *btf;
|
||||
int ret;
|
||||
|
||||
@ -7629,13 +7635,29 @@ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
|
||||
if (IS_ERR(btf))
|
||||
return PTR_ERR(btf);
|
||||
|
||||
hook = bpf_prog_type_to_kfunc_hook(prog_type);
|
||||
ret = btf_populate_kfunc_set(btf, hook, kset->set);
|
||||
btf_put(btf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* This function must be invoked only from initcalls/module init functions */
|
||||
int register_btf_kfunc_id_set(enum bpf_prog_type prog_type,
|
||||
const struct btf_kfunc_id_set *kset)
|
||||
{
|
||||
enum btf_kfunc_hook hook;
|
||||
|
||||
hook = bpf_prog_type_to_kfunc_hook(prog_type);
|
||||
return __register_btf_kfunc_id_set(hook, kset);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_btf_kfunc_id_set);
|
||||
|
||||
/* This function must be invoked only from initcalls/module init functions */
|
||||
int register_btf_fmodret_id_set(const struct btf_kfunc_id_set *kset)
|
||||
{
|
||||
return __register_btf_kfunc_id_set(BTF_KFUNC_HOOK_FMODRET, kset);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(register_btf_fmodret_id_set);
|
||||
|
||||
s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id)
|
||||
{
|
||||
struct btf_id_dtor_kfunc_tab *tab = btf->dtor_kfunc_tab;
|
||||
|
@ -1404,7 +1404,7 @@ static const struct bpf_func_proto bpf_kptr_xchg_proto = {
|
||||
#define DYNPTR_SIZE_MASK 0xFFFFFF
|
||||
#define DYNPTR_RDONLY_BIT BIT(31)
|
||||
|
||||
static bool bpf_dynptr_is_rdonly(struct bpf_dynptr_kern *ptr)
|
||||
static bool bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr)
|
||||
{
|
||||
return ptr->size & DYNPTR_RDONLY_BIT;
|
||||
}
|
||||
@ -1414,7 +1414,7 @@ static void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_typ
|
||||
ptr->size |= type << DYNPTR_TYPE_SHIFT;
|
||||
}
|
||||
|
||||
u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr)
|
||||
u32 bpf_dynptr_get_size(const struct bpf_dynptr_kern *ptr)
|
||||
{
|
||||
return ptr->size & DYNPTR_SIZE_MASK;
|
||||
}
|
||||
@ -1438,7 +1438,7 @@ void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
|
||||
memset(ptr, 0, sizeof(*ptr));
|
||||
}
|
||||
|
||||
static int bpf_dynptr_check_off_len(struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
|
||||
static int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
|
||||
{
|
||||
u32 size = bpf_dynptr_get_size(ptr);
|
||||
|
||||
@ -1483,7 +1483,7 @@ static const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
|
||||
.arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT,
|
||||
};
|
||||
|
||||
BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src,
|
||||
BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, const struct bpf_dynptr_kern *, src,
|
||||
u32, offset, u64, flags)
|
||||
{
|
||||
int err;
|
||||
@ -1495,7 +1495,11 @@ BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
memcpy(dst, src->data + src->offset + offset, len);
|
||||
/* Source and destination may possibly overlap, hence use memmove to
|
||||
* copy the data. E.g. bpf_dynptr_from_mem may create two dynptrs
|
||||
* pointing to overlapping PTR_TO_MAP_VALUE regions.
|
||||
*/
|
||||
memmove(dst, src->data + src->offset + offset, len);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1506,12 +1510,12 @@ static const struct bpf_func_proto bpf_dynptr_read_proto = {
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
|
||||
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
|
||||
.arg3_type = ARG_PTR_TO_DYNPTR,
|
||||
.arg3_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
|
||||
.arg4_type = ARG_ANYTHING,
|
||||
.arg5_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
|
||||
BPF_CALL_5(bpf_dynptr_write, const struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
|
||||
u32, len, u64, flags)
|
||||
{
|
||||
int err;
|
||||
@ -1523,7 +1527,11 @@ BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
memcpy(dst->data + dst->offset + offset, src, len);
|
||||
/* Source and destination may possibly overlap, hence use memmove to
|
||||
* copy the data. E.g. bpf_dynptr_from_mem may create two dynptrs
|
||||
* pointing to overlapping PTR_TO_MAP_VALUE regions.
|
||||
*/
|
||||
memmove(dst->data + dst->offset + offset, src, len);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -1532,14 +1540,14 @@ static const struct bpf_func_proto bpf_dynptr_write_proto = {
|
||||
.func = bpf_dynptr_write,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_INTEGER,
|
||||
.arg1_type = ARG_PTR_TO_DYNPTR,
|
||||
.arg1_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
|
||||
.arg2_type = ARG_ANYTHING,
|
||||
.arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
|
||||
.arg4_type = ARG_CONST_SIZE_OR_ZERO,
|
||||
.arg5_type = ARG_ANYTHING,
|
||||
};
|
||||
|
||||
BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
|
||||
BPF_CALL_3(bpf_dynptr_data, const struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
|
||||
{
|
||||
int err;
|
||||
|
||||
@ -1560,7 +1568,7 @@ static const struct bpf_func_proto bpf_dynptr_data_proto = {
|
||||
.func = bpf_dynptr_data,
|
||||
.gpl_only = false,
|
||||
.ret_type = RET_PTR_TO_DYNPTR_MEM_OR_NULL,
|
||||
.arg1_type = ARG_PTR_TO_DYNPTR,
|
||||
.arg1_type = ARG_PTR_TO_DYNPTR | MEM_RDONLY,
|
||||
.arg2_type = ARG_ANYTHING,
|
||||
.arg3_type = ARG_CONST_ALLOC_SIZE_OR_ZERO,
|
||||
};
|
||||
@ -1833,8 +1841,59 @@ struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
|
||||
*/
|
||||
struct task_struct *bpf_task_acquire(struct task_struct *p)
|
||||
{
|
||||
refcount_inc(&p->rcu_users);
|
||||
return p;
|
||||
return get_task_struct(p);
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_task_acquire_not_zero - Acquire a reference to a rcu task object. A task
|
||||
* acquired by this kfunc which is not stored in a map as a kptr, must be
|
||||
* released by calling bpf_task_release().
|
||||
* @p: The task on which a reference is being acquired.
|
||||
*/
|
||||
struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p)
|
||||
{
|
||||
/* For the time being this function returns NULL, as it's not currently
|
||||
* possible to safely acquire a reference to a task with RCU protection
|
||||
* using get_task_struct() and put_task_struct(). This is due to the
|
||||
* slightly odd mechanics of p->rcu_users, and how task RCU protection
|
||||
* works.
|
||||
*
|
||||
* A struct task_struct is refcounted by two different refcount_t
|
||||
* fields:
|
||||
*
|
||||
* 1. p->usage: The "true" refcount field which tracks a task's
|
||||
* lifetime. The task is freed as soon as this
|
||||
* refcount drops to 0.
|
||||
*
|
||||
* 2. p->rcu_users: An "RCU users" refcount field which is statically
|
||||
* initialized to 2, and is co-located in a union with
|
||||
* a struct rcu_head field (p->rcu). p->rcu_users
|
||||
* essentially encapsulates a single p->usage
|
||||
* refcount, and when p->rcu_users goes to 0, an RCU
|
||||
* callback is scheduled on the struct rcu_head which
|
||||
* decrements the p->usage refcount.
|
||||
*
|
||||
* There are two important implications to this task refcounting logic
|
||||
* described above. The first is that
|
||||
* refcount_inc_not_zero(&p->rcu_users) cannot be used anywhere, as
|
||||
* after the refcount goes to 0, the RCU callback being scheduled will
|
||||
* cause the memory backing the refcount to again be nonzero due to the
|
||||
* fields sharing a union. The other is that we can't rely on RCU to
|
||||
* guarantee that a task is valid in a BPF program. This is because a
|
||||
* task could have already transitioned to being in the TASK_DEAD
|
||||
* state, had its rcu_users refcount go to 0, and its rcu callback
|
||||
* invoked in which it drops its single p->usage reference. At this
|
||||
* point the task will be freed as soon as the last p->usage reference
|
||||
* goes to 0, without waiting for another RCU gp to elapse. The only
|
||||
* way that a BPF program can guarantee that a task is valid in this
|
||||
* scenario is to hold a p->usage refcount itself.
|
||||
*
|
||||
* Until we're able to resolve this issue, either by pulling
|
||||
* p->rcu_users and p->rcu out of the union, or by getting rid of
|
||||
* p->usage and just using p->rcu_users for refcounting, we'll just
|
||||
* return NULL here.
|
||||
*/
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1845,33 +1904,15 @@ struct task_struct *bpf_task_acquire(struct task_struct *p)
|
||||
*/
|
||||
struct task_struct *bpf_task_kptr_get(struct task_struct **pp)
|
||||
{
|
||||
struct task_struct *p;
|
||||
|
||||
rcu_read_lock();
|
||||
p = READ_ONCE(*pp);
|
||||
|
||||
/* Another context could remove the task from the map and release it at
|
||||
* any time, including after we've done the lookup above. This is safe
|
||||
* because we're in an RCU read region, so the task is guaranteed to
|
||||
* remain valid until at least the rcu_read_unlock() below.
|
||||
/* We must return NULL here until we have clarity on how to properly
|
||||
* leverage RCU for ensuring a task's lifetime. See the comment above
|
||||
* in bpf_task_acquire_not_zero() for more details.
|
||||
*/
|
||||
if (p && !refcount_inc_not_zero(&p->rcu_users))
|
||||
/* If the task had been removed from the map and freed as
|
||||
* described above, refcount_inc_not_zero() will return false.
|
||||
* The task will be freed at some point after the current RCU
|
||||
* gp has ended, so just return NULL to the user.
|
||||
*/
|
||||
p = NULL;
|
||||
rcu_read_unlock();
|
||||
|
||||
return p;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_task_release - Release the reference acquired on a struct task_struct *.
|
||||
* If this kfunc is invoked in an RCU read region, the task_struct is
|
||||
* guaranteed to not be freed until the current grace period has ended, even if
|
||||
* its refcount drops to 0.
|
||||
* bpf_task_release - Release the reference acquired on a task.
|
||||
* @p: The task on which a reference is being released.
|
||||
*/
|
||||
void bpf_task_release(struct task_struct *p)
|
||||
@ -1879,7 +1920,7 @@ void bpf_task_release(struct task_struct *p)
|
||||
if (!p)
|
||||
return;
|
||||
|
||||
put_task_struct_rcu_user(p);
|
||||
put_task_struct(p);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_CGROUPS
|
||||
@ -1927,7 +1968,7 @@ struct cgroup *bpf_cgroup_kptr_get(struct cgroup **cgrpp)
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cgroup_release - Release the reference acquired on a struct cgroup *.
|
||||
* bpf_cgroup_release - Release the reference acquired on a cgroup.
|
||||
* If this kfunc is invoked in an RCU read region, the cgroup is guaranteed to
|
||||
* not be freed until the current grace period has ended, even if its refcount
|
||||
* drops to 0.
|
||||
@ -2013,6 +2054,7 @@ BTF_ID_FLAGS(func, bpf_list_push_back)
|
||||
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_task_acquire_not_zero, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_task_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
|
||||
#ifdef CONFIG_CGROUPS
|
||||
|
@ -171,9 +171,24 @@ static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node)
|
||||
memcg = get_memcg(c);
|
||||
old_memcg = set_active_memcg(memcg);
|
||||
for (i = 0; i < cnt; i++) {
|
||||
obj = __alloc(c, node);
|
||||
if (!obj)
|
||||
break;
|
||||
/*
|
||||
* free_by_rcu is only manipulated by irq work refill_work().
|
||||
* IRQ works on the same CPU are called sequentially, so it is
|
||||
* safe to use __llist_del_first() here. If alloc_bulk() is
|
||||
* invoked by the initial prefill, there will be no running
|
||||
* refill_work(), so __llist_del_first() is fine as well.
|
||||
*
|
||||
* In most cases, objects on free_by_rcu are from the same CPU.
|
||||
* If some objects come from other CPUs, it doesn't incur any
|
||||
* harm because NUMA_NO_NODE means the preference for current
|
||||
* numa node and it is not a guarantee.
|
||||
*/
|
||||
obj = __llist_del_first(&c->free_by_rcu);
|
||||
if (!obj) {
|
||||
obj = __alloc(c, node);
|
||||
if (!obj)
|
||||
break;
|
||||
}
|
||||
if (IS_ENABLED(CONFIG_PREEMPT_RT))
|
||||
/* In RT irq_work runs in per-cpu kthread, so disable
|
||||
* interrupts to avoid preemption and interrupts and
|
||||
@ -449,9 +464,17 @@ static void free_mem_alloc(struct bpf_mem_alloc *ma)
|
||||
{
|
||||
/* waiting_for_gp lists were drained, but __free_rcu might
|
||||
* still execute. Wait for it now before freeing the percpu caches.
|
||||
*
|
||||
* rcu_barrier_tasks_trace() doesn't imply synchronize_rcu_tasks_trace(),
|
||||
* but rcu_barrier_tasks_trace() and rcu_barrier() below are only used
|
||||
* to wait for the pending __free_rcu_tasks_trace() and __free_rcu(),
|
||||
* so if call_rcu(head, __free_rcu) is skipped due to
|
||||
* rcu_trace_implies_rcu_gp(), it will be OK to skip rcu_barrier() by
|
||||
* using rcu_trace_implies_rcu_gp() as well.
|
||||
*/
|
||||
rcu_barrier_tasks_trace();
|
||||
rcu_barrier();
|
||||
if (!rcu_trace_implies_rcu_gp())
|
||||
rcu_barrier();
|
||||
free_mem_alloc_no_barrier(ma);
|
||||
}
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -489,7 +489,6 @@ int noinline bpf_fentry_test1(int a)
|
||||
return a + 1;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bpf_fentry_test1);
|
||||
ALLOW_ERROR_INJECTION(bpf_fentry_test1, ERRNO);
|
||||
|
||||
int noinline bpf_fentry_test2(int a, u64 b)
|
||||
{
|
||||
@ -733,7 +732,15 @@ noinline void bpf_kfunc_call_test_destructive(void)
|
||||
|
||||
__diag_pop();
|
||||
|
||||
ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
|
||||
BTF_SET8_START(bpf_test_modify_return_ids)
|
||||
BTF_ID_FLAGS(func, bpf_modify_return_test)
|
||||
BTF_ID_FLAGS(func, bpf_fentry_test1, KF_SLEEPABLE)
|
||||
BTF_SET8_END(bpf_test_modify_return_ids)
|
||||
|
||||
static const struct btf_kfunc_id_set bpf_test_modify_return_set = {
|
||||
.owner = THIS_MODULE,
|
||||
.set = &bpf_test_modify_return_ids,
|
||||
};
|
||||
|
||||
BTF_SET8_START(test_sk_check_kfunc_ids)
|
||||
BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
|
||||
@ -1666,7 +1673,8 @@ static int __init bpf_prog_test_run_init(void)
|
||||
};
|
||||
int ret;
|
||||
|
||||
ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
|
||||
ret = register_btf_fmodret_id_set(&bpf_test_modify_return_set);
|
||||
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
|
||||
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_prog_test_kfunc_set);
|
||||
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_prog_test_kfunc_set);
|
||||
return ret ?: register_btf_id_dtor_kfuncs(bpf_prog_test_dtor_kfunc,
|
||||
|
@ -310,7 +310,6 @@ bpf_sk_storage_ptr(void *owner)
|
||||
return &sk->sk_bpf_storage;
|
||||
}
|
||||
|
||||
BTF_ID_LIST_SINGLE(sk_storage_map_btf_ids, struct, bpf_local_storage_map)
|
||||
const struct bpf_map_ops sk_storage_map_ops = {
|
||||
.map_meta_equal = bpf_map_meta_equal,
|
||||
.map_alloc_check = bpf_local_storage_map_alloc_check,
|
||||
@ -321,7 +320,7 @@ const struct bpf_map_ops sk_storage_map_ops = {
|
||||
.map_update_elem = bpf_fd_sk_storage_update_elem,
|
||||
.map_delete_elem = bpf_fd_sk_storage_delete_elem,
|
||||
.map_check_btf = bpf_local_storage_map_check_btf,
|
||||
.map_btf_id = &sk_storage_map_btf_ids[0],
|
||||
.map_btf_id = &bpf_local_storage_map_btf_id[0],
|
||||
.map_local_storage_charge = bpf_sk_storage_charge,
|
||||
.map_local_storage_uncharge = bpf_sk_storage_uncharge,
|
||||
.map_owner_storage_ptr = bpf_sk_storage_ptr,
|
||||
|
@ -316,6 +316,8 @@ void metadata_dst_free(struct metadata_dst *md_dst)
|
||||
if (md_dst->type == METADATA_IP_TUNNEL)
|
||||
dst_cache_destroy(&md_dst->u.tun_info.dst_cache);
|
||||
#endif
|
||||
if (md_dst->type == METADATA_XFRM)
|
||||
dst_release(md_dst->u.xfrm_info.dst_orig);
|
||||
kfree(md_dst);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(metadata_dst_free);
|
||||
@ -340,16 +342,18 @@ EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu);
|
||||
|
||||
void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst)
|
||||
{
|
||||
#ifdef CONFIG_DST_CACHE
|
||||
int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu) {
|
||||
struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu);
|
||||
|
||||
#ifdef CONFIG_DST_CACHE
|
||||
if (one_md_dst->type == METADATA_IP_TUNNEL)
|
||||
dst_cache_destroy(&one_md_dst->u.tun_info.dst_cache);
|
||||
}
|
||||
#endif
|
||||
if (one_md_dst->type == METADATA_XFRM)
|
||||
dst_release(one_md_dst->u.xfrm_info.dst_orig);
|
||||
}
|
||||
free_percpu(md_dst);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(metadata_dst_free_percpu);
|
||||
|
@ -80,6 +80,7 @@
|
||||
#include <net/tls.h>
|
||||
#include <net/xdp.h>
|
||||
#include <net/mptcp.h>
|
||||
#include <net/netfilter/nf_conntrack_bpf.h>
|
||||
|
||||
static const struct bpf_func_proto *
|
||||
bpf_sk_base_func_proto(enum bpf_func_id func_id);
|
||||
@ -5630,6 +5631,15 @@ static const struct bpf_func_proto bpf_bind_proto = {
|
||||
};
|
||||
|
||||
#ifdef CONFIG_XFRM
|
||||
|
||||
#if (IS_BUILTIN(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
|
||||
(IS_MODULE(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
|
||||
|
||||
struct metadata_dst __percpu *xfrm_bpf_md_dst;
|
||||
EXPORT_SYMBOL_GPL(xfrm_bpf_md_dst);
|
||||
|
||||
#endif
|
||||
|
||||
BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index,
|
||||
struct bpf_xfrm_state *, to, u32, size, u64, flags)
|
||||
{
|
||||
@ -7992,6 +8002,19 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
|
||||
default:
|
||||
return bpf_sk_base_func_proto(func_id);
|
||||
}
|
||||
|
||||
#if IS_MODULE(CONFIG_NF_CONNTRACK) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)
|
||||
/* The nf_conn___init type is used in the NF_CONNTRACK kfuncs. The
|
||||
* kfuncs are defined in two different modules, and we want to be able
|
||||
* to use them interchangeably with the same BTF type ID. Because modules
|
||||
* can't de-duplicate BTF IDs between each other, we need the type to be
|
||||
* referenced in the vmlinux BTF or the verifier will get confused about
|
||||
* the different types. So we add this dummy type reference which will
|
||||
* be included in vmlinux BTF, allowing both modules to refer to the
|
||||
* same type ID.
|
||||
*/
|
||||
BTF_TYPE_EMIT(struct nf_conn___init);
|
||||
#endif
|
||||
}
|
||||
|
||||
const struct bpf_func_proto bpf_sock_map_update_proto __weak;
|
||||
|
@ -886,13 +886,16 @@ int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
|
||||
ret = sk_psock_map_verd(ret, msg->sk_redir);
|
||||
psock->apply_bytes = msg->apply_bytes;
|
||||
if (ret == __SK_REDIRECT) {
|
||||
if (psock->sk_redir)
|
||||
if (psock->sk_redir) {
|
||||
sock_put(psock->sk_redir);
|
||||
psock->sk_redir = msg->sk_redir;
|
||||
if (!psock->sk_redir) {
|
||||
psock->sk_redir = NULL;
|
||||
}
|
||||
if (!msg->sk_redir) {
|
||||
ret = __SK_DROP;
|
||||
goto out;
|
||||
}
|
||||
psock->redir_ingress = sk_msg_to_ingress(msg);
|
||||
psock->sk_redir = msg->sk_redir;
|
||||
sock_hold(psock->sk_redir);
|
||||
}
|
||||
out:
|
||||
|
@ -349,11 +349,13 @@ static void sock_map_free(struct bpf_map *map)
|
||||
|
||||
sk = xchg(psk, NULL);
|
||||
if (sk) {
|
||||
sock_hold(sk);
|
||||
lock_sock(sk);
|
||||
rcu_read_lock();
|
||||
sock_map_unref(sk, psk);
|
||||
rcu_read_unlock();
|
||||
release_sock(sk);
|
||||
sock_put(sk);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -45,8 +45,11 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
|
||||
tmp->sg.end = i;
|
||||
if (apply) {
|
||||
apply_bytes -= size;
|
||||
if (!apply_bytes)
|
||||
if (!apply_bytes) {
|
||||
if (sge->length)
|
||||
sk_msg_iter_var_prev(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} while (i != msg->sg.end);
|
||||
|
||||
@ -131,10 +134,9 @@ static int tcp_bpf_push_locked(struct sock *sk, struct sk_msg *msg,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
|
||||
u32 bytes, int flags)
|
||||
int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
|
||||
struct sk_msg *msg, u32 bytes, int flags)
|
||||
{
|
||||
bool ingress = sk_msg_to_ingress(msg);
|
||||
struct sk_psock *psock = sk_psock_get(sk);
|
||||
int ret;
|
||||
|
||||
@ -276,10 +278,10 @@ msg_bytes_ready:
|
||||
static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
|
||||
struct sk_msg *msg, int *copied, int flags)
|
||||
{
|
||||
bool cork = false, enospc = sk_msg_full(msg);
|
||||
bool cork = false, enospc = sk_msg_full(msg), redir_ingress;
|
||||
struct sock *sk_redir;
|
||||
u32 tosend, origsize, sent, delta = 0;
|
||||
u32 eval = __SK_NONE;
|
||||
u32 eval;
|
||||
int ret;
|
||||
|
||||
more_data:
|
||||
@ -310,6 +312,7 @@ more_data:
|
||||
tosend = msg->sg.size;
|
||||
if (psock->apply_bytes && psock->apply_bytes < tosend)
|
||||
tosend = psock->apply_bytes;
|
||||
eval = __SK_NONE;
|
||||
|
||||
switch (psock->eval) {
|
||||
case __SK_PASS:
|
||||
@ -321,6 +324,7 @@ more_data:
|
||||
sk_msg_apply_bytes(psock, tosend);
|
||||
break;
|
||||
case __SK_REDIRECT:
|
||||
redir_ingress = psock->redir_ingress;
|
||||
sk_redir = psock->sk_redir;
|
||||
sk_msg_apply_bytes(psock, tosend);
|
||||
if (!psock->apply_bytes) {
|
||||
@ -337,7 +341,8 @@ more_data:
|
||||
release_sock(sk);
|
||||
|
||||
origsize = msg->sg.size;
|
||||
ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags);
|
||||
ret = tcp_bpf_sendmsg_redir(sk_redir, redir_ingress,
|
||||
msg, tosend, flags);
|
||||
sent = origsize - msg->sg.size;
|
||||
|
||||
if (eval == __SK_REDIRECT)
|
||||
|
@ -792,7 +792,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
|
||||
struct sk_psock *psock;
|
||||
struct sock *sk_redir;
|
||||
struct tls_rec *rec;
|
||||
bool enospc, policy;
|
||||
bool enospc, policy, redir_ingress;
|
||||
int err = 0, send;
|
||||
u32 delta = 0;
|
||||
|
||||
@ -837,6 +837,7 @@ more_data:
|
||||
}
|
||||
break;
|
||||
case __SK_REDIRECT:
|
||||
redir_ingress = psock->redir_ingress;
|
||||
sk_redir = psock->sk_redir;
|
||||
memcpy(&msg_redir, msg, sizeof(*msg));
|
||||
if (msg->apply_bytes < send)
|
||||
@ -846,7 +847,8 @@ more_data:
|
||||
sk_msg_return_zero(sk, msg, send);
|
||||
msg->sg.size -= send;
|
||||
release_sock(sk);
|
||||
err = tcp_bpf_sendmsg_redir(sk_redir, &msg_redir, send, flags);
|
||||
err = tcp_bpf_sendmsg_redir(sk_redir, redir_ingress,
|
||||
&msg_redir, send, flags);
|
||||
lock_sock(sk);
|
||||
if (err < 0) {
|
||||
*copied -= sk_msg_free_nocharge(sk, &msg_redir);
|
||||
|
@ -3,6 +3,14 @@
|
||||
# Makefile for the XFRM subsystem.
|
||||
#
|
||||
|
||||
xfrm_interface-$(CONFIG_XFRM_INTERFACE) += xfrm_interface_core.o
|
||||
|
||||
ifeq ($(CONFIG_XFRM_INTERFACE),m)
|
||||
xfrm_interface-$(CONFIG_DEBUG_INFO_BTF_MODULES) += xfrm_interface_bpf.o
|
||||
else ifeq ($(CONFIG_XFRM_INTERFACE),y)
|
||||
xfrm_interface-$(CONFIG_DEBUG_INFO_BTF) += xfrm_interface_bpf.o
|
||||
endif
|
||||
|
||||
obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \
|
||||
xfrm_input.o xfrm_output.o \
|
||||
xfrm_sysctl.o xfrm_replay.o xfrm_device.o
|
||||
|
115
net/xfrm/xfrm_interface_bpf.c
Normal file
115
net/xfrm/xfrm_interface_bpf.c
Normal file
@ -0,0 +1,115 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/* Unstable XFRM Helpers for TC-BPF hook
|
||||
*
|
||||
* These are called from SCHED_CLS BPF programs. Note that it is
|
||||
* allowed to break compatibility for these functions since the interface they
|
||||
* are exposed through to BPF programs is explicitly unstable.
|
||||
*/
|
||||
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/btf_ids.h>
|
||||
|
||||
#include <net/dst_metadata.h>
|
||||
#include <net/xfrm.h>
|
||||
|
||||
/* bpf_xfrm_info - XFRM metadata information
|
||||
*
|
||||
* Members:
|
||||
* @if_id - XFRM if_id:
|
||||
* Transmit: if_id to be used in policy and state lookups
|
||||
* Receive: if_id of the state matched for the incoming packet
|
||||
* @link - Underlying device ifindex:
|
||||
* Transmit: used as the underlying device in VRF routing
|
||||
* Receive: the device on which the packet had been received
|
||||
*/
|
||||
struct bpf_xfrm_info {
|
||||
u32 if_id;
|
||||
int link;
|
||||
};
|
||||
|
||||
__diag_push();
|
||||
__diag_ignore_all("-Wmissing-prototypes",
|
||||
"Global functions as their definitions will be in xfrm_interface BTF");
|
||||
|
||||
/* bpf_skb_get_xfrm_info - Get XFRM metadata
|
||||
*
|
||||
* Parameters:
|
||||
* @skb_ctx - Pointer to ctx (__sk_buff) in TC program
|
||||
* Cannot be NULL
|
||||
* @to - Pointer to memory to which the metadata will be copied
|
||||
* Cannot be NULL
|
||||
*/
|
||||
__used noinline
|
||||
int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, struct bpf_xfrm_info *to)
|
||||
{
|
||||
struct sk_buff *skb = (struct sk_buff *)skb_ctx;
|
||||
struct xfrm_md_info *info;
|
||||
|
||||
info = skb_xfrm_md_info(skb);
|
||||
if (!info)
|
||||
return -EINVAL;
|
||||
|
||||
to->if_id = info->if_id;
|
||||
to->link = info->link;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* bpf_skb_set_xfrm_info - Set XFRM metadata
|
||||
*
|
||||
* Parameters:
|
||||
* @skb_ctx - Pointer to ctx (__sk_buff) in TC program
|
||||
* Cannot be NULL
|
||||
* @from - Pointer to memory from which the metadata will be copied
|
||||
* Cannot be NULL
|
||||
*/
|
||||
__used noinline
|
||||
int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx,
|
||||
const struct bpf_xfrm_info *from)
|
||||
{
|
||||
struct sk_buff *skb = (struct sk_buff *)skb_ctx;
|
||||
struct metadata_dst *md_dst;
|
||||
struct xfrm_md_info *info;
|
||||
|
||||
if (unlikely(skb_metadata_dst(skb)))
|
||||
return -EINVAL;
|
||||
|
||||
if (!xfrm_bpf_md_dst) {
|
||||
struct metadata_dst __percpu *tmp;
|
||||
|
||||
tmp = metadata_dst_alloc_percpu(0, METADATA_XFRM, GFP_ATOMIC);
|
||||
if (!tmp)
|
||||
return -ENOMEM;
|
||||
if (cmpxchg(&xfrm_bpf_md_dst, NULL, tmp))
|
||||
metadata_dst_free_percpu(tmp);
|
||||
}
|
||||
md_dst = this_cpu_ptr(xfrm_bpf_md_dst);
|
||||
|
||||
info = &md_dst->u.xfrm_info;
|
||||
|
||||
info->if_id = from->if_id;
|
||||
info->link = from->link;
|
||||
skb_dst_force(skb);
|
||||
info->dst_orig = skb_dst(skb);
|
||||
|
||||
dst_hold((struct dst_entry *)md_dst);
|
||||
skb_dst_set(skb, (struct dst_entry *)md_dst);
|
||||
return 0;
|
||||
}
|
||||
|
||||
__diag_pop()
|
||||
|
||||
BTF_SET8_START(xfrm_ifc_kfunc_set)
|
||||
BTF_ID_FLAGS(func, bpf_skb_get_xfrm_info)
|
||||
BTF_ID_FLAGS(func, bpf_skb_set_xfrm_info)
|
||||
BTF_SET8_END(xfrm_ifc_kfunc_set)
|
||||
|
||||
static const struct btf_kfunc_id_set xfrm_interface_kfunc_set = {
|
||||
.owner = THIS_MODULE,
|
||||
.set = &xfrm_ifc_kfunc_set,
|
||||
};
|
||||
|
||||
int __init register_xfrm_interface_bpf(void)
|
||||
{
|
||||
return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS,
|
||||
&xfrm_interface_kfunc_set);
|
||||
}
|
@ -396,6 +396,14 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
|
||||
|
||||
if_id = md_info->if_id;
|
||||
fl->flowi_oif = md_info->link;
|
||||
if (md_info->dst_orig) {
|
||||
struct dst_entry *tmp_dst = dst;
|
||||
|
||||
dst = md_info->dst_orig;
|
||||
skb_dst_set(skb, dst);
|
||||
md_info->dst_orig = NULL;
|
||||
dst_release(tmp_dst);
|
||||
}
|
||||
} else {
|
||||
if_id = xi->p.if_id;
|
||||
}
|
||||
@ -1162,12 +1170,18 @@ static int __init xfrmi_init(void)
|
||||
if (err < 0)
|
||||
goto rtnl_link_failed;
|
||||
|
||||
err = register_xfrm_interface_bpf();
|
||||
if (err < 0)
|
||||
goto kfunc_failed;
|
||||
|
||||
lwtunnel_encap_add_ops(&xfrmi_encap_ops, LWTUNNEL_ENCAP_XFRM);
|
||||
|
||||
xfrm_if_register_cb(&xfrm_if_cb);
|
||||
|
||||
return err;
|
||||
|
||||
kfunc_failed:
|
||||
rtnl_link_unregister(&xfrmi_link_ops);
|
||||
rtnl_link_failed:
|
||||
xfrmi6_fini();
|
||||
xfrmi6_failed:
|
@ -752,6 +752,7 @@ class PrinterHelpers(Printer):
|
||||
'struct bpf_timer',
|
||||
'struct mptcp_sock',
|
||||
'struct bpf_dynptr',
|
||||
'const struct bpf_dynptr',
|
||||
'struct iphdr',
|
||||
'struct ipv6hdr',
|
||||
}
|
||||
|
@ -501,6 +501,7 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb,
|
||||
if (err) {
|
||||
p_err("failed to append entry to hashmap for ID %u, path '%s': %s",
|
||||
pinned_info.id, path, strerror(errno));
|
||||
free(path);
|
||||
goto out_close;
|
||||
}
|
||||
|
||||
|
@ -5293,7 +5293,7 @@ union bpf_attr {
|
||||
* Return
|
||||
* Nothing. Always succeeds.
|
||||
*
|
||||
* long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags)
|
||||
* long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags)
|
||||
* Description
|
||||
* Read *len* bytes from *src* into *dst*, starting from *offset*
|
||||
* into *src*.
|
||||
@ -5303,7 +5303,7 @@ union bpf_attr {
|
||||
* of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
|
||||
* *flags* is not 0.
|
||||
*
|
||||
* long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
|
||||
* long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
|
||||
* Description
|
||||
* Write *len* bytes from *src* into *dst*, starting from *offset*
|
||||
* into *dst*.
|
||||
@ -5313,7 +5313,7 @@ union bpf_attr {
|
||||
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
|
||||
* is a read-only dynptr or if *flags* is not 0.
|
||||
*
|
||||
* void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len)
|
||||
* void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
|
||||
* Description
|
||||
* Get a pointer to the underlying dynptr data.
|
||||
*
|
||||
@ -5414,7 +5414,7 @@ union bpf_attr {
|
||||
* Drain samples from the specified user ring buffer, and invoke
|
||||
* the provided callback for each such sample:
|
||||
*
|
||||
* long (\*callback_fn)(struct bpf_dynptr \*dynptr, void \*ctx);
|
||||
* long (\*callback_fn)(const struct bpf_dynptr \*dynptr, void \*ctx);
|
||||
*
|
||||
* If **callback_fn** returns 0, the helper will continue to try
|
||||
* and drain the next sample, up to a maximum of
|
||||
|
@ -673,6 +673,7 @@ enum {
|
||||
IFLA_XFRM_UNSPEC,
|
||||
IFLA_XFRM_LINK,
|
||||
IFLA_XFRM_IF_ID,
|
||||
IFLA_XFRM_COLLECT_METADATA,
|
||||
__IFLA_XFRM_MAX
|
||||
};
|
||||
|
||||
|
@ -286,3 +286,20 @@ tags:
|
||||
|
||||
# Delete partially updated (corrupted) files on error
|
||||
.DELETE_ON_ERROR:
|
||||
|
||||
help:
|
||||
@echo 'libbpf common targets:'
|
||||
@echo ' HINT: use "V=1" to enable verbose build'
|
||||
@echo ' all - build libraries and pkgconfig'
|
||||
@echo ' clean - remove all generated files'
|
||||
@echo ' check - check abi and version info'
|
||||
@echo ''
|
||||
@echo 'libbpf install targets:'
|
||||
@echo ' HINT: use "prefix"(defaults to "/usr/local") or "DESTDIR" (defaults to "/")'
|
||||
@echo ' to adjust target destination, e.g. "make prefix=/usr/local install"'
|
||||
@echo ' install - build and install all headers, libraries and pkgconfig'
|
||||
@echo ' install_headers - install only headers to include/bpf'
|
||||
@echo ''
|
||||
@echo 'libbpf make targets:'
|
||||
@echo ' tags - use ctags to make tag information for source code browsing'
|
||||
@echo ' cscope - use cscope to make interactive source code browsing database'
|
||||
|
@ -409,8 +409,15 @@ LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
|
||||
__u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
|
||||
__u64 *probe_offset, __u64 *probe_addr);
|
||||
|
||||
#ifdef __cplusplus
|
||||
/* forward-declaring enums in C++ isn't compatible with pure C enums, so
|
||||
* instead define bpf_enable_stats() as accepting int as an input
|
||||
*/
|
||||
LIBBPF_API int bpf_enable_stats(int type);
|
||||
#else
|
||||
enum bpf_stats_type; /* defined in up-to-date linux/bpf.h */
|
||||
LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type);
|
||||
#endif
|
||||
|
||||
struct bpf_prog_bind_opts {
|
||||
size_t sz; /* size of this struct for forward/backward compatibility */
|
||||
|
@ -1233,6 +1233,14 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
|
||||
if (reg_off < 0)
|
||||
return reg_off;
|
||||
arg->reg_off = reg_off;
|
||||
} else if (sscanf(arg_str, " %d @ ( %%%15[^)] ) %n", &arg_sz, reg_name, &len) == 2) {
|
||||
/* Memory dereference case without offset, e.g., 8@(%rsp) */
|
||||
arg->arg_type = USDT_ARG_REG_DEREF;
|
||||
arg->val_off = 0;
|
||||
reg_off = calc_pt_regs_off(reg_name);
|
||||
if (reg_off < 0)
|
||||
return reg_off;
|
||||
arg->reg_off = reg_off;
|
||||
} else if (sscanf(arg_str, " %d @ %%%15s %n", &arg_sz, reg_name, &len) == 2) {
|
||||
/* Register read case, e.g., -4@%eax */
|
||||
arg->arg_type = USDT_ARG_REG;
|
||||
|
@ -28,6 +28,7 @@ kfree_skb # attach fentry unexpected erro
|
||||
kfunc_call/subprog # extern (var ksym) 'bpf_prog_active': not found in kernel BTF
|
||||
kfunc_call/subprog_lskel # skel unexpected error: -2
|
||||
kfunc_dynptr_param/dynptr_data_null # libbpf: prog 'dynptr_data_null': failed to attach: ERROR: strerror_r(-524)=22
|
||||
kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95
|
||||
kprobe_multi_test/attach_api_addrs # bpf_program__attach_kprobe_multi_opts unexpected error: -95
|
||||
kprobe_multi_test/attach_api_pattern # bpf_program__attach_kprobe_multi_opts unexpected error: -95
|
||||
kprobe_multi_test/attach_api_syms # bpf_program__attach_kprobe_multi_opts unexpected error: -95
|
||||
|
@ -29,6 +29,7 @@ htab_update # failed to attach: ERROR: strerror_r(-
|
||||
kfree_skb # attach fentry unexpected error: -524 (trampoline)
|
||||
kfunc_call # 'bpf_prog_active': not found in kernel BTF (?)
|
||||
kfunc_dynptr_param # JIT does not support calling kernel function (kfunc)
|
||||
kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95
|
||||
kprobe_multi_test # relies on fentry
|
||||
ksyms_module # test_ksyms_module__open_and_load unexpected error: -9 (?)
|
||||
ksyms_module_libbpf # JIT does not support calling kernel function (kfunc)
|
||||
@ -84,3 +85,4 @@ xdp_bonding # failed to auto-attach program 'trace_
|
||||
xdp_bpf2bpf # failed to auto-attach program 'trace_on_entry': -524 (trampoline)
|
||||
xdp_do_redirect # prog_run_max_size unexpected error: -22 (errno 22)
|
||||
xdp_synproxy # JIT does not support calling kernel function (kfunc)
|
||||
xfrm_info # JIT does not support calling kernel function (kfunc)
|
||||
|
@ -527,13 +527,15 @@ TRUNNER_BPF_PROGS_DIR := progs
|
||||
TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
|
||||
network_helpers.c testing_helpers.c \
|
||||
btf_helpers.c flow_dissector_load.h \
|
||||
cap_helpers.c
|
||||
cap_helpers.c test_loader.c
|
||||
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \
|
||||
$(OUTPUT)/liburandom_read.so \
|
||||
$(OUTPUT)/xdp_synproxy \
|
||||
$(OUTPUT)/sign-file \
|
||||
ima_setup.sh verify_sig_setup.sh \
|
||||
$(wildcard progs/btf_dump_test_case_*.c)
|
||||
ima_setup.sh \
|
||||
verify_sig_setup.sh \
|
||||
$(wildcard progs/btf_dump_test_case_*.c) \
|
||||
$(wildcard progs/*.bpf.o)
|
||||
TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
|
||||
TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) -DENABLE_ATOMICS_TESTS
|
||||
$(eval $(call DEFINE_TEST_RUNNER,test_progs))
|
||||
|
@ -2,15 +2,22 @@
|
||||
#ifndef __BPF_LEGACY__
|
||||
#define __BPF_LEGACY__
|
||||
|
||||
#if __GNUC__ && !__clang__
|
||||
/* Functions to emit BPF_LD_ABS and BPF_LD_IND instructions. We
|
||||
* provide the "standard" names as synonyms of the corresponding GCC
|
||||
* builtins. Note how the SKB argument is ignored.
|
||||
*/
|
||||
#define load_byte(skb, off) __builtin_bpf_load_byte(off)
|
||||
#define load_half(skb, off) __builtin_bpf_load_half(off)
|
||||
#define load_word(skb, off) __builtin_bpf_load_word(off)
|
||||
#else
|
||||
/* llvm builtin functions that eBPF C program may use to
|
||||
* emit BPF_LD_ABS and BPF_LD_IND instructions
|
||||
*/
|
||||
unsigned long long load_byte(void *skb,
|
||||
unsigned long long off) asm("llvm.bpf.load.byte");
|
||||
unsigned long long load_half(void *skb,
|
||||
unsigned long long off) asm("llvm.bpf.load.half");
|
||||
unsigned long long load_word(void *skb,
|
||||
unsigned long long off) asm("llvm.bpf.load.word");
|
||||
unsigned long long load_byte(void *skb, unsigned long long off) asm("llvm.bpf.load.byte");
|
||||
unsigned long long load_half(void *skb, unsigned long long off) asm("llvm.bpf.load.half");
|
||||
unsigned long long load_word(void *skb, unsigned long long off) asm("llvm.bpf.load.word");
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -8,7 +8,7 @@ CONFIG_BPF_LIRC_MODE2=y
|
||||
CONFIG_BPF_LSM=y
|
||||
CONFIG_BPF_STREAM_PARSER=y
|
||||
CONFIG_BPF_SYSCALL=y
|
||||
CONFIG_BPF_UNPRIV_DEFAULT_OFF=n
|
||||
# CONFIG_BPF_UNPRIV_DEFAULT_OFF is not set
|
||||
CONFIG_CGROUP_BPF=y
|
||||
CONFIG_CRYPTO_HMAC=y
|
||||
CONFIG_CRYPTO_SHA256=y
|
||||
@ -23,6 +23,7 @@ CONFIG_IKCONFIG_PROC=y
|
||||
CONFIG_IMA=y
|
||||
CONFIG_IMA_READ_POLICY=y
|
||||
CONFIG_IMA_WRITE_POLICY=y
|
||||
CONFIG_INET_ESP=y
|
||||
CONFIG_IP_NF_FILTER=y
|
||||
CONFIG_IP_NF_RAW=y
|
||||
CONFIG_IP_NF_TARGET_SYNPROXY=y
|
||||
@ -70,7 +71,8 @@ CONFIG_NF_NAT=y
|
||||
CONFIG_RC_CORE=y
|
||||
CONFIG_SECURITY=y
|
||||
CONFIG_SECURITYFS=y
|
||||
CONFIG_TEST_BPF=y
|
||||
CONFIG_TEST_BPF=m
|
||||
CONFIG_USERFAULTFD=y
|
||||
CONFIG_VXLAN=y
|
||||
CONFIG_XDP_SOCKETS=y
|
||||
CONFIG_XFRM_INTERFACE=y
|
||||
|
@ -390,49 +390,6 @@ struct nstoken {
|
||||
int orig_netns_fd;
|
||||
};
|
||||
|
||||
static int setns_by_fd(int nsfd)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = setns(nsfd, CLONE_NEWNET);
|
||||
close(nsfd);
|
||||
|
||||
if (!ASSERT_OK(err, "setns"))
|
||||
return err;
|
||||
|
||||
/* Switch /sys to the new namespace so that e.g. /sys/class/net
|
||||
* reflects the devices in the new namespace.
|
||||
*/
|
||||
err = unshare(CLONE_NEWNS);
|
||||
if (!ASSERT_OK(err, "unshare"))
|
||||
return err;
|
||||
|
||||
/* Make our /sys mount private, so the following umount won't
|
||||
* trigger the global umount in case it's shared.
|
||||
*/
|
||||
err = mount("none", "/sys", NULL, MS_PRIVATE, NULL);
|
||||
if (!ASSERT_OK(err, "remount private /sys"))
|
||||
return err;
|
||||
|
||||
err = umount2("/sys", MNT_DETACH);
|
||||
if (!ASSERT_OK(err, "umount2 /sys"))
|
||||
return err;
|
||||
|
||||
err = mount("sysfs", "/sys", "sysfs", 0, NULL);
|
||||
if (!ASSERT_OK(err, "mount /sys"))
|
||||
return err;
|
||||
|
||||
err = mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL);
|
||||
if (!ASSERT_OK(err, "mount /sys/fs/bpf"))
|
||||
return err;
|
||||
|
||||
err = mount("debugfs", "/sys/kernel/debug", "debugfs", 0, NULL);
|
||||
if (!ASSERT_OK(err, "mount /sys/kernel/debug"))
|
||||
return err;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct nstoken *open_netns(const char *name)
|
||||
{
|
||||
int nsfd;
|
||||
@ -453,8 +410,9 @@ struct nstoken *open_netns(const char *name)
|
||||
if (!ASSERT_GE(nsfd, 0, "open netns fd"))
|
||||
goto fail;
|
||||
|
||||
err = setns_by_fd(nsfd);
|
||||
if (!ASSERT_OK(err, "setns_by_fd"))
|
||||
err = setns(nsfd, CLONE_NEWNET);
|
||||
close(nsfd);
|
||||
if (!ASSERT_OK(err, "setns"))
|
||||
goto fail;
|
||||
|
||||
return token;
|
||||
@ -465,6 +423,7 @@ fail:
|
||||
|
||||
void close_netns(struct nstoken *token)
|
||||
{
|
||||
ASSERT_OK(setns_by_fd(token->orig_netns_fd), "setns_by_fd");
|
||||
ASSERT_OK(setns(token->orig_netns_fd, CLONE_NEWNET), "setns");
|
||||
close(token->orig_netns_fd);
|
||||
free(token);
|
||||
}
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <sched.h>
|
||||
#include <net/if.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <bpf/libbpf.h>
|
||||
|
||||
@ -20,10 +21,12 @@ static struct test_btf_skc_cls_ingress *skel;
|
||||
static struct sockaddr_in6 srv_sa6;
|
||||
static __u32 duration;
|
||||
|
||||
#define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress"
|
||||
|
||||
static int prepare_netns(void)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_tc_hook, qdisc_lo, .attach_point = BPF_TC_INGRESS);
|
||||
LIBBPF_OPTS(bpf_tc_opts, tc_attach,
|
||||
.prog_fd = bpf_program__fd(skel->progs.cls_ingress));
|
||||
|
||||
if (CHECK(unshare(CLONE_NEWNET), "create netns",
|
||||
"unshare(CLONE_NEWNET): %s (%d)",
|
||||
strerror(errno), errno))
|
||||
@ -33,12 +36,12 @@ static int prepare_netns(void)
|
||||
"ip link set dev lo up", "failed\n"))
|
||||
return -1;
|
||||
|
||||
if (CHECK(system("tc qdisc add dev lo clsact"),
|
||||
"tc qdisc add dev lo clsact", "failed\n"))
|
||||
qdisc_lo.ifindex = if_nametoindex("lo");
|
||||
if (!ASSERT_OK(bpf_tc_hook_create(&qdisc_lo), "qdisc add dev lo clsact"))
|
||||
return -1;
|
||||
|
||||
if (CHECK(system("tc filter add dev lo ingress bpf direct-action object-pinned " PROG_PIN_FILE),
|
||||
"install tc cls-prog at ingress", "failed\n"))
|
||||
if (!ASSERT_OK(bpf_tc_attach(&qdisc_lo, &tc_attach),
|
||||
"filter add dev lo ingress"))
|
||||
return -1;
|
||||
|
||||
/* Ensure 20 bytes options (i.e. in total 40 bytes tcp header) for the
|
||||
@ -195,19 +198,12 @@ static struct test tests[] = {
|
||||
|
||||
void test_btf_skc_cls_ingress(void)
|
||||
{
|
||||
int i, err;
|
||||
int i;
|
||||
|
||||
skel = test_btf_skc_cls_ingress__open_and_load();
|
||||
if (CHECK(!skel, "test_btf_skc_cls_ingress__open_and_load", "failed\n"))
|
||||
return;
|
||||
|
||||
err = bpf_program__pin(skel->progs.cls_ingress, PROG_PIN_FILE);
|
||||
if (CHECK(err, "bpf_program__pin",
|
||||
"cannot pin bpf prog to %s. err:%d\n", PROG_PIN_FILE, err)) {
|
||||
test_btf_skc_cls_ingress__destroy(skel);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(tests); i++) {
|
||||
if (!test__start_subtest(tests[i].desc))
|
||||
continue;
|
||||
@ -221,6 +217,5 @@ void test_btf_skc_cls_ingress(void)
|
||||
reset_test();
|
||||
}
|
||||
|
||||
bpf_program__unpin(skel->progs.cls_ingress, PROG_PIN_FILE);
|
||||
test_btf_skc_cls_ingress__destroy(skel);
|
||||
}
|
||||
|
@ -10,7 +10,9 @@
|
||||
#include "cgrp_ls_recursion.skel.h"
|
||||
#include "cgrp_ls_attach_cgroup.skel.h"
|
||||
#include "cgrp_ls_negative.skel.h"
|
||||
#include "cgrp_ls_sleepable.skel.h"
|
||||
#include "network_helpers.h"
|
||||
#include "cgroup_helpers.h"
|
||||
|
||||
struct socket_cookie {
|
||||
__u64 cookie_key;
|
||||
@ -150,14 +152,100 @@ static void test_negative(void)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Load the cgroup_iter program from the cgrp_ls_sleepable skeleton, attach
 * it as a BPF_CGROUP_ITER_SELF_ONLY iterator on @cgroup_fd, read from the
 * iterator fd to trigger one program run, and check that the cgroup id the
 * program stored in bss matches @cgroup_id.
 */
static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id)
{
	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
	union bpf_iter_link_info linfo;
	struct cgrp_ls_sleepable *skel;
	/* NULL so that the common exit path can destroy it unconditionally */
	struct bpf_link *link = NULL;
	int err, iter_fd;
	char buf[16];

	skel = cgrp_ls_sleepable__open();
	if (!ASSERT_OK_PTR(skel, "skel_open"))
		return;

	bpf_program__set_autoload(skel->progs.cgroup_iter, true);
	err = cgrp_ls_sleepable__load(skel);
	if (!ASSERT_OK(err, "skel_load"))
		goto out;

	memset(&linfo, 0, sizeof(linfo));
	linfo.cgroup.cgroup_fd = cgroup_fd;
	linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY;
	opts.link_info = &linfo;
	opts.link_info_len = sizeof(linfo);
	link = bpf_program__attach_iter(skel->progs.cgroup_iter, &opts);
	if (!ASSERT_OK_PTR(link, "attach_iter"))
		goto out;

	iter_fd = bpf_iter_create(bpf_link__fd(link));
	if (!ASSERT_GE(iter_fd, 0, "iter_create"))
		goto out;

	/* trigger the program run */
	(void)read(iter_fd, buf, sizeof(buf));

	ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id");

	close(iter_fd);
out:
	/*
	 * The link comes from a manual bpf_program__attach_iter() call rather
	 * than from cgrp_ls_sleepable__attach(), so release it explicitly
	 * instead of relying on the skeleton teardown below.
	 */
	bpf_link__destroy(link);
	cgrp_ls_sleepable__destroy(skel);
}
|
||||
|
||||
/*
 * Load and attach the no_rcu_lock program with target_pid set to the
 * calling thread, issue a getpgid syscall, and check that the cgroup id
 * recorded in bss equals @cgroup_id.
 */
static void test_no_rcu_lock(__u64 cgroup_id)
{
	struct cgrp_ls_sleepable *obj = cgrp_ls_sleepable__open();

	if (!ASSERT_OK_PTR(obj, "skel_open"))
		return;

	obj->bss->target_pid = syscall(SYS_gettid);
	bpf_program__set_autoload(obj->progs.no_rcu_lock, true);

	if (ASSERT_OK(cgrp_ls_sleepable__load(obj), "skel_load") &&
	    ASSERT_OK(cgrp_ls_sleepable__attach(obj), "skel_attach")) {
		/* presumably the attached program runs on this syscall */
		syscall(SYS_getpgid);

		ASSERT_EQ(obj->bss->cgroup_id, cgroup_id, "cgroup_id");
	}

	cgrp_ls_sleepable__destroy(obj);
}
|
||||
|
||||
static void test_rcu_lock(void)
|
||||
{
|
||||
struct cgrp_ls_sleepable *skel;
|
||||
int err;
|
||||
|
||||
skel = cgrp_ls_sleepable__open();
|
||||
if (!ASSERT_OK_PTR(skel, "skel_open"))
|
||||
return;
|
||||
|
||||
bpf_program__set_autoload(skel->progs.yes_rcu_lock, true);
|
||||
err = cgrp_ls_sleepable__load(skel);
|
||||
ASSERT_ERR(err, "skel_load");
|
||||
|
||||
cgrp_ls_sleepable__destroy(skel);
|
||||
}
|
||||
|
||||
void test_cgrp_local_storage(void)
|
||||
{
|
||||
__u64 cgroup_id;
|
||||
int cgroup_fd;
|
||||
|
||||
cgroup_fd = test__join_cgroup("/cgrp_local_storage");
|
||||
if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /cgrp_local_storage"))
|
||||
return;
|
||||
|
||||
cgroup_id = get_cgroup_id("/cgrp_local_storage");
|
||||
if (test__start_subtest("tp_btf"))
|
||||
test_tp_btf(cgroup_fd);
|
||||
if (test__start_subtest("attach_cgroup"))
|
||||
@ -166,6 +254,12 @@ void test_cgrp_local_storage(void)
|
||||
test_recursion(cgroup_fd);
|
||||
if (test__start_subtest("negative"))
|
||||
test_negative();
|
||||
if (test__start_subtest("cgroup_iter_sleepable"))
|
||||
test_cgroup_iter_sleepable(cgroup_fd, cgroup_id);
|
||||
if (test__start_subtest("no_rcu_lock"))
|
||||
test_no_rcu_lock(cgroup_id);
|
||||
if (test__start_subtest("rcu_lock"))
|
||||
test_rcu_lock();
|
||||
|
||||
close(cgroup_fd);
|
||||
}
|
||||
|
@ -5,86 +5,16 @@
|
||||
#include "dynptr_fail.skel.h"
|
||||
#include "dynptr_success.skel.h"
|
||||
|
||||
static size_t log_buf_sz = 1048576; /* 1 MB */
|
||||
static char obj_log_buf[1048576];
|
||||
|
||||
/*
 * Table of dynptr subtests. An entry with a non-NULL expected_err_msg is a
 * program that must fail to load with that text in the verifier log; an
 * entry with a NULL message is a program that must load and run.
 */
static struct {
	const char *prog_name;
	const char *expected_err_msg;
} dynptr_tests[] = {
	/* failure cases */
	{ .prog_name = "ringbuf_missing_release1",
	  .expected_err_msg = "Unreleased reference id=1" },
	{ .prog_name = "ringbuf_missing_release2",
	  .expected_err_msg = "Unreleased reference id=2" },
	{ .prog_name = "ringbuf_missing_release_callback",
	  .expected_err_msg = "Unreleased reference id" },
	{ .prog_name = "use_after_invalid",
	  .expected_err_msg = "Expected an initialized dynptr as arg #3" },
	{ .prog_name = "ringbuf_invalid_api",
	  .expected_err_msg = "type=mem expected=ringbuf_mem" },
	{ .prog_name = "add_dynptr_to_map1",
	  .expected_err_msg = "invalid indirect read from stack" },
	{ .prog_name = "add_dynptr_to_map2",
	  .expected_err_msg = "invalid indirect read from stack" },
	{ .prog_name = "data_slice_out_of_bounds_ringbuf",
	  .expected_err_msg = "value is outside of the allowed memory range" },
	{ .prog_name = "data_slice_out_of_bounds_map_value",
	  .expected_err_msg = "value is outside of the allowed memory range" },
	{ .prog_name = "data_slice_use_after_release1",
	  .expected_err_msg = "invalid mem access 'scalar'" },
	{ .prog_name = "data_slice_use_after_release2",
	  .expected_err_msg = "invalid mem access 'scalar'" },
	{ .prog_name = "data_slice_missing_null_check1",
	  .expected_err_msg = "invalid mem access 'mem_or_null'" },
	{ .prog_name = "data_slice_missing_null_check2",
	  .expected_err_msg = "invalid mem access 'mem_or_null'" },
	{ .prog_name = "invalid_helper1",
	  .expected_err_msg = "invalid indirect read from stack" },
	{ .prog_name = "invalid_helper2",
	  .expected_err_msg = "Expected an initialized dynptr as arg #3" },
	{ .prog_name = "invalid_write1",
	  .expected_err_msg = "Expected an initialized dynptr as arg #1" },
	{ .prog_name = "invalid_write2",
	  .expected_err_msg = "Expected an initialized dynptr as arg #3" },
	{ .prog_name = "invalid_write3",
	  .expected_err_msg = "Expected an initialized dynptr as arg #1" },
	{ .prog_name = "invalid_write4",
	  .expected_err_msg = "arg 1 is an unacquired reference" },
	{ .prog_name = "invalid_read1",
	  .expected_err_msg = "invalid read from stack" },
	{ .prog_name = "invalid_read2",
	  .expected_err_msg = "cannot pass in dynptr at an offset" },
	{ .prog_name = "invalid_read3",
	  .expected_err_msg = "invalid read from stack" },
	{ .prog_name = "invalid_read4",
	  .expected_err_msg = "invalid read from stack" },
	{ .prog_name = "invalid_offset",
	  .expected_err_msg = "invalid write to stack" },
	{ .prog_name = "global",
	  .expected_err_msg = "type=map_value expected=fp" },
	{ .prog_name = "release_twice",
	  .expected_err_msg = "arg 1 is an unacquired reference" },
	{ .prog_name = "release_twice_callback",
	  .expected_err_msg = "arg 1 is an unacquired reference" },
	{ .prog_name = "dynptr_from_mem_invalid_api",
	  .expected_err_msg = "Unsupported reg type fp for bpf_dynptr_from_mem data" },

	/* success cases */
	{ .prog_name = "test_read_write", .expected_err_msg = NULL },
	{ .prog_name = "test_data_slice", .expected_err_msg = NULL },
	{ .prog_name = "test_ringbuf", .expected_err_msg = NULL },
};
|
||||
|
||||
static void verify_fail(const char *prog_name, const char *expected_err_msg)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_object_open_opts, opts);
|
||||
struct bpf_program *prog;
|
||||
struct dynptr_fail *skel;
|
||||
int err;
|
||||
|
||||
opts.kernel_log_buf = obj_log_buf;
|
||||
opts.kernel_log_size = log_buf_sz;
|
||||
opts.kernel_log_level = 1;
|
||||
|
||||
skel = dynptr_fail__open_opts(&opts);
|
||||
if (!ASSERT_OK_PTR(skel, "dynptr_fail__open_opts"))
|
||||
goto cleanup;
|
||||
|
||||
prog = bpf_object__find_program_by_name(skel->obj, prog_name);
|
||||
if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
|
||||
goto cleanup;
|
||||
|
||||
bpf_program__set_autoload(prog, true);
|
||||
|
||||
bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize());
|
||||
|
||||
err = dynptr_fail__load(skel);
|
||||
if (!ASSERT_ERR(err, "unexpected load success"))
|
||||
goto cleanup;
|
||||
|
||||
if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) {
|
||||
fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg);
|
||||
fprintf(stderr, "Verifier output: %s\n", obj_log_buf);
|
||||
}
|
||||
|
||||
cleanup:
|
||||
dynptr_fail__destroy(skel);
|
||||
}
|
||||
|
||||
static void verify_success(const char *prog_name)
|
||||
{
|
||||
struct dynptr_success *skel;
|
||||
@ -97,8 +27,6 @@ static void verify_success(const char *prog_name)
|
||||
|
||||
skel->bss->pid = getpid();
|
||||
|
||||
bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize());
|
||||
|
||||
dynptr_success__load(skel);
|
||||
if (!ASSERT_OK_PTR(skel, "dynptr_success__load"))
|
||||
goto cleanup;
|
||||
@ -129,10 +57,8 @@ void test_dynptr(void)
|
||||
if (!test__start_subtest(dynptr_tests[i].prog_name))
|
||||
continue;
|
||||
|
||||
if (dynptr_tests[i].expected_err_msg)
|
||||
verify_fail(dynptr_tests[i].prog_name,
|
||||
dynptr_tests[i].expected_err_msg);
|
||||
else
|
||||
verify_success(dynptr_tests[i].prog_name);
|
||||
verify_success(dynptr_tests[i].prog_name);
|
||||
}
|
||||
|
||||
RUN_TESTS(dynptr_fail);
|
||||
}
|
||||
|
@ -9,7 +9,7 @@
|
||||
goto out; \
|
||||
})
|
||||
|
||||
void serial_test_empty_skb(void)
|
||||
void test_empty_skb(void)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_test_run_opts, tattr);
|
||||
struct empty_skb *bpf_obj = NULL;
|
||||
|
@ -18,11 +18,8 @@ static struct {
|
||||
const char *expected_verifier_err_msg;
|
||||
int expected_runtime_err;
|
||||
} kfunc_dynptr_tests[] = {
|
||||
{"dynptr_type_not_supp",
|
||||
"arg#0 pointer type STRUCT bpf_dynptr_kern points to unsupported dynamic pointer type", 0},
|
||||
{"not_valid_dynptr",
|
||||
"arg#0 pointer type STRUCT bpf_dynptr_kern must be valid and initialized", 0},
|
||||
{"not_ptr_to_stack", "arg#0 expected pointer to stack", 0},
|
||||
{"not_valid_dynptr", "Expected an initialized dynptr as arg #1", 0},
|
||||
{"not_ptr_to_stack", "arg#0 expected pointer to stack or dynptr_ptr", 0},
|
||||
{"dynptr_data_null", NULL, -EBADMSG},
|
||||
};
|
||||
|
||||
|
@ -5,83 +5,6 @@
|
||||
#include "map_kptr.skel.h"
|
||||
#include "map_kptr_fail.skel.h"
|
||||
|
||||
static char log_buf[1024 * 1024];
|
||||
|
||||
/*
 * Programs from the map_kptr_fail skeleton that must fail to load, paired
 * with the text that must appear in the verifier log for each of them.
 */
struct {
	const char *prog_name;
	const char *err_msg;
} map_kptr_fail_tests[] = {
	{ .prog_name = "size_not_bpf_dw",
	  .err_msg = "kptr access size must be BPF_DW" },
	{ .prog_name = "non_const_var_off",
	  .err_msg = "kptr access cannot have variable offset" },
	{ .prog_name = "non_const_var_off_kptr_xchg",
	  .err_msg = "R1 doesn't have constant offset. kptr has to be" },
	{ .prog_name = "misaligned_access_write",
	  .err_msg = "kptr access misaligned expected=8 off=7" },
	{ .prog_name = "misaligned_access_read",
	  .err_msg = "kptr access misaligned expected=8 off=1" },
	{ .prog_name = "reject_var_off_store",
	  .err_msg = "variable untrusted_ptr_ access var_off=(0x0; 0x1e0)" },
	{ .prog_name = "reject_bad_type_match",
	  .err_msg = "invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc" },
	{ .prog_name = "marked_as_untrusted_or_null",
	  .err_msg = "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_" },
	{ .prog_name = "correct_btf_id_check_size",
	  .err_msg = "access beyond struct prog_test_ref_kfunc at off 32 size 4" },
	{ .prog_name = "inherit_untrusted_on_walk",
	  .err_msg = "R1 type=untrusted_ptr_ expected=percpu_ptr_" },
	{ .prog_name = "reject_kptr_xchg_on_unref",
	  .err_msg = "off=8 kptr isn't referenced kptr" },
	{ .prog_name = "reject_kptr_get_no_map_val",
	  .err_msg = "arg#0 expected pointer to map value" },
	{ .prog_name = "reject_kptr_get_no_null_map_val",
	  .err_msg = "arg#0 expected pointer to map value" },
	{ .prog_name = "reject_kptr_get_no_kptr",
	  .err_msg = "arg#0 no referenced kptr at map value offset=0" },
	{ .prog_name = "reject_kptr_get_on_unref",
	  .err_msg = "arg#0 no referenced kptr at map value offset=8" },
	{ .prog_name = "reject_kptr_get_bad_type_match",
	  .err_msg = "kernel function bpf_kfunc_call_test_kptr_get args#0" },
	{ .prog_name = "mark_ref_as_untrusted_or_null",
	  .err_msg = "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_" },
	{ .prog_name = "reject_untrusted_store_to_ref",
	  .err_msg = "store to referenced kptr disallowed" },
	{ .prog_name = "reject_bad_type_xchg",
	  .err_msg = "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member" },
	{ .prog_name = "reject_untrusted_xchg",
	  .err_msg = "R2 type=untrusted_ptr_ expected=ptr_" },
	{ .prog_name = "reject_member_of_ref_xchg",
	  .err_msg = "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc" },
	{ .prog_name = "reject_indirect_helper_access",
	  .err_msg = "kptr cannot be accessed indirectly by helper" },
	{ .prog_name = "reject_indirect_global_func_access",
	  .err_msg = "kptr cannot be accessed indirectly by helper" },
	{ .prog_name = "kptr_xchg_ref_state",
	  .err_msg = "Unreleased reference id=5 alloc_insn=" },
	{ .prog_name = "kptr_get_ref_state",
	  .err_msg = "Unreleased reference id=3 alloc_insn=" },
};
|
||||
|
||||
static void test_map_kptr_fail_prog(const char *prog_name, const char *err_msg)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf,
|
||||
.kernel_log_size = sizeof(log_buf),
|
||||
.kernel_log_level = 1);
|
||||
struct map_kptr_fail *skel;
|
||||
struct bpf_program *prog;
|
||||
int ret;
|
||||
|
||||
skel = map_kptr_fail__open_opts(&opts);
|
||||
if (!ASSERT_OK_PTR(skel, "map_kptr_fail__open_opts"))
|
||||
return;
|
||||
|
||||
prog = bpf_object__find_program_by_name(skel->obj, prog_name);
|
||||
if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
|
||||
goto end;
|
||||
|
||||
bpf_program__set_autoload(prog, true);
|
||||
|
||||
ret = map_kptr_fail__load(skel);
|
||||
if (!ASSERT_ERR(ret, "map_kptr__load must fail"))
|
||||
goto end;
|
||||
|
||||
if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) {
|
||||
fprintf(stderr, "Expected: %s\n", err_msg);
|
||||
fprintf(stderr, "Verifier: %s\n", log_buf);
|
||||
}
|
||||
|
||||
end:
|
||||
map_kptr_fail__destroy(skel);
|
||||
}
|
||||
|
||||
static void test_map_kptr_fail(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(map_kptr_fail_tests); i++) {
|
||||
if (!test__start_subtest(map_kptr_fail_tests[i].prog_name))
|
||||
continue;
|
||||
test_map_kptr_fail_prog(map_kptr_fail_tests[i].prog_name,
|
||||
map_kptr_fail_tests[i].err_msg);
|
||||
}
|
||||
}
|
||||
|
||||
static void test_map_kptr_success(bool test_run)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_test_run_opts, opts,
|
||||
@ -145,5 +68,6 @@ void test_map_kptr(void)
|
||||
*/
|
||||
test_map_kptr_success(true);
|
||||
}
|
||||
test_map_kptr_fail();
|
||||
|
||||
RUN_TESTS(map_kptr_fail);
|
||||
}
|
||||
|
@ -103,6 +103,7 @@ static struct {
|
||||
{"task_kfunc_release_null", "arg#0 is ptr_or_null_ expected ptr_ or socket"},
|
||||
{"task_kfunc_release_unacquired", "release kernel function bpf_task_release expects"},
|
||||
{"task_kfunc_from_pid_no_null_check", "arg#0 is ptr_or_null_ expected ptr_ or socket"},
|
||||
{"task_kfunc_from_lsm_task_free", "reg type unsupported for arg#0 function"},
|
||||
};
|
||||
|
||||
static void verify_fail(const char *prog_name, const char *expected_err_msg)
|
||||
|
@ -11,12 +11,12 @@
|
||||
*/
|
||||
|
||||
#include <arpa/inet.h>
|
||||
#include <linux/if.h>
|
||||
#include <linux/if_tun.h>
|
||||
#include <linux/limits.h>
|
||||
#include <linux/sysctl.h>
|
||||
#include <linux/time_types.h>
|
||||
#include <linux/net_tstamp.h>
|
||||
#include <net/if.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/stat.h>
|
||||
@ -59,10 +59,6 @@
|
||||
#define IFADDR_STR_LEN 18
|
||||
#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
|
||||
|
||||
#define SRC_PROG_PIN_FILE "/sys/fs/bpf/test_tc_src"
|
||||
#define DST_PROG_PIN_FILE "/sys/fs/bpf/test_tc_dst"
|
||||
#define CHK_PROG_PIN_FILE "/sys/fs/bpf/test_tc_chk"
|
||||
|
||||
#define TIMEOUT_MILLIS 10000
|
||||
#define NSEC_PER_SEC 1000000000ULL
|
||||
|
||||
@ -115,7 +111,9 @@ static void netns_setup_namespaces_nofail(const char *verb)
|
||||
}
|
||||
|
||||
/*
 * Interface indexes of the four veth devices used by the tc_redirect
 * tests, as filled in by netns_setup_links_and_routes().
 */
struct netns_setup_result {
	/* source-side pair: veth_src and veth_src_fwd */
	int ifindex_veth_src, ifindex_veth_src_fwd;
	/* destination-side pair: veth_dst and veth_dst_fwd */
	int ifindex_veth_dst, ifindex_veth_dst_fwd;
};
|
||||
|
||||
@ -139,27 +137,6 @@ static int get_ifaddr(const char *name, char *ifaddr)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int get_ifindex(const char *name)
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
char buf[32];
|
||||
FILE *f;
|
||||
int ret;
|
||||
|
||||
snprintf(path, PATH_MAX, "/sys/class/net/%s/ifindex", name);
|
||||
f = fopen(path, "r");
|
||||
if (!ASSERT_OK_PTR(f, path))
|
||||
return -1;
|
||||
|
||||
ret = fread(buf, 1, sizeof(buf), f);
|
||||
if (!ASSERT_GT(ret, 0, "fread ifindex")) {
|
||||
fclose(f);
|
||||
return -1;
|
||||
}
|
||||
fclose(f);
|
||||
return atoi(buf);
|
||||
}
|
||||
|
||||
#define SYS(fmt, ...) \
|
||||
({ \
|
||||
char cmd[1024]; \
|
||||
@ -182,11 +159,20 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
|
||||
if (get_ifaddr("veth_src_fwd", veth_src_fwd_addr))
|
||||
goto fail;
|
||||
|
||||
result->ifindex_veth_src_fwd = get_ifindex("veth_src_fwd");
|
||||
if (result->ifindex_veth_src_fwd < 0)
|
||||
result->ifindex_veth_src = if_nametoindex("veth_src");
|
||||
if (!ASSERT_GT(result->ifindex_veth_src, 0, "ifindex_veth_src"))
|
||||
goto fail;
|
||||
result->ifindex_veth_dst_fwd = get_ifindex("veth_dst_fwd");
|
||||
if (result->ifindex_veth_dst_fwd < 0)
|
||||
|
||||
result->ifindex_veth_src_fwd = if_nametoindex("veth_src_fwd");
|
||||
if (!ASSERT_GT(result->ifindex_veth_src_fwd, 0, "ifindex_veth_src_fwd"))
|
||||
goto fail;
|
||||
|
||||
result->ifindex_veth_dst = if_nametoindex("veth_dst");
|
||||
if (!ASSERT_GT(result->ifindex_veth_dst, 0, "ifindex_veth_dst"))
|
||||
goto fail;
|
||||
|
||||
result->ifindex_veth_dst_fwd = if_nametoindex("veth_dst_fwd");
|
||||
if (!ASSERT_GT(result->ifindex_veth_dst_fwd, 0, "ifindex_veth_dst_fwd"))
|
||||
goto fail;
|
||||
|
||||
SYS("ip link set veth_src netns " NS_SRC);
|
||||
@ -260,19 +246,78 @@ fail:
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int netns_load_bpf(void)
|
||||
static int qdisc_clsact_create(struct bpf_tc_hook *qdisc_hook, int ifindex)
|
||||
{
|
||||
SYS("tc qdisc add dev veth_src_fwd clsact");
|
||||
SYS("tc filter add dev veth_src_fwd ingress bpf da object-pinned "
|
||||
SRC_PROG_PIN_FILE);
|
||||
SYS("tc filter add dev veth_src_fwd egress bpf da object-pinned "
|
||||
CHK_PROG_PIN_FILE);
|
||||
char err_str[128], ifname[16];
|
||||
int err;
|
||||
|
||||
SYS("tc qdisc add dev veth_dst_fwd clsact");
|
||||
SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
|
||||
DST_PROG_PIN_FILE);
|
||||
SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
|
||||
CHK_PROG_PIN_FILE);
|
||||
qdisc_hook->ifindex = ifindex;
|
||||
qdisc_hook->attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
|
||||
err = bpf_tc_hook_create(qdisc_hook);
|
||||
snprintf(err_str, sizeof(err_str),
|
||||
"qdisc add dev %s clsact",
|
||||
if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>");
|
||||
err_str[sizeof(err_str) - 1] = 0;
|
||||
ASSERT_OK(err, err_str);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int xgress_filter_add(struct bpf_tc_hook *qdisc_hook,
|
||||
enum bpf_tc_attach_point xgress,
|
||||
const struct bpf_program *prog, int priority)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_tc_opts, tc_attach);
|
||||
char err_str[128], ifname[16];
|
||||
int err;
|
||||
|
||||
qdisc_hook->attach_point = xgress;
|
||||
tc_attach.prog_fd = bpf_program__fd(prog);
|
||||
tc_attach.priority = priority;
|
||||
err = bpf_tc_attach(qdisc_hook, &tc_attach);
|
||||
snprintf(err_str, sizeof(err_str),
|
||||
"filter add dev %s %s prio %d bpf da %s",
|
||||
if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>",
|
||||
xgress == BPF_TC_INGRESS ? "ingress" : "egress",
|
||||
priority, bpf_program__name(prog));
|
||||
err_str[sizeof(err_str) - 1] = 0;
|
||||
ASSERT_OK(err, err_str);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Call qdisc_clsact_create() and store its result in the caller's 'err';
 * a non-zero result jumps to the caller's 'fail' label. Both 'err' and
 * 'fail' must exist in the invoking function.
 */
#define QDISC_CLSACT_CREATE(qdisc_hook, ifindex) ({			\
	err = qdisc_clsact_create(qdisc_hook, ifindex);			\
	if (err)							\
		goto fail;						\
})
|
||||
|
||||
/*
 * Call xgress_filter_add() and store its result in the caller's 'err';
 * a non-zero result jumps to the caller's 'fail' label. Both 'err' and
 * 'fail' must exist in the invoking function.
 */
#define XGRESS_FILTER_ADD(qdisc_hook, xgress, prog, priority) ({	\
	err = xgress_filter_add(qdisc_hook, xgress, prog, priority);	\
	if (err)							\
		goto fail;						\
})
|
||||
|
||||
static int netns_load_bpf(const struct bpf_program *src_prog,
|
||||
const struct bpf_program *dst_prog,
|
||||
const struct bpf_program *chk_prog,
|
||||
const struct netns_setup_result *setup_result)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd);
|
||||
LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
|
||||
int err;
|
||||
|
||||
/* tc qdisc add dev veth_src_fwd clsact */
|
||||
QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd);
|
||||
/* tc filter add dev veth_src_fwd ingress bpf da src_prog */
|
||||
XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS, src_prog, 0);
|
||||
/* tc filter add dev veth_src_fwd egress bpf da chk_prog */
|
||||
XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
|
||||
|
||||
/* tc qdisc add dev veth_dst_fwd clsact */
|
||||
QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
|
||||
/* tc filter add dev veth_dst_fwd ingress bpf da dst_prog */
|
||||
XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
|
||||
/* tc filter add dev veth_dst_fwd egress bpf da chk_prog */
|
||||
XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
|
||||
|
||||
return 0;
|
||||
fail:
|
||||
@ -499,78 +544,79 @@ done:
|
||||
close(client_fd);
|
||||
}
|
||||
|
||||
static int netns_load_dtime_bpf(struct test_tc_dtime *skel)
|
||||
static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
|
||||
const struct netns_setup_result *setup_result)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src_fwd);
|
||||
LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
|
||||
LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_src);
|
||||
LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst);
|
||||
struct nstoken *nstoken;
|
||||
|
||||
#define PIN_FNAME(__file) "/sys/fs/bpf/" #__file
|
||||
#define PIN(__prog) ({ \
|
||||
int err = bpf_program__pin(skel->progs.__prog, PIN_FNAME(__prog)); \
|
||||
if (!ASSERT_OK(err, "pin " #__prog)) \
|
||||
goto fail; \
|
||||
})
|
||||
int err;
|
||||
|
||||
/* setup ns_src tc progs */
|
||||
nstoken = open_netns(NS_SRC);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
|
||||
return -1;
|
||||
PIN(egress_host);
|
||||
PIN(ingress_host);
|
||||
SYS("tc qdisc add dev veth_src clsact");
|
||||
SYS("tc filter add dev veth_src ingress bpf da object-pinned "
|
||||
PIN_FNAME(ingress_host));
|
||||
SYS("tc filter add dev veth_src egress bpf da object-pinned "
|
||||
PIN_FNAME(egress_host));
|
||||
/* tc qdisc add dev veth_src clsact */
|
||||
QDISC_CLSACT_CREATE(&qdisc_veth_src, setup_result->ifindex_veth_src);
|
||||
/* tc filter add dev veth_src ingress bpf da ingress_host */
|
||||
XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
|
||||
/* tc filter add dev veth_src egress bpf da egress_host */
|
||||
XGRESS_FILTER_ADD(&qdisc_veth_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
|
||||
close_netns(nstoken);
|
||||
|
||||
/* setup ns_dst tc progs */
|
||||
nstoken = open_netns(NS_DST);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
|
||||
return -1;
|
||||
PIN(egress_host);
|
||||
PIN(ingress_host);
|
||||
SYS("tc qdisc add dev veth_dst clsact");
|
||||
SYS("tc filter add dev veth_dst ingress bpf da object-pinned "
|
||||
PIN_FNAME(ingress_host));
|
||||
SYS("tc filter add dev veth_dst egress bpf da object-pinned "
|
||||
PIN_FNAME(egress_host));
|
||||
/* tc qdisc add dev veth_dst clsact */
|
||||
QDISC_CLSACT_CREATE(&qdisc_veth_dst, setup_result->ifindex_veth_dst);
|
||||
/* tc filter add dev veth_dst ingress bpf da ingress_host */
|
||||
XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
|
||||
/* tc filter add dev veth_dst egress bpf da egress_host */
|
||||
XGRESS_FILTER_ADD(&qdisc_veth_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
|
||||
close_netns(nstoken);
|
||||
|
||||
/* setup ns_fwd tc progs */
|
||||
nstoken = open_netns(NS_FWD);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
|
||||
return -1;
|
||||
PIN(ingress_fwdns_prio100);
|
||||
PIN(egress_fwdns_prio100);
|
||||
PIN(ingress_fwdns_prio101);
|
||||
PIN(egress_fwdns_prio101);
|
||||
SYS("tc qdisc add dev veth_dst_fwd clsact");
|
||||
SYS("tc filter add dev veth_dst_fwd ingress prio 100 bpf da object-pinned "
|
||||
PIN_FNAME(ingress_fwdns_prio100));
|
||||
SYS("tc filter add dev veth_dst_fwd ingress prio 101 bpf da object-pinned "
|
||||
PIN_FNAME(ingress_fwdns_prio101));
|
||||
SYS("tc filter add dev veth_dst_fwd egress prio 100 bpf da object-pinned "
|
||||
PIN_FNAME(egress_fwdns_prio100));
|
||||
SYS("tc filter add dev veth_dst_fwd egress prio 101 bpf da object-pinned "
|
||||
PIN_FNAME(egress_fwdns_prio101));
|
||||
SYS("tc qdisc add dev veth_src_fwd clsact");
|
||||
SYS("tc filter add dev veth_src_fwd ingress prio 100 bpf da object-pinned "
|
||||
PIN_FNAME(ingress_fwdns_prio100));
|
||||
SYS("tc filter add dev veth_src_fwd ingress prio 101 bpf da object-pinned "
|
||||
PIN_FNAME(ingress_fwdns_prio101));
|
||||
SYS("tc filter add dev veth_src_fwd egress prio 100 bpf da object-pinned "
|
||||
PIN_FNAME(egress_fwdns_prio100));
|
||||
SYS("tc filter add dev veth_src_fwd egress prio 101 bpf da object-pinned "
|
||||
PIN_FNAME(egress_fwdns_prio101));
|
||||
/* tc qdisc add dev veth_dst_fwd clsact */
|
||||
QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
|
||||
/* tc filter add dev veth_dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
|
||||
XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS,
|
||||
skel->progs.ingress_fwdns_prio100, 100);
|
||||
/* tc filter add dev veth_dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
|
||||
XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS,
|
||||
skel->progs.ingress_fwdns_prio101, 101);
|
||||
/* tc filter add dev veth_dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
|
||||
XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS,
|
||||
skel->progs.egress_fwdns_prio100, 100);
|
||||
/* tc filter add dev veth_dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
|
||||
XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS,
|
||||
skel->progs.egress_fwdns_prio101, 101);
|
||||
|
||||
/* tc qdisc add dev veth_src_fwd clsact */
|
||||
QDISC_CLSACT_CREATE(&qdisc_veth_src_fwd, setup_result->ifindex_veth_src_fwd);
|
||||
/* tc filter add dev veth_src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
|
||||
XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS,
|
||||
skel->progs.ingress_fwdns_prio100, 100);
|
||||
/* tc filter add dev veth_src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
|
||||
XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_INGRESS,
|
||||
skel->progs.ingress_fwdns_prio101, 101);
|
||||
/* tc filter add dev veth_src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
|
||||
XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS,
|
||||
skel->progs.egress_fwdns_prio100, 100);
|
||||
/* tc filter add dev veth_src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
|
||||
XGRESS_FILTER_ADD(&qdisc_veth_src_fwd, BPF_TC_EGRESS,
|
||||
skel->progs.egress_fwdns_prio101, 101);
|
||||
close_netns(nstoken);
|
||||
|
||||
#undef PIN
|
||||
|
||||
return 0;
|
||||
|
||||
fail:
|
||||
close_netns(nstoken);
|
||||
return -1;
|
||||
return err;
|
||||
}
|
||||
|
||||
enum {
|
||||
@ -746,7 +792,7 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
|
||||
if (!ASSERT_OK(err, "test_tc_dtime__load"))
|
||||
goto done;
|
||||
|
||||
if (netns_load_dtime_bpf(skel))
|
||||
if (netns_load_dtime_bpf(skel, setup_result))
|
||||
goto done;
|
||||
|
||||
nstoken = open_netns(NS_FWD);
|
||||
@ -788,7 +834,6 @@ static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
|
||||
{
|
||||
struct nstoken *nstoken = NULL;
|
||||
struct test_tc_neigh_fib *skel = NULL;
|
||||
int err;
|
||||
|
||||
nstoken = open_netns(NS_FWD);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
|
||||
@ -801,19 +846,8 @@ static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
|
||||
if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
|
||||
goto done;
|
||||
|
||||
err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
|
||||
if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
|
||||
goto done;
|
||||
|
||||
err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
|
||||
if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
|
||||
goto done;
|
||||
|
||||
err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
|
||||
if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
|
||||
goto done;
|
||||
|
||||
if (netns_load_bpf())
|
||||
if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
|
||||
skel->progs.tc_chk, setup_result))
|
||||
goto done;
|
||||
|
||||
/* bpf_fib_lookup() checks if forwarding is enabled */
|
||||
@ -849,19 +883,8 @@ static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
|
||||
if (!ASSERT_OK(err, "test_tc_neigh__load"))
|
||||
goto done;
|
||||
|
||||
err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
|
||||
if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
|
||||
goto done;
|
||||
|
||||
err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
|
||||
if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
|
||||
goto done;
|
||||
|
||||
err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
|
||||
if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
|
||||
goto done;
|
||||
|
||||
if (netns_load_bpf())
|
||||
if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
|
||||
skel->progs.tc_chk, setup_result))
|
||||
goto done;
|
||||
|
||||
if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
|
||||
@ -896,19 +919,8 @@ static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
|
||||
if (!ASSERT_OK(err, "test_tc_peer__load"))
|
||||
goto done;
|
||||
|
||||
err = bpf_program__pin(skel->progs.tc_src, SRC_PROG_PIN_FILE);
|
||||
if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
|
||||
goto done;
|
||||
|
||||
err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
|
||||
if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
|
||||
goto done;
|
||||
|
||||
err = bpf_program__pin(skel->progs.tc_dst, DST_PROG_PIN_FILE);
|
||||
if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
|
||||
goto done;
|
||||
|
||||
if (netns_load_bpf())
|
||||
if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
|
||||
skel->progs.tc_chk, setup_result))
|
||||
goto done;
|
||||
|
||||
if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
|
||||
@ -991,6 +1003,8 @@ static int tun_relay_loop(int src_fd, int target_fd)
|
||||
|
||||
static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd);
|
||||
LIBBPF_OPTS(bpf_tc_hook, qdisc_veth_dst_fwd);
|
||||
struct test_tc_peer *skel = NULL;
|
||||
struct nstoken *nstoken = NULL;
|
||||
int err;
|
||||
@ -1034,8 +1048,8 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
|
||||
if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
|
||||
goto fail;
|
||||
|
||||
ifindex = get_ifindex("tun_fwd");
|
||||
if (!ASSERT_GE(ifindex, 0, "get_ifindex tun_fwd"))
|
||||
ifindex = if_nametoindex("tun_fwd");
|
||||
if (!ASSERT_GT(ifindex, 0, "if_indextoname tun_fwd"))
|
||||
goto fail;
|
||||
|
||||
skel->rodata->IFINDEX_SRC = ifindex;
|
||||
@ -1045,31 +1059,21 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
|
||||
if (!ASSERT_OK(err, "test_tc_peer__load"))
|
||||
goto fail;
|
||||
|
||||
err = bpf_program__pin(skel->progs.tc_src_l3, SRC_PROG_PIN_FILE);
|
||||
if (!ASSERT_OK(err, "pin " SRC_PROG_PIN_FILE))
|
||||
goto fail;
|
||||
|
||||
err = bpf_program__pin(skel->progs.tc_dst_l3, DST_PROG_PIN_FILE);
|
||||
if (!ASSERT_OK(err, "pin " DST_PROG_PIN_FILE))
|
||||
goto fail;
|
||||
|
||||
err = bpf_program__pin(skel->progs.tc_chk, CHK_PROG_PIN_FILE);
|
||||
if (!ASSERT_OK(err, "pin " CHK_PROG_PIN_FILE))
|
||||
goto fail;
|
||||
|
||||
/* Load "tc_src_l3" to the tun_fwd interface to redirect packets
|
||||
* towards dst, and "tc_dst" to redirect packets
|
||||
* and "tc_chk" on veth_dst_fwd to drop non-redirected packets.
|
||||
*/
|
||||
SYS("tc qdisc add dev tun_fwd clsact");
|
||||
SYS("tc filter add dev tun_fwd ingress bpf da object-pinned "
|
||||
SRC_PROG_PIN_FILE);
|
||||
/* tc qdisc add dev tun_fwd clsact */
|
||||
QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex);
|
||||
/* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */
|
||||
XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0);
|
||||
|
||||
SYS("tc qdisc add dev veth_dst_fwd clsact");
|
||||
SYS("tc filter add dev veth_dst_fwd ingress bpf da object-pinned "
|
||||
DST_PROG_PIN_FILE);
|
||||
SYS("tc filter add dev veth_dst_fwd egress bpf da object-pinned "
|
||||
CHK_PROG_PIN_FILE);
|
||||
/* tc qdisc add dev veth_dst_fwd clsact */
|
||||
QDISC_CLSACT_CREATE(&qdisc_veth_dst_fwd, setup_result->ifindex_veth_dst_fwd);
|
||||
/* tc filter add dev veth_dst_fwd ingress bpf da tc_dst_l3 */
|
||||
XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
|
||||
/* tc filter add dev veth_dst_fwd egress bpf da tc_chk */
|
||||
XGRESS_FILTER_ADD(&qdisc_veth_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
|
||||
|
||||
/* Setup route and neigh tables */
|
||||
SYS("ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
|
||||
@ -1134,7 +1138,7 @@ static void *test_tc_redirect_run_tests(void *arg)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void serial_test_tc_redirect(void)
|
||||
void test_tc_redirect(void)
|
||||
{
|
||||
pthread_t test_thread;
|
||||
int err;
|
||||
|
@ -421,7 +421,7 @@ static void *test_tunnel_run_tests(void *arg)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void serial_test_tunnel(void)
|
||||
void test_tunnel(void)
|
||||
{
|
||||
pthread_t test_thread;
|
||||
int err;
|
||||
|
@ -673,9 +673,11 @@ static struct {
|
||||
{"user_ringbuf_callback_write_forbidden", "invalid mem access 'dynptr_ptr'"},
|
||||
{"user_ringbuf_callback_null_context_write", "invalid mem access 'scalar'"},
|
||||
{"user_ringbuf_callback_null_context_read", "invalid mem access 'scalar'"},
|
||||
{"user_ringbuf_callback_discard_dynptr", "arg 1 is an unacquired reference"},
|
||||
{"user_ringbuf_callback_submit_dynptr", "arg 1 is an unacquired reference"},
|
||||
{"user_ringbuf_callback_discard_dynptr", "cannot release unowned const bpf_dynptr"},
|
||||
{"user_ringbuf_callback_submit_dynptr", "cannot release unowned const bpf_dynptr"},
|
||||
{"user_ringbuf_callback_invalid_return", "At callback return the register R0 has value"},
|
||||
{"user_ringbuf_callback_reinit_dynptr_mem", "Dynptr has to be an uninitialized dynptr"},
|
||||
{"user_ringbuf_callback_reinit_dynptr_ringbuf", "Dynptr has to be an uninitialized dynptr"},
|
||||
};
|
||||
|
||||
#define SUCCESS_TEST(_func) { _func, #_func }
|
||||
|
@ -85,7 +85,7 @@ static void test_max_pkt_size(int fd)
|
||||
}
|
||||
|
||||
#define NUM_PKTS 10000
|
||||
void serial_test_xdp_do_redirect(void)
|
||||
void test_xdp_do_redirect(void)
|
||||
{
|
||||
int err, xdp_prog_fd, tc_prog_fd, ifindex_src, ifindex_dst;
|
||||
char data[sizeof(pkt_udp) + sizeof(__u32)];
|
||||
|
@ -174,7 +174,7 @@ out:
|
||||
system("ip netns del synproxy");
|
||||
}
|
||||
|
||||
void serial_test_xdp_synproxy(void)
|
||||
void test_xdp_synproxy(void)
|
||||
{
|
||||
if (test__start_subtest("xdp"))
|
||||
test_synproxy(true);
|
||||
|
362
tools/testing/selftests/bpf/prog_tests/xfrm_info.c
Normal file
362
tools/testing/selftests/bpf/prog_tests/xfrm_info.c
Normal file
@ -0,0 +1,362 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
|
||||
|
||||
/*
|
||||
* Topology:
|
||||
* ---------
|
||||
* NS0 namespace | NS1 namespace | NS2 namespace
|
||||
* | |
|
||||
* +---------------+ | +---------------+ |
|
||||
* | ipsec0 |---------| ipsec0 | |
|
||||
* | 192.168.1.100 | | | 192.168.1.200 | |
|
||||
* | if_id: bpf | | +---------------+ |
|
||||
* +---------------+ | |
|
||||
* | | | +---------------+
|
||||
* | | | | ipsec0 |
|
||||
* \------------------------------------------| 192.168.1.200 |
|
||||
* | | +---------------+
|
||||
* | |
|
||||
* | | (overlay network)
|
||||
* ------------------------------------------------------
|
||||
* | | (underlay network)
|
||||
* +--------------+ | +--------------+ |
|
||||
* | veth01 |----------| veth10 | |
|
||||
* | 172.16.1.100 | | | 172.16.1.200 | |
|
||||
* +--------------+ | +--------------+ |
|
||||
* | |
|
||||
* +--------------+ | | +--------------+
|
||||
* | veth02 |-----------------------------------| veth20 |
|
||||
* | 172.16.2.100 | | | | 172.16.2.200 |
|
||||
* +--------------+ | | +--------------+
|
||||
*
|
||||
*
|
||||
* Test Packet flow
|
||||
* -----------
|
||||
* The tests perform 'ping 192.168.1.200' from the NS0 namespace:
|
||||
* 1) request is routed to NS0 ipsec0
|
||||
* 2) NS0 ipsec0 tc egress BPF program is triggered and sets the if_id based
|
||||
* on the requested value. This makes the ipsec0 device in external mode
|
||||
* select the destination tunnel
|
||||
* 3) ping reaches the other namespace (NS1 or NS2 based on which if_id was
|
||||
* used) and response is sent
|
||||
* 4) response is received on NS0 ipsec0, tc ingress program is triggered and
|
||||
* records the response if_id
|
||||
* 5) requested if_id is compared with received if_id
|
||||
*/
|
||||
|
||||
#include <net/if.h>
|
||||
#include <linux/rtnetlink.h>
|
||||
#include <linux/if_link.h>
|
||||
|
||||
#include "test_progs.h"
|
||||
#include "network_helpers.h"
|
||||
#include "xfrm_info.skel.h"
|
||||
|
||||
#define NS0 "xfrm_test_ns0"
|
||||
#define NS1 "xfrm_test_ns1"
|
||||
#define NS2 "xfrm_test_ns2"
|
||||
|
||||
#define IF_ID_0_TO_1 1
|
||||
#define IF_ID_0_TO_2 2
|
||||
#define IF_ID_1 3
|
||||
#define IF_ID_2 4
|
||||
|
||||
#define IP4_ADDR_VETH01 "172.16.1.100"
|
||||
#define IP4_ADDR_VETH10 "172.16.1.200"
|
||||
#define IP4_ADDR_VETH02 "172.16.2.100"
|
||||
#define IP4_ADDR_VETH20 "172.16.2.200"
|
||||
|
||||
#define ESP_DUMMY_PARAMS \
|
||||
"proto esp aead 'rfc4106(gcm(aes))' " \
|
||||
"0xe4d8f4b4da1df18a3510b3781496daa82488b713 128 mode tunnel "
|
||||
|
||||
#define SYS(fmt, ...) \
|
||||
({ \
|
||||
char cmd[1024]; \
|
||||
snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
|
||||
if (!ASSERT_OK(system(cmd), cmd)) \
|
||||
goto fail; \
|
||||
})
|
||||
|
||||
#define SYS_NOFAIL(fmt, ...) \
|
||||
({ \
|
||||
char cmd[1024]; \
|
||||
snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
|
||||
system(cmd); \
|
||||
})
|
||||
|
||||
static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, .priority = 1,
|
||||
.prog_fd = igr_fd);
|
||||
LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1, .priority = 1,
|
||||
.prog_fd = egr_fd);
|
||||
int ret;
|
||||
|
||||
ret = bpf_tc_hook_create(hook);
|
||||
if (!ASSERT_OK(ret, "create tc hook"))
|
||||
return ret;
|
||||
|
||||
if (igr_fd >= 0) {
|
||||
hook->attach_point = BPF_TC_INGRESS;
|
||||
ret = bpf_tc_attach(hook, &opts1);
|
||||
if (!ASSERT_OK(ret, "bpf_tc_attach")) {
|
||||
bpf_tc_hook_destroy(hook);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if (egr_fd >= 0) {
|
||||
hook->attach_point = BPF_TC_EGRESS;
|
||||
ret = bpf_tc_attach(hook, &opts2);
|
||||
if (!ASSERT_OK(ret, "bpf_tc_attach")) {
|
||||
bpf_tc_hook_destroy(hook);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void cleanup(void)
|
||||
{
|
||||
SYS_NOFAIL("test -f /var/run/netns/" NS0 " && ip netns delete " NS0);
|
||||
SYS_NOFAIL("test -f /var/run/netns/" NS1 " && ip netns delete " NS1);
|
||||
SYS_NOFAIL("test -f /var/run/netns/" NS2 " && ip netns delete " NS2);
|
||||
}
|
||||
|
||||
static int config_underlay(void)
|
||||
{
|
||||
SYS("ip netns add " NS0);
|
||||
SYS("ip netns add " NS1);
|
||||
SYS("ip netns add " NS2);
|
||||
|
||||
/* NS0 <-> NS1 [veth01 <-> veth10] */
|
||||
SYS("ip link add veth01 netns " NS0 " type veth peer name veth10 netns " NS1);
|
||||
SYS("ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01");
|
||||
SYS("ip -net " NS0 " link set dev veth01 up");
|
||||
SYS("ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10");
|
||||
SYS("ip -net " NS1 " link set dev veth10 up");
|
||||
|
||||
/* NS0 <-> NS2 [veth02 <-> veth20] */
|
||||
SYS("ip link add veth02 netns " NS0 " type veth peer name veth20 netns " NS2);
|
||||
SYS("ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02");
|
||||
SYS("ip -net " NS0 " link set dev veth02 up");
|
||||
SYS("ip -net " NS2 " addr add " IP4_ADDR_VETH20 "/24 dev veth20");
|
||||
SYS("ip -net " NS2 " link set dev veth20 up");
|
||||
|
||||
return 0;
|
||||
fail:
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int setup_xfrm_tunnel_ns(const char *ns, const char *ipv4_local,
|
||||
const char *ipv4_remote, int if_id)
|
||||
{
|
||||
/* State: local -> remote */
|
||||
SYS("ip -net %s xfrm state add src %s dst %s spi 1 "
|
||||
ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_local, ipv4_remote, if_id);
|
||||
|
||||
/* State: local <- remote */
|
||||
SYS("ip -net %s xfrm state add src %s dst %s spi 1 "
|
||||
ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_remote, ipv4_local, if_id);
|
||||
|
||||
/* Policy: local -> remote */
|
||||
SYS("ip -net %s xfrm policy add dir out src 0.0.0.0/0 dst 0.0.0.0/0 "
|
||||
"if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns,
|
||||
if_id, ipv4_local, ipv4_remote, if_id);
|
||||
|
||||
/* Policy: local <- remote */
|
||||
SYS("ip -net %s xfrm policy add dir in src 0.0.0.0/0 dst 0.0.0.0/0 "
|
||||
"if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns,
|
||||
if_id, ipv4_remote, ipv4_local, if_id);
|
||||
|
||||
return 0;
|
||||
fail:
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int setup_xfrm_tunnel(const char *ns_a, const char *ns_b,
|
||||
const char *ipv4_a, const char *ipv4_b,
|
||||
int if_id_a, int if_id_b)
|
||||
{
|
||||
return setup_xfrm_tunnel_ns(ns_a, ipv4_a, ipv4_b, if_id_a) ||
|
||||
setup_xfrm_tunnel_ns(ns_b, ipv4_b, ipv4_a, if_id_b);
|
||||
}
|
||||
|
||||
static struct rtattr *rtattr_add(struct nlmsghdr *nh, unsigned short type,
|
||||
unsigned short len)
|
||||
{
|
||||
struct rtattr *rta =
|
||||
(struct rtattr *)((uint8_t *)nh + RTA_ALIGN(nh->nlmsg_len));
|
||||
rta->rta_type = type;
|
||||
rta->rta_len = RTA_LENGTH(len);
|
||||
nh->nlmsg_len = RTA_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len);
|
||||
return rta;
|
||||
}
|
||||
|
||||
static struct rtattr *rtattr_add_str(struct nlmsghdr *nh, unsigned short type,
|
||||
const char *s)
|
||||
{
|
||||
struct rtattr *rta = rtattr_add(nh, type, strlen(s));
|
||||
|
||||
memcpy(RTA_DATA(rta), s, strlen(s));
|
||||
return rta;
|
||||
}
|
||||
|
||||
static struct rtattr *rtattr_begin(struct nlmsghdr *nh, unsigned short type)
|
||||
{
|
||||
return rtattr_add(nh, type, 0);
|
||||
}
|
||||
|
||||
static void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr)
|
||||
{
|
||||
uint8_t *end = (uint8_t *)nh + nh->nlmsg_len;
|
||||
|
||||
attr->rta_len = end - (uint8_t *)attr;
|
||||
}
|
||||
|
||||
static int setup_xfrmi_external_dev(const char *ns)
|
||||
{
|
||||
struct {
|
||||
struct nlmsghdr nh;
|
||||
struct ifinfomsg info;
|
||||
unsigned char data[128];
|
||||
} req;
|
||||
struct rtattr *link_info, *info_data;
|
||||
struct nstoken *nstoken;
|
||||
int ret = -1, sock = -1;
|
||||
struct nlmsghdr *nh;
|
||||
|
||||
memset(&req, 0, sizeof(req));
|
||||
nh = &req.nh;
|
||||
nh->nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
|
||||
nh->nlmsg_type = RTM_NEWLINK;
|
||||
nh->nlmsg_flags |= NLM_F_CREATE | NLM_F_REQUEST;
|
||||
|
||||
rtattr_add_str(nh, IFLA_IFNAME, "ipsec0");
|
||||
link_info = rtattr_begin(nh, IFLA_LINKINFO);
|
||||
rtattr_add_str(nh, IFLA_INFO_KIND, "xfrm");
|
||||
info_data = rtattr_begin(nh, IFLA_INFO_DATA);
|
||||
rtattr_add(nh, IFLA_XFRM_COLLECT_METADATA, 0);
|
||||
rtattr_end(nh, info_data);
|
||||
rtattr_end(nh, link_info);
|
||||
|
||||
nstoken = open_netns(ns);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns"))
|
||||
goto done;
|
||||
|
||||
sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
|
||||
if (!ASSERT_GE(sock, 0, "netlink socket"))
|
||||
goto done;
|
||||
ret = send(sock, nh, nh->nlmsg_len, 0);
|
||||
if (!ASSERT_EQ(ret, nh->nlmsg_len, "netlink send length"))
|
||||
goto done;
|
||||
|
||||
ret = 0;
|
||||
done:
|
||||
if (sock != -1)
|
||||
close(sock);
|
||||
if (nstoken)
|
||||
close_netns(nstoken);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int config_overlay(void)
|
||||
{
|
||||
if (setup_xfrm_tunnel(NS0, NS1, IP4_ADDR_VETH01, IP4_ADDR_VETH10,
|
||||
IF_ID_0_TO_1, IF_ID_1))
|
||||
goto fail;
|
||||
if (setup_xfrm_tunnel(NS0, NS2, IP4_ADDR_VETH02, IP4_ADDR_VETH20,
|
||||
IF_ID_0_TO_2, IF_ID_2))
|
||||
goto fail;
|
||||
|
||||
/* Older iproute2 doesn't support the xfrm collect-metadata (external) option, so ipsec0 is created via raw netlink */
|
||||
if (!ASSERT_OK(setup_xfrmi_external_dev(NS0), "xfrmi"))
|
||||
goto fail;
|
||||
|
||||
SYS("ip -net " NS0 " addr add 192.168.1.100/24 dev ipsec0");
|
||||
SYS("ip -net " NS0 " link set dev ipsec0 up");
|
||||
|
||||
SYS("ip -net " NS1 " link add ipsec0 type xfrm if_id %d", IF_ID_1);
|
||||
SYS("ip -net " NS1 " addr add 192.168.1.200/24 dev ipsec0");
|
||||
SYS("ip -net " NS1 " link set dev ipsec0 up");
|
||||
|
||||
SYS("ip -net " NS2 " link add ipsec0 type xfrm if_id %d", IF_ID_2);
|
||||
SYS("ip -net " NS2 " addr add 192.168.1.200/24 dev ipsec0");
|
||||
SYS("ip -net " NS2 " link set dev ipsec0 up");
|
||||
|
||||
return 0;
|
||||
fail:
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int test_xfrm_ping(struct xfrm_info *skel, u32 if_id)
|
||||
{
|
||||
skel->bss->req_if_id = if_id;
|
||||
|
||||
SYS("ping -i 0.01 -c 3 -w 10 -q 192.168.1.200 > /dev/null");
|
||||
|
||||
if (!ASSERT_EQ(skel->bss->resp_if_id, if_id, "if_id"))
|
||||
goto fail;
|
||||
|
||||
return 0;
|
||||
fail:
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void _test_xfrm_info(void)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
|
||||
int get_xfrm_info_prog_fd, set_xfrm_info_prog_fd;
|
||||
struct nstoken *nstoken = NULL;
|
||||
struct xfrm_info *skel;
|
||||
int ifindex;
|
||||
|
||||
/* load and attach bpf progs to ipsec dev tc hook point */
|
||||
skel = xfrm_info__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skel, "xfrm_info__open_and_load"))
|
||||
goto done;
|
||||
nstoken = open_netns(NS0);
|
||||
if (!ASSERT_OK_PTR(nstoken, "setns " NS0))
|
||||
goto done;
|
||||
ifindex = if_nametoindex("ipsec0");
|
||||
if (!ASSERT_NEQ(ifindex, 0, "ipsec0 ifindex"))
|
||||
goto done;
|
||||
tc_hook.ifindex = ifindex;
|
||||
set_xfrm_info_prog_fd = bpf_program__fd(skel->progs.set_xfrm_info);
|
||||
get_xfrm_info_prog_fd = bpf_program__fd(skel->progs.get_xfrm_info);
|
||||
if (!ASSERT_GE(set_xfrm_info_prog_fd, 0, "bpf_program__fd"))
|
||||
goto done;
|
||||
if (!ASSERT_GE(get_xfrm_info_prog_fd, 0, "bpf_program__fd"))
|
||||
goto done;
|
||||
if (attach_tc_prog(&tc_hook, get_xfrm_info_prog_fd,
|
||||
set_xfrm_info_prog_fd))
|
||||
goto done;
|
||||
|
||||
/* perform test */
|
||||
if (!ASSERT_EQ(test_xfrm_ping(skel, IF_ID_0_TO_1), 0, "ping " NS1))
|
||||
goto done;
|
||||
if (!ASSERT_EQ(test_xfrm_ping(skel, IF_ID_0_TO_2), 0, "ping " NS2))
|
||||
goto done;
|
||||
|
||||
done:
|
||||
if (nstoken)
|
||||
close_netns(nstoken);
|
||||
xfrm_info__destroy(skel);
|
||||
}
|
||||
|
||||
void test_xfrm_info(void)
|
||||
{
|
||||
cleanup();
|
||||
|
||||
if (!ASSERT_OK(config_underlay(), "config_underlay"))
|
||||
goto done;
|
||||
if (!ASSERT_OK(config_overlay(), "config_overlay"))
|
||||
goto done;
|
||||
|
||||
if (test__start_subtest("xfrm_info"))
|
||||
_test_xfrm_info();
|
||||
|
||||
done:
|
||||
cleanup();
|
||||
}
|
@ -7,14 +7,14 @@ char _license[] SEC("license") = "GPL";
|
||||
|
||||
unsigned long last_sym_value = 0;
|
||||
|
||||
static inline char tolower(char c)
|
||||
static inline char to_lower(char c)
|
||||
{
|
||||
if (c >= 'A' && c <= 'Z')
|
||||
c += ('a' - 'A');
|
||||
return c;
|
||||
}
|
||||
|
||||
static inline char toupper(char c)
|
||||
static inline char to_upper(char c)
|
||||
{
|
||||
if (c >= 'a' && c <= 'z')
|
||||
c -= ('a' - 'A');
|
||||
@ -54,7 +54,7 @@ int dump_ksym(struct bpf_iter__ksym *ctx)
|
||||
type = iter->type;
|
||||
|
||||
if (iter->module_name[0]) {
|
||||
type = iter->exported ? toupper(type) : tolower(type);
|
||||
type = iter->exported ? to_upper(type) : to_lower(type);
|
||||
BPF_SEQ_PRINTF(seq, "0x%llx %c %s [ %s ] ",
|
||||
value, type, iter->name, iter->module_name);
|
||||
} else {
|
||||
|
@ -2,6 +2,11 @@
|
||||
#ifndef __BPF_MISC_H__
|
||||
#define __BPF_MISC_H__
|
||||
|
||||
#define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" msg)))
|
||||
#define __failure __attribute__((btf_decl_tag("comment:test_expect_failure")))
|
||||
#define __success __attribute__((btf_decl_tag("comment:test_expect_success")))
|
||||
#define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl)))
|
||||
|
||||
#if defined(__TARGET_ARCH_x86)
|
||||
#define SYSCALL_WRAPPER 1
|
||||
#define SYS_PREFIX "__x64_"
|
||||
|
@ -25,6 +25,9 @@
|
||||
#define IPV6_TCLASS 67
|
||||
#define IPV6_AUTOFLOWLABEL 70
|
||||
|
||||
#define TC_ACT_UNSPEC (-1)
|
||||
#define TC_ACT_SHOT 2
|
||||
|
||||
#define SOL_TCP 6
|
||||
#define TCP_NODELAY 1
|
||||
#define TCP_MAXSEG 2
|
||||
|
@ -64,3 +64,4 @@ int BPF_PROG(test_percpu_helper, struct cgroup *cgrp, const char *path)
|
||||
|
||||
return 0;
|
||||
}
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
80
tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
Normal file
80
tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
Normal file
@ -0,0 +1,80 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
|
||||
|
||||
#include "bpf_iter.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include "bpf_misc.h"
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
|
||||
__uint(map_flags, BPF_F_NO_PREALLOC);
|
||||
__type(key, int);
|
||||
__type(value, long);
|
||||
} map_a SEC(".maps");
|
||||
|
||||
__u32 target_pid;
|
||||
__u64 cgroup_id;
|
||||
|
||||
void bpf_rcu_read_lock(void) __ksym;
|
||||
void bpf_rcu_read_unlock(void) __ksym;
|
||||
|
||||
SEC("?iter.s/cgroup")
|
||||
int cgroup_iter(struct bpf_iter__cgroup *ctx)
|
||||
{
|
||||
struct seq_file *seq = ctx->meta->seq;
|
||||
struct cgroup *cgrp = ctx->cgroup;
|
||||
long *ptr;
|
||||
|
||||
if (cgrp == NULL)
|
||||
return 0;
|
||||
|
||||
ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0,
|
||||
BPF_LOCAL_STORAGE_GET_F_CREATE);
|
||||
if (ptr)
|
||||
cgroup_id = cgrp->kn->id;
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
|
||||
int no_rcu_lock(void *ctx)
|
||||
{
|
||||
struct task_struct *task;
|
||||
struct cgroup *cgrp;
|
||||
long *ptr;
|
||||
|
||||
task = bpf_get_current_task_btf();
|
||||
if (task->pid != target_pid)
|
||||
return 0;
|
||||
|
||||
/* ptr_to_btf_id semantics. should work. */
|
||||
cgrp = task->cgroups->dfl_cgrp;
|
||||
ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0,
|
||||
BPF_LOCAL_STORAGE_GET_F_CREATE);
|
||||
if (ptr)
|
||||
cgroup_id = cgrp->kn->id;
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
|
||||
int yes_rcu_lock(void *ctx)
|
||||
{
|
||||
struct task_struct *task;
|
||||
struct cgroup *cgrp;
|
||||
long *ptr;
|
||||
|
||||
task = bpf_get_current_task_btf();
|
||||
if (task->pid != target_pid)
|
||||
return 0;
|
||||
|
||||
bpf_rcu_read_lock();
|
||||
cgrp = task->cgroups->dfl_cgrp;
|
||||
/* cgrp is untrusted and cannot be passed to the bpf_cgrp_storage_get() helper. */
|
||||
ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
|
||||
if (ptr)
|
||||
cgroup_id = cgrp->kn->id;
|
||||
bpf_rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
@ -43,6 +43,7 @@ struct sample {
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_RINGBUF);
|
||||
__uint(max_entries, 4096);
|
||||
} ringbuf SEC(".maps");
|
||||
|
||||
int err, val;
|
||||
@ -66,6 +67,7 @@ static int get_map_val_dynptr(struct bpf_dynptr *ptr)
|
||||
* bpf_ringbuf_submit/discard_dynptr call
|
||||
*/
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("Unreleased reference id=1")
|
||||
int ringbuf_missing_release1(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -78,6 +80,7 @@ int ringbuf_missing_release1(void *ctx)
|
||||
}
|
||||
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("Unreleased reference id=2")
|
||||
int ringbuf_missing_release2(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr1, ptr2;
|
||||
@ -113,6 +116,7 @@ static int missing_release_callback_fn(__u32 index, void *data)
|
||||
|
||||
/* Any dynptr initialized within a callback must have bpf_dynptr_put called */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("Unreleased reference id")
|
||||
int ringbuf_missing_release_callback(void *ctx)
|
||||
{
|
||||
bpf_loop(10, missing_release_callback_fn, NULL, 0);
|
||||
@ -121,6 +125,7 @@ int ringbuf_missing_release_callback(void *ctx)
|
||||
|
||||
/* Can't call bpf_ringbuf_submit/discard_dynptr on a non-initialized dynptr */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("arg 1 is an unacquired reference")
|
||||
int ringbuf_release_uninit_dynptr(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -133,6 +138,7 @@ int ringbuf_release_uninit_dynptr(void *ctx)
|
||||
|
||||
/* A dynptr can't be used after it has been invalidated */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("Expected an initialized dynptr as arg #3")
|
||||
int use_after_invalid(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -152,6 +158,7 @@ int use_after_invalid(void *ctx)
|
||||
|
||||
/* Can't call non-dynptr ringbuf APIs on a dynptr ringbuf sample */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("type=mem expected=ringbuf_mem")
|
||||
int ringbuf_invalid_api(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -174,6 +181,7 @@ done:
|
||||
|
||||
/* Can't add a dynptr to a map */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("invalid indirect read from stack")
|
||||
int add_dynptr_to_map1(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -191,6 +199,7 @@ int add_dynptr_to_map1(void *ctx)
|
||||
|
||||
/* Can't add a struct with an embedded dynptr to a map */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("invalid indirect read from stack")
|
||||
int add_dynptr_to_map2(void *ctx)
|
||||
{
|
||||
struct test_info x;
|
||||
@ -208,6 +217,7 @@ int add_dynptr_to_map2(void *ctx)
|
||||
|
||||
/* A data slice can't be accessed out of bounds */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("value is outside of the allowed memory range")
|
||||
int data_slice_out_of_bounds_ringbuf(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -228,6 +238,7 @@ done:
|
||||
}
|
||||
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("value is outside of the allowed memory range")
|
||||
int data_slice_out_of_bounds_map_value(void *ctx)
|
||||
{
|
||||
__u32 key = 0, map_val;
|
||||
@ -248,6 +259,7 @@ int data_slice_out_of_bounds_map_value(void *ctx)
|
||||
|
||||
/* A data slice can't be used after it has been released */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("invalid mem access 'scalar'")
|
||||
int data_slice_use_after_release1(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -279,6 +291,7 @@ done:
|
||||
* ptr2 is at fp - 16).
|
||||
*/
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("invalid mem access 'scalar'")
|
||||
int data_slice_use_after_release2(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr1, ptr2;
|
||||
@ -310,6 +323,7 @@ done:
|
||||
|
||||
/* A data slice must be first checked for NULL */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("invalid mem access 'mem_or_null'")
|
||||
int data_slice_missing_null_check1(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -330,6 +344,7 @@ int data_slice_missing_null_check1(void *ctx)
|
||||
|
||||
/* A data slice can't be dereferenced if it wasn't checked for null */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("invalid mem access 'mem_or_null'")
|
||||
int data_slice_missing_null_check2(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -352,6 +367,7 @@ done:
|
||||
* dynptr argument
|
||||
*/
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("invalid indirect read from stack")
|
||||
int invalid_helper1(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -366,6 +382,7 @@ int invalid_helper1(void *ctx)
|
||||
|
||||
/* A dynptr can't be passed into a helper function at a non-zero offset */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("Expected an initialized dynptr as arg #3")
|
||||
int invalid_helper2(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -381,6 +398,7 @@ int invalid_helper2(void *ctx)
|
||||
|
||||
/* A bpf_dynptr is invalidated if it's been written into */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("Expected an initialized dynptr as arg #1")
|
||||
int invalid_write1(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -402,6 +420,7 @@ int invalid_write1(void *ctx)
|
||||
* offset
|
||||
*/
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("Expected an initialized dynptr as arg #3")
|
||||
int invalid_write2(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -425,6 +444,7 @@ int invalid_write2(void *ctx)
|
||||
* non-const offset
|
||||
*/
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("Expected an initialized dynptr as arg #1")
|
||||
int invalid_write3(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -456,6 +476,7 @@ static int invalid_write4_callback(__u32 index, void *data)
|
||||
* be invalidated as a dynptr
|
||||
*/
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("arg 1 is an unacquired reference")
|
||||
int invalid_write4(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -472,7 +493,9 @@ int invalid_write4(void *ctx)
|
||||
|
||||
/* A globally-defined bpf_dynptr can't be used (it must reside on the stack) */
|
||||
struct bpf_dynptr global_dynptr;
|
||||
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("type=map_value expected=fp")
|
||||
int global(void *ctx)
|
||||
{
|
||||
/* this should fail */
|
||||
@ -485,6 +508,7 @@ int global(void *ctx)
|
||||
|
||||
/* A direct read should fail */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("invalid read from stack")
|
||||
int invalid_read1(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -501,6 +525,7 @@ int invalid_read1(void *ctx)
|
||||
|
||||
/* A direct read at an offset should fail */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("cannot pass in dynptr at an offset")
|
||||
int invalid_read2(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -516,6 +541,7 @@ int invalid_read2(void *ctx)
|
||||
|
||||
/* A direct read at an offset into the lower stack slot should fail */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("invalid read from stack")
|
||||
int invalid_read3(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr1, ptr2;
|
||||
@ -542,6 +568,7 @@ static int invalid_read4_callback(__u32 index, void *data)
|
||||
|
||||
/* A direct read within a callback function should fail */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("invalid read from stack")
|
||||
int invalid_read4(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -557,6 +584,7 @@ int invalid_read4(void *ctx)
|
||||
|
||||
/* Initializing a dynptr on an offset should fail */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("invalid write to stack")
|
||||
int invalid_offset(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -571,6 +599,7 @@ int invalid_offset(void *ctx)
|
||||
|
||||
/* Can't release a dynptr twice */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("arg 1 is an unacquired reference")
|
||||
int release_twice(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -597,6 +626,7 @@ static int release_twice_callback_fn(__u32 index, void *data)
|
||||
* within a callback function, fails
|
||||
*/
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("arg 1 is an unacquired reference")
|
||||
int release_twice_callback(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -612,6 +642,7 @@ int release_twice_callback(void *ctx)
|
||||
|
||||
/* Reject unsupported local mem types for dynptr_from_mem API */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("Unsupported reg type fp for bpf_dynptr_from_mem data")
|
||||
int dynptr_from_mem_invalid_api(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
|
@ -20,6 +20,7 @@ struct sample {
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_RINGBUF);
|
||||
__uint(max_entries, 4096);
|
||||
} ringbuf SEC(".maps");
|
||||
|
||||
struct {
|
||||
|
@ -99,13 +99,28 @@ int list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *hea
|
||||
struct foo *f[8], *pf;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(f); i++) {
|
||||
/* Loop following this check adds nodes 2-at-a-time in order to
|
||||
* validate multiple release_on_unlock release logic
|
||||
*/
|
||||
if (ARRAY_SIZE(f) % 2)
|
||||
return 10;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(f); i += 2) {
|
||||
f[i] = bpf_obj_new(typeof(**f));
|
||||
if (!f[i])
|
||||
return 2;
|
||||
f[i]->data = i;
|
||||
|
||||
f[i + 1] = bpf_obj_new(typeof(**f));
|
||||
if (!f[i + 1]) {
|
||||
bpf_obj_drop(f[i]);
|
||||
return 9;
|
||||
}
|
||||
f[i + 1]->data = i + 1;
|
||||
|
||||
bpf_spin_lock(lock);
|
||||
bpf_list_push_front(head, &f[i]->node);
|
||||
bpf_list_push_front(head, &f[i + 1]->node);
|
||||
bpf_spin_unlock(lock);
|
||||
}
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include "bpf_misc.h"
|
||||
|
||||
struct map_value {
|
||||
char buf[8];
|
||||
@ -23,6 +24,7 @@ extern struct prog_test_ref_kfunc *
|
||||
bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym;
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("kptr access size must be BPF_DW")
|
||||
int size_not_bpf_dw(struct __sk_buff *ctx)
|
||||
{
|
||||
struct map_value *v;
|
||||
@ -37,6 +39,7 @@ int size_not_bpf_dw(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("kptr access cannot have variable offset")
|
||||
int non_const_var_off(struct __sk_buff *ctx)
|
||||
{
|
||||
struct map_value *v;
|
||||
@ -55,6 +58,7 @@ int non_const_var_off(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("R1 doesn't have constant offset. kptr has to be")
|
||||
int non_const_var_off_kptr_xchg(struct __sk_buff *ctx)
|
||||
{
|
||||
struct map_value *v;
|
||||
@ -73,6 +77,7 @@ int non_const_var_off_kptr_xchg(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("kptr access misaligned expected=8 off=7")
|
||||
int misaligned_access_write(struct __sk_buff *ctx)
|
||||
{
|
||||
struct map_value *v;
|
||||
@ -88,6 +93,7 @@ int misaligned_access_write(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("kptr access misaligned expected=8 off=1")
|
||||
int misaligned_access_read(struct __sk_buff *ctx)
|
||||
{
|
||||
struct map_value *v;
|
||||
@ -101,6 +107,7 @@ int misaligned_access_read(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("variable untrusted_ptr_ access var_off=(0x0; 0x1e0)")
|
||||
int reject_var_off_store(struct __sk_buff *ctx)
|
||||
{
|
||||
struct prog_test_ref_kfunc *unref_ptr;
|
||||
@ -124,6 +131,7 @@ int reject_var_off_store(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc")
|
||||
int reject_bad_type_match(struct __sk_buff *ctx)
|
||||
{
|
||||
struct prog_test_ref_kfunc *unref_ptr;
|
||||
@ -144,6 +152,7 @@ int reject_bad_type_match(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_")
|
||||
int marked_as_untrusted_or_null(struct __sk_buff *ctx)
|
||||
{
|
||||
struct map_value *v;
|
||||
@ -158,6 +167,7 @@ int marked_as_untrusted_or_null(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("access beyond struct prog_test_ref_kfunc at off 32 size 4")
|
||||
int correct_btf_id_check_size(struct __sk_buff *ctx)
|
||||
{
|
||||
struct prog_test_ref_kfunc *p;
|
||||
@ -175,6 +185,7 @@ int correct_btf_id_check_size(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("R1 type=untrusted_ptr_ expected=percpu_ptr_")
|
||||
int inherit_untrusted_on_walk(struct __sk_buff *ctx)
|
||||
{
|
||||
struct prog_test_ref_kfunc *unref_ptr;
|
||||
@ -194,6 +205,7 @@ int inherit_untrusted_on_walk(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("off=8 kptr isn't referenced kptr")
|
||||
int reject_kptr_xchg_on_unref(struct __sk_buff *ctx)
|
||||
{
|
||||
struct map_value *v;
|
||||
@ -208,6 +220,7 @@ int reject_kptr_xchg_on_unref(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("arg#0 expected pointer to map value")
|
||||
int reject_kptr_get_no_map_val(struct __sk_buff *ctx)
|
||||
{
|
||||
bpf_kfunc_call_test_kptr_get((void *)&ctx, 0, 0);
|
||||
@ -215,6 +228,7 @@ int reject_kptr_get_no_map_val(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("arg#0 expected pointer to map value")
|
||||
int reject_kptr_get_no_null_map_val(struct __sk_buff *ctx)
|
||||
{
|
||||
bpf_kfunc_call_test_kptr_get(bpf_map_lookup_elem(&array_map, &(int){0}), 0, 0);
|
||||
@ -222,6 +236,7 @@ int reject_kptr_get_no_null_map_val(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("arg#0 no referenced kptr at map value offset=0")
|
||||
int reject_kptr_get_no_kptr(struct __sk_buff *ctx)
|
||||
{
|
||||
struct map_value *v;
|
||||
@ -236,6 +251,7 @@ int reject_kptr_get_no_kptr(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("arg#0 no referenced kptr at map value offset=8")
|
||||
int reject_kptr_get_on_unref(struct __sk_buff *ctx)
|
||||
{
|
||||
struct map_value *v;
|
||||
@ -250,6 +266,7 @@ int reject_kptr_get_on_unref(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("kernel function bpf_kfunc_call_test_kptr_get args#0")
|
||||
int reject_kptr_get_bad_type_match(struct __sk_buff *ctx)
|
||||
{
|
||||
struct map_value *v;
|
||||
@ -264,6 +281,7 @@ int reject_kptr_get_bad_type_match(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_")
|
||||
int mark_ref_as_untrusted_or_null(struct __sk_buff *ctx)
|
||||
{
|
||||
struct map_value *v;
|
||||
@ -278,6 +296,7 @@ int mark_ref_as_untrusted_or_null(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("store to referenced kptr disallowed")
|
||||
int reject_untrusted_store_to_ref(struct __sk_buff *ctx)
|
||||
{
|
||||
struct prog_test_ref_kfunc *p;
|
||||
@ -297,6 +316,7 @@ int reject_untrusted_store_to_ref(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("R2 type=untrusted_ptr_ expected=ptr_")
|
||||
int reject_untrusted_xchg(struct __sk_buff *ctx)
|
||||
{
|
||||
struct prog_test_ref_kfunc *p;
|
||||
@ -315,6 +335,8 @@ int reject_untrusted_xchg(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure
|
||||
__msg("invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member")
|
||||
int reject_bad_type_xchg(struct __sk_buff *ctx)
|
||||
{
|
||||
struct prog_test_ref_kfunc *ref_ptr;
|
||||
@ -333,6 +355,7 @@ int reject_bad_type_xchg(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("invalid kptr access, R2 type=ptr_prog_test_ref_kfunc")
|
||||
int reject_member_of_ref_xchg(struct __sk_buff *ctx)
|
||||
{
|
||||
struct prog_test_ref_kfunc *ref_ptr;
|
||||
@ -351,6 +374,7 @@ int reject_member_of_ref_xchg(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?syscall")
|
||||
__failure __msg("kptr cannot be accessed indirectly by helper")
|
||||
int reject_indirect_helper_access(struct __sk_buff *ctx)
|
||||
{
|
||||
struct map_value *v;
|
||||
@ -371,6 +395,7 @@ int write_func(int *p)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("kptr cannot be accessed indirectly by helper")
|
||||
int reject_indirect_global_func_access(struct __sk_buff *ctx)
|
||||
{
|
||||
struct map_value *v;
|
||||
@ -384,6 +409,7 @@ int reject_indirect_global_func_access(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("Unreleased reference id=5 alloc_insn=")
|
||||
int kptr_xchg_ref_state(struct __sk_buff *ctx)
|
||||
{
|
||||
struct prog_test_ref_kfunc *p;
|
||||
@ -402,6 +428,7 @@ int kptr_xchg_ref_state(struct __sk_buff *ctx)
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("Unreleased reference id=3 alloc_insn=")
|
||||
int kptr_get_ref_state(struct __sk_buff *ctx)
|
||||
{
|
||||
struct map_value *v;
|
||||
|
@ -23,13 +23,14 @@ struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
|
||||
void bpf_key_put(struct bpf_key *key) __ksym;
|
||||
void bpf_rcu_read_lock(void) __ksym;
|
||||
void bpf_rcu_read_unlock(void) __ksym;
|
||||
struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
|
||||
struct task_struct *bpf_task_acquire_not_zero(struct task_struct *p) __ksym;
|
||||
void bpf_task_release(struct task_struct *p) __ksym;
|
||||
|
||||
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
|
||||
int get_cgroup_id(void *ctx)
|
||||
{
|
||||
struct task_struct *task;
|
||||
struct css_set *cgroups;
|
||||
|
||||
task = bpf_get_current_task_btf();
|
||||
if (task->pid != target_pid)
|
||||
@ -37,7 +38,11 @@ int get_cgroup_id(void *ctx)
|
||||
|
||||
/* simulate bpf_get_current_cgroup_id() helper */
|
||||
bpf_rcu_read_lock();
|
||||
cgroup_id = task->cgroups->dfl_cgrp->kn->id;
|
||||
cgroups = task->cgroups;
|
||||
if (!cgroups)
|
||||
goto unlock;
|
||||
cgroup_id = cgroups->dfl_cgrp->kn->id;
|
||||
unlock:
|
||||
bpf_rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
@ -56,6 +61,8 @@ int task_succ(void *ctx)
|
||||
bpf_rcu_read_lock();
|
||||
/* region including helper using rcu ptr real_parent */
|
||||
real_parent = task->real_parent;
|
||||
if (!real_parent)
|
||||
goto out;
|
||||
ptr = bpf_task_storage_get(&map_a, real_parent, &init_val,
|
||||
BPF_LOCAL_STORAGE_GET_F_CREATE);
|
||||
if (!ptr)
|
||||
@ -92,7 +99,10 @@ int two_regions(void *ctx)
|
||||
bpf_rcu_read_unlock();
|
||||
bpf_rcu_read_lock();
|
||||
real_parent = task->real_parent;
|
||||
if (!real_parent)
|
||||
goto out;
|
||||
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
|
||||
out:
|
||||
bpf_rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
@ -105,7 +115,10 @@ int non_sleepable_1(void *ctx)
|
||||
task = bpf_get_current_task_btf();
|
||||
bpf_rcu_read_lock();
|
||||
real_parent = task->real_parent;
|
||||
if (!real_parent)
|
||||
goto out;
|
||||
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
|
||||
out:
|
||||
bpf_rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
@ -121,7 +134,10 @@ int non_sleepable_2(void *ctx)
|
||||
|
||||
bpf_rcu_read_lock();
|
||||
real_parent = task->real_parent;
|
||||
if (!real_parent)
|
||||
goto out;
|
||||
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
|
||||
out:
|
||||
bpf_rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
@ -129,16 +145,33 @@ int non_sleepable_2(void *ctx)
|
||||
SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
|
||||
int task_acquire(void *ctx)
|
||||
{
|
||||
struct task_struct *task, *real_parent;
|
||||
struct task_struct *task, *real_parent, *gparent;
|
||||
|
||||
task = bpf_get_current_task_btf();
|
||||
bpf_rcu_read_lock();
|
||||
real_parent = task->real_parent;
|
||||
if (!real_parent)
|
||||
goto out;
|
||||
|
||||
/* rcu_ptr->rcu_field */
|
||||
gparent = real_parent->real_parent;
|
||||
if (!gparent)
|
||||
goto out;
|
||||
|
||||
/* acquire a reference which can be used outside rcu read lock region */
|
||||
real_parent = bpf_task_acquire(real_parent);
|
||||
gparent = bpf_task_acquire_not_zero(gparent);
|
||||
if (!gparent)
|
||||
/* Until we resolve the issues with using task->rcu_users, we
|
||||
* expect bpf_task_acquire_not_zero() to return a NULL task.
|
||||
* See the comment at the definition of
|
||||
* bpf_task_acquire_not_zero() for more details.
|
||||
*/
|
||||
goto out;
|
||||
|
||||
(void)bpf_task_storage_get(&map_a, gparent, 0, 0);
|
||||
bpf_task_release(gparent);
|
||||
out:
|
||||
bpf_rcu_read_unlock();
|
||||
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
|
||||
bpf_task_release(real_parent);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -181,9 +214,12 @@ int non_sleepable_rcu_mismatch(void *ctx)
|
||||
/* non-sleepable: missing bpf_rcu_read_unlock() in one path */
|
||||
bpf_rcu_read_lock();
|
||||
real_parent = task->real_parent;
|
||||
if (!real_parent)
|
||||
goto out;
|
||||
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
|
||||
if (real_parent)
|
||||
bpf_rcu_read_unlock();
|
||||
out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -199,16 +235,17 @@ int inproper_sleepable_helper(void *ctx)
|
||||
/* sleepable helper in rcu read lock region */
|
||||
bpf_rcu_read_lock();
|
||||
real_parent = task->real_parent;
|
||||
if (!real_parent)
|
||||
goto out;
|
||||
regs = (struct pt_regs *)bpf_task_pt_regs(real_parent);
|
||||
if (!regs) {
|
||||
bpf_rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
if (!regs)
|
||||
goto out;
|
||||
|
||||
ptr = (void *)PT_REGS_IP(regs);
|
||||
(void)bpf_copy_from_user_task(&value, sizeof(uint32_t), ptr, task, 0);
|
||||
user_data = value;
|
||||
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
|
||||
out:
|
||||
bpf_rcu_read_unlock();
|
||||
return 0;
|
||||
}
|
||||
@ -239,7 +276,10 @@ int nested_rcu_region(void *ctx)
|
||||
bpf_rcu_read_lock();
|
||||
bpf_rcu_read_lock();
|
||||
real_parent = task->real_parent;
|
||||
if (!real_parent)
|
||||
goto out;
|
||||
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
|
||||
out:
|
||||
bpf_rcu_read_unlock();
|
||||
bpf_rcu_read_unlock();
|
||||
return 0;
|
||||
|
@ -271,3 +271,14 @@ int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 cl
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("lsm/task_free")
|
||||
int BPF_PROG(task_kfunc_from_lsm_task_free, struct task_struct *task)
|
||||
{
|
||||
struct task_struct *acquired;
|
||||
|
||||
/* the argument of lsm task_free hook is untrusted. */
|
||||
acquired = bpf_task_acquire(task);
|
||||
bpf_task_release(acquired);
|
||||
return 0;
|
||||
}
|
||||
|
@ -123,12 +123,17 @@ int BPF_PROG(test_task_get_release, struct task_struct *task, u64 clone_flags)
|
||||
}
|
||||
|
||||
kptr = bpf_task_kptr_get(&v->task);
|
||||
if (!kptr) {
|
||||
if (kptr) {
|
||||
/* Until we resolve the issues with using task->rcu_users, we
|
||||
* expect bpf_task_kptr_get() to return a NULL task. See the
|
||||
* comment at the definition of bpf_task_acquire_not_zero() for
|
||||
* more details.
|
||||
*/
|
||||
bpf_task_release(kptr);
|
||||
err = 3;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bpf_task_release(kptr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -32,18 +32,6 @@ int err, pid;
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
||||
SEC("?lsm.s/bpf")
|
||||
int BPF_PROG(dynptr_type_not_supp, int cmd, union bpf_attr *attr,
|
||||
unsigned int size)
|
||||
{
|
||||
char write_data[64] = "hello there, world!!";
|
||||
struct bpf_dynptr ptr;
|
||||
|
||||
bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(write_data), 0, &ptr);
|
||||
|
||||
return bpf_verify_pkcs7_signature(&ptr, &ptr, NULL);
|
||||
}
|
||||
|
||||
SEC("?lsm.s/bpf")
|
||||
int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size)
|
||||
{
|
||||
|
@ -18,6 +18,13 @@ struct {
|
||||
__uint(type, BPF_MAP_TYPE_USER_RINGBUF);
|
||||
} user_ringbuf SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_RINGBUF);
|
||||
__uint(max_entries, 2);
|
||||
} ringbuf SEC(".maps");
|
||||
|
||||
static int map_value;
|
||||
|
||||
static long
|
||||
bad_access1(struct bpf_dynptr *dynptr, void *context)
|
||||
{
|
||||
@ -32,7 +39,7 @@ bad_access1(struct bpf_dynptr *dynptr, void *context)
|
||||
/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
|
||||
* not be able to read before the pointer.
|
||||
*/
|
||||
SEC("?raw_tp/sys_nanosleep")
|
||||
SEC("?raw_tp/")
|
||||
int user_ringbuf_callback_bad_access1(void *ctx)
|
||||
{
|
||||
bpf_user_ringbuf_drain(&user_ringbuf, bad_access1, NULL, 0);
|
||||
@ -54,7 +61,7 @@ bad_access2(struct bpf_dynptr *dynptr, void *context)
|
||||
/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
|
||||
* not be able to read past the end of the pointer.
|
||||
*/
|
||||
SEC("?raw_tp/sys_nanosleep")
|
||||
SEC("?raw_tp/")
|
||||
int user_ringbuf_callback_bad_access2(void *ctx)
|
||||
{
|
||||
bpf_user_ringbuf_drain(&user_ringbuf, bad_access2, NULL, 0);
|
||||
@ -73,7 +80,7 @@ write_forbidden(struct bpf_dynptr *dynptr, void *context)
|
||||
/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
|
||||
* not be able to write to that pointer.
|
||||
*/
|
||||
SEC("?raw_tp/sys_nanosleep")
|
||||
SEC("?raw_tp/")
|
||||
int user_ringbuf_callback_write_forbidden(void *ctx)
|
||||
{
|
||||
bpf_user_ringbuf_drain(&user_ringbuf, write_forbidden, NULL, 0);
|
||||
@ -92,7 +99,7 @@ null_context_write(struct bpf_dynptr *dynptr, void *context)
|
||||
/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
|
||||
* not be able to write to that pointer.
|
||||
*/
|
||||
SEC("?raw_tp/sys_nanosleep")
|
||||
SEC("?raw_tp/")
|
||||
int user_ringbuf_callback_null_context_write(void *ctx)
|
||||
{
|
||||
bpf_user_ringbuf_drain(&user_ringbuf, null_context_write, NULL, 0);
|
||||
@ -113,7 +120,7 @@ null_context_read(struct bpf_dynptr *dynptr, void *context)
|
||||
/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
|
||||
* not be able to write to that pointer.
|
||||
*/
|
||||
SEC("?raw_tp/sys_nanosleep")
|
||||
SEC("?raw_tp/")
|
||||
int user_ringbuf_callback_null_context_read(void *ctx)
|
||||
{
|
||||
bpf_user_ringbuf_drain(&user_ringbuf, null_context_read, NULL, 0);
|
||||
@ -132,7 +139,7 @@ try_discard_dynptr(struct bpf_dynptr *dynptr, void *context)
|
||||
/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
|
||||
* not be able to read past the end of the pointer.
|
||||
*/
|
||||
SEC("?raw_tp/sys_nanosleep")
|
||||
SEC("?raw_tp/")
|
||||
int user_ringbuf_callback_discard_dynptr(void *ctx)
|
||||
{
|
||||
bpf_user_ringbuf_drain(&user_ringbuf, try_discard_dynptr, NULL, 0);
|
||||
@ -151,7 +158,7 @@ try_submit_dynptr(struct bpf_dynptr *dynptr, void *context)
|
||||
/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
|
||||
* not be able to read past the end of the pointer.
|
||||
*/
|
||||
SEC("?raw_tp/sys_nanosleep")
|
||||
SEC("?raw_tp/")
|
||||
int user_ringbuf_callback_submit_dynptr(void *ctx)
|
||||
{
|
||||
bpf_user_ringbuf_drain(&user_ringbuf, try_submit_dynptr, NULL, 0);
|
||||
@ -168,10 +175,38 @@ invalid_drain_callback_return(struct bpf_dynptr *dynptr, void *context)
|
||||
/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
|
||||
* not be able to write to that pointer.
|
||||
*/
|
||||
SEC("?raw_tp/sys_nanosleep")
|
||||
SEC("?raw_tp/")
|
||||
int user_ringbuf_callback_invalid_return(void *ctx)
|
||||
{
|
||||
bpf_user_ringbuf_drain(&user_ringbuf, invalid_drain_callback_return, NULL, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static long
|
||||
try_reinit_dynptr_mem(struct bpf_dynptr *dynptr, void *context)
|
||||
{
|
||||
bpf_dynptr_from_mem(&map_value, 4, 0, dynptr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static long
|
||||
try_reinit_dynptr_ringbuf(struct bpf_dynptr *dynptr, void *context)
|
||||
{
|
||||
bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, dynptr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("?raw_tp/")
|
||||
int user_ringbuf_callback_reinit_dynptr_mem(void *ctx)
|
||||
{
|
||||
bpf_user_ringbuf_drain(&user_ringbuf, try_reinit_dynptr_mem, NULL, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("?raw_tp/")
|
||||
int user_ringbuf_callback_reinit_dynptr_ringbuf(void *ctx)
|
||||
{
|
||||
bpf_user_ringbuf_drain(&user_ringbuf, try_reinit_dynptr_ringbuf, NULL, 0);
|
||||
return 0;
|
||||
}
|
||||
|
40
tools/testing/selftests/bpf/progs/xfrm_info.c
Normal file
40
tools/testing/selftests/bpf/progs/xfrm_info.c
Normal file
@ -0,0 +1,40 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include "vmlinux.h"
|
||||
#include "bpf_tracing_net.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
struct bpf_xfrm_info___local {
|
||||
u32 if_id;
|
||||
int link;
|
||||
} __attribute__((preserve_access_index));
|
||||
|
||||
__u32 req_if_id;
|
||||
__u32 resp_if_id;
|
||||
|
||||
int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx,
|
||||
const struct bpf_xfrm_info___local *from) __ksym;
|
||||
int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx,
|
||||
struct bpf_xfrm_info___local *to) __ksym;
|
||||
|
||||
/* TC program: attach xfrm info carrying the requested if_id to the skb.
 *
 * Reads the global req_if_id, hands it to bpf_skb_set_xfrm_info() and returns
 * TC_ACT_SHOT when the kfunc reports a non-zero result, TC_ACT_UNSPEC
 * otherwise.
 */
SEC("tc")
int set_xfrm_info(struct __sk_buff *skb)
{
	struct bpf_xfrm_info___local info = { .if_id = req_if_id };

	if (bpf_skb_set_xfrm_info(skb, &info))
		return TC_ACT_SHOT;

	return TC_ACT_UNSPEC;
}
|
||||
|
||||
/* TC program: read the xfrm info attached to the skb and publish its if_id.
 *
 * On success the if_id reported by bpf_skb_get_xfrm_info() is stored in the
 * global resp_if_id and TC_ACT_UNSPEC is returned; a negative kfunc result
 * yields TC_ACT_SHOT and leaves resp_if_id untouched.
 */
SEC("tc")
int get_xfrm_info(struct __sk_buff *skb)
{
	struct bpf_xfrm_info___local info = {};
	int ret = bpf_skb_get_xfrm_info(skb, &info);

	if (ret >= 0) {
		resp_if_id = info.if_id;
		return TC_ACT_UNSPEC;
	}

	return TC_ACT_SHOT;
}
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
@ -1,9 +1,9 @@
|
||||
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
|
||||
#include <iostream>
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
|
||||
#include <unistd.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/btf.h>
|
||||
#include <bpf/libbpf.h>
|
||||
#pragma GCC diagnostic pop
|
||||
#include <bpf/bpf.h>
|
||||
#include <bpf/btf.h>
|
||||
#include "test_core_extern.skel.h"
|
||||
@ -99,6 +99,7 @@ int main(int argc, char *argv[])
|
||||
struct btf_dump_opts opts = { };
|
||||
struct test_core_extern *skel;
|
||||
struct btf *btf;
|
||||
int fd;
|
||||
|
||||
try_skeleton_template();
|
||||
|
||||
@ -117,6 +118,12 @@ int main(int argc, char *argv[])
|
||||
skel = test_core_extern__open_and_load();
|
||||
test_core_extern__destroy(skel);
|
||||
|
||||
fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
|
||||
if (fd < 0)
|
||||
std::cout << "FAILED to enable stats: " << fd << std::endl;
|
||||
else
|
||||
::close(fd);
|
||||
|
||||
std::cout << "DONE!" << std::endl;
|
||||
|
||||
return 0;
|
||||
|
233
tools/testing/selftests/bpf/test_loader.c
Normal file
233
tools/testing/selftests/bpf/test_loader.c
Normal file
@ -0,0 +1,233 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
|
||||
#include <stdlib.h>
|
||||
#include <test_progs.h>
|
||||
#include <bpf/btf.h>
|
||||
|
||||
#define str_has_pfx(str, pfx) \
|
||||
(strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0)
|
||||
|
||||
#define TEST_LOADER_LOG_BUF_SZ 1048576
|
||||
|
||||
#define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure"
|
||||
#define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success"
|
||||
#define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg="
|
||||
#define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level="
|
||||
|
||||
struct test_spec {
|
||||
const char *name;
|
||||
bool expect_failure;
|
||||
const char *expect_msg;
|
||||
int log_level;
|
||||
};
|
||||
|
||||
static int tester_init(struct test_loader *tester)
|
||||
{
|
||||
if (!tester->log_buf) {
|
||||
tester->log_buf_sz = TEST_LOADER_LOG_BUF_SZ;
|
||||
tester->log_buf = malloc(tester->log_buf_sz);
|
||||
if (!ASSERT_OK_PTR(tester->log_buf, "tester_log_buf"))
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void test_loader_fini(struct test_loader *tester)
|
||||
{
|
||||
if (!tester)
|
||||
return;
|
||||
|
||||
free(tester->log_buf);
|
||||
}
|
||||
|
||||
static int parse_test_spec(struct test_loader *tester,
|
||||
struct bpf_object *obj,
|
||||
struct bpf_program *prog,
|
||||
struct test_spec *spec)
|
||||
{
|
||||
struct btf *btf;
|
||||
int func_id, i;
|
||||
|
||||
memset(spec, 0, sizeof(*spec));
|
||||
|
||||
spec->name = bpf_program__name(prog);
|
||||
|
||||
btf = bpf_object__btf(obj);
|
||||
if (!btf) {
|
||||
ASSERT_FAIL("BPF object has no BTF");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
func_id = btf__find_by_name_kind(btf, spec->name, BTF_KIND_FUNC);
|
||||
if (func_id < 0) {
|
||||
ASSERT_FAIL("failed to find FUNC BTF type for '%s'", spec->name);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
for (i = 1; i < btf__type_cnt(btf); i++) {
|
||||
const struct btf_type *t;
|
||||
const char *s;
|
||||
|
||||
t = btf__type_by_id(btf, i);
|
||||
if (!btf_is_decl_tag(t))
|
||||
continue;
|
||||
|
||||
if (t->type != func_id || btf_decl_tag(t)->component_idx != -1)
|
||||
continue;
|
||||
|
||||
s = btf__str_by_offset(btf, t->name_off);
|
||||
if (strcmp(s, TEST_TAG_EXPECT_FAILURE) == 0) {
|
||||
spec->expect_failure = true;
|
||||
} else if (strcmp(s, TEST_TAG_EXPECT_SUCCESS) == 0) {
|
||||
spec->expect_failure = false;
|
||||
} else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX)) {
|
||||
spec->expect_msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1;
|
||||
} else if (str_has_pfx(s, TEST_TAG_LOG_LEVEL_PFX)) {
|
||||
errno = 0;
|
||||
spec->log_level = strtol(s + sizeof(TEST_TAG_LOG_LEVEL_PFX) - 1, NULL, 0);
|
||||
if (errno) {
|
||||
ASSERT_FAIL("failed to parse test log level from '%s'", s);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void prepare_case(struct test_loader *tester,
|
||||
struct test_spec *spec,
|
||||
struct bpf_object *obj,
|
||||
struct bpf_program *prog)
|
||||
{
|
||||
int min_log_level = 0;
|
||||
|
||||
if (env.verbosity > VERBOSE_NONE)
|
||||
min_log_level = 1;
|
||||
if (env.verbosity > VERBOSE_VERY)
|
||||
min_log_level = 2;
|
||||
|
||||
bpf_program__set_log_buf(prog, tester->log_buf, tester->log_buf_sz);
|
||||
|
||||
/* Make sure we set at least minimal log level, unless test requirest
|
||||
* even higher level already. Make sure to preserve independent log
|
||||
* level 4 (verifier stats), though.
|
||||
*/
|
||||
if ((spec->log_level & 3) < min_log_level)
|
||||
bpf_program__set_log_level(prog, (spec->log_level & 4) | min_log_level);
|
||||
else
|
||||
bpf_program__set_log_level(prog, spec->log_level);
|
||||
|
||||
tester->log_buf[0] = '\0';
|
||||
}
|
||||
|
||||
static void emit_verifier_log(const char *log_buf, bool force)
|
||||
{
|
||||
if (!force && env.verbosity == VERBOSE_NONE)
|
||||
return;
|
||||
fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log_buf);
|
||||
}
|
||||
|
||||
static void validate_case(struct test_loader *tester,
|
||||
struct test_spec *spec,
|
||||
struct bpf_object *obj,
|
||||
struct bpf_program *prog,
|
||||
int load_err)
|
||||
{
|
||||
if (spec->expect_msg) {
|
||||
char *match;
|
||||
|
||||
match = strstr(tester->log_buf, spec->expect_msg);
|
||||
if (!ASSERT_OK_PTR(match, "expect_msg")) {
|
||||
/* if we are in verbose mode, we've already emitted log */
|
||||
if (env.verbosity == VERBOSE_NONE)
|
||||
emit_verifier_log(tester->log_buf, true /*force*/);
|
||||
fprintf(stderr, "EXPECTED MSG: '%s'\n", spec->expect_msg);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* this function is forced noinline and has short generic name to look better
|
||||
* in test_progs output (in case of a failure)
|
||||
*/
|
||||
static noinline
|
||||
void run_subtest(struct test_loader *tester,
|
||||
const char *skel_name,
|
||||
skel_elf_bytes_fn elf_bytes_factory)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_object_open_opts, open_opts, .object_name = skel_name);
|
||||
struct bpf_object *obj = NULL, *tobj;
|
||||
struct bpf_program *prog, *tprog;
|
||||
const void *obj_bytes;
|
||||
size_t obj_byte_cnt;
|
||||
int err;
|
||||
|
||||
if (tester_init(tester) < 0)
|
||||
return; /* failed to initialize tester */
|
||||
|
||||
obj_bytes = elf_bytes_factory(&obj_byte_cnt);
|
||||
obj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, &open_opts);
|
||||
if (!ASSERT_OK_PTR(obj, "obj_open_mem"))
|
||||
return;
|
||||
|
||||
bpf_object__for_each_program(prog, obj) {
|
||||
const char *prog_name = bpf_program__name(prog);
|
||||
struct test_spec spec;
|
||||
|
||||
if (!test__start_subtest(prog_name))
|
||||
continue;
|
||||
|
||||
/* if we can't derive test specification, go to the next test */
|
||||
err = parse_test_spec(tester, obj, prog, &spec);
|
||||
if (!ASSERT_OK(err, "parse_test_spec"))
|
||||
continue;
|
||||
|
||||
tobj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, &open_opts);
|
||||
if (!ASSERT_OK_PTR(tobj, "obj_open_mem")) /* shouldn't happen */
|
||||
continue;
|
||||
|
||||
bpf_object__for_each_program(tprog, tobj)
|
||||
bpf_program__set_autoload(tprog, false);
|
||||
|
||||
bpf_object__for_each_program(tprog, tobj) {
|
||||
/* only load specified program */
|
||||
if (strcmp(bpf_program__name(tprog), prog_name) == 0) {
|
||||
bpf_program__set_autoload(tprog, true);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
prepare_case(tester, &spec, tobj, tprog);
|
||||
|
||||
err = bpf_object__load(tobj);
|
||||
if (spec.expect_failure) {
|
||||
if (!ASSERT_ERR(err, "unexpected_load_success")) {
|
||||
emit_verifier_log(tester->log_buf, false /*force*/);
|
||||
goto tobj_cleanup;
|
||||
}
|
||||
} else {
|
||||
if (!ASSERT_OK(err, "unexpected_load_failure")) {
|
||||
emit_verifier_log(tester->log_buf, true /*force*/);
|
||||
goto tobj_cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
emit_verifier_log(tester->log_buf, false /*force*/);
|
||||
validate_case(tester, &spec, tobj, tprog, err);
|
||||
|
||||
tobj_cleanup:
|
||||
bpf_object__close(tobj);
|
||||
}
|
||||
|
||||
bpf_object__close(obj);
|
||||
}
|
||||
|
||||
void test_loader__run_subtests(struct test_loader *tester,
|
||||
const char *skel_name,
|
||||
skel_elf_bytes_fn elf_bytes_factory)
|
||||
{
|
||||
/* see comment in run_subtest() for why we do this function nesting */
|
||||
run_subtest(tester, skel_name, elf_bytes_factory);
|
||||
}
|
@ -769,12 +769,14 @@ skip(ret != 0, "bpftool not installed")
|
||||
base_progs = progs
|
||||
_, base_maps = bpftool("map")
|
||||
base_map_names = [
|
||||
'pid_iter.rodata' # created on each bpftool invocation
|
||||
'pid_iter.rodata', # created on each bpftool invocation
|
||||
'libbpf_det_bind', # created on each bpftool invocation
|
||||
]
|
||||
|
||||
# Check netdevsim
|
||||
ret, out = cmd("modprobe netdevsim", fail=False)
|
||||
skip(ret != 0, "netdevsim module could not be loaded")
|
||||
if not os.path.isdir("/sys/bus/netdevsim/"):
|
||||
ret, out = cmd("modprobe netdevsim", fail=False)
|
||||
skip(ret != 0, "netdevsim module could not be loaded")
|
||||
|
||||
# Check debugfs
|
||||
_, out = cmd("mount")
|
||||
|
@ -1,4 +1,7 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __TEST_PROGS_H
|
||||
#define __TEST_PROGS_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
@ -210,6 +213,12 @@ int test__join_cgroup(const char *path);
|
||||
#define CHECK_ATTR(condition, tag, format...) \
|
||||
_CHECK(condition, tag, tattr.duration, format)
|
||||
|
||||
/* Unconditionally report a test failure with a printf-style message.
 *
 * The expression evaluates to false so it can be used where the other
 * ASSERT_*() helpers are used.
 *
 * NOTE(review): CHECK() is expected to flag a failure (and print the message)
 * when its condition is true, as in the usual `if (CHECK(err, ...))` pattern;
 * passing `false` would therefore never fail the test. Confirm against the
 * _CHECK() definition in this header.
 */
#define ASSERT_FAIL(fmt, args...) ({					\
	static int duration = 0;					\
	CHECK(true, "", fmt"\n", ##args);				\
	false;								\
})
|
||||
|
||||
#define ASSERT_TRUE(actual, name) ({ \
|
||||
static int duration = 0; \
|
||||
bool ___ok = (actual); \
|
||||
@ -397,3 +406,27 @@ int write_sysctl(const char *sysctl, const char *value);
|
||||
#endif
|
||||
|
||||
#define BPF_TESTMOD_TEST_FILE "/sys/kernel/bpf_testmod"
|
||||
|
||||
struct test_loader {
|
||||
char *log_buf;
|
||||
size_t log_buf_sz;
|
||||
|
||||
struct bpf_object *obj;
|
||||
};
|
||||
|
||||
typedef const void *(*skel_elf_bytes_fn)(size_t *sz);
|
||||
|
||||
extern void test_loader__run_subtests(struct test_loader *tester,
|
||||
const char *skel_name,
|
||||
skel_elf_bytes_fn elf_bytes_factory);
|
||||
|
||||
extern void test_loader_fini(struct test_loader *tester);
|
||||
|
||||
#define RUN_TESTS(skel) ({ \
|
||||
struct test_loader tester = {}; \
|
||||
\
|
||||
test_loader__run_subtests(&tester, #skel, skel##__elf_bytes); \
|
||||
test_loader_fini(&tester); \
|
||||
})
|
||||
|
||||
#endif /* __TEST_PROGS_H */
|
||||
|
@ -1690,24 +1690,42 @@ static void test_txmsg_apply(int cgrp, struct sockmap_options *opt)
|
||||
{
|
||||
txmsg_pass = 1;
|
||||
txmsg_redir = 0;
|
||||
txmsg_ingress = 0;
|
||||
txmsg_apply = 1;
|
||||
txmsg_cork = 0;
|
||||
test_send_one(opt, cgrp);
|
||||
|
||||
txmsg_pass = 0;
|
||||
txmsg_redir = 1;
|
||||
txmsg_ingress = 0;
|
||||
txmsg_apply = 1;
|
||||
txmsg_cork = 0;
|
||||
test_send_one(opt, cgrp);
|
||||
|
||||
txmsg_pass = 0;
|
||||
txmsg_redir = 1;
|
||||
txmsg_ingress = 1;
|
||||
txmsg_apply = 1;
|
||||
txmsg_cork = 0;
|
||||
test_send_one(opt, cgrp);
|
||||
|
||||
txmsg_pass = 1;
|
||||
txmsg_redir = 0;
|
||||
txmsg_ingress = 0;
|
||||
txmsg_apply = 1024;
|
||||
txmsg_cork = 0;
|
||||
test_send_large(opt, cgrp);
|
||||
|
||||
txmsg_pass = 0;
|
||||
txmsg_redir = 1;
|
||||
txmsg_ingress = 0;
|
||||
txmsg_apply = 1024;
|
||||
txmsg_cork = 0;
|
||||
test_send_large(opt, cgrp);
|
||||
|
||||
txmsg_pass = 0;
|
||||
txmsg_redir = 1;
|
||||
txmsg_ingress = 1;
|
||||
txmsg_apply = 1024;
|
||||
txmsg_cork = 0;
|
||||
test_send_large(opt, cgrp);
|
||||
|
@ -76,7 +76,7 @@
|
||||
},
|
||||
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
|
||||
.result = REJECT,
|
||||
.errstr = "arg#0 expected pointer to ctx, but got PTR",
|
||||
.errstr = "R1 must have zero offset when passed to release func or trusted arg to kfunc",
|
||||
.fixup_kfunc_btf_id = {
|
||||
{ "bpf_kfunc_call_test_pass_ctx", 2 },
|
||||
},
|
||||
@ -2305,3 +2305,85 @@
|
||||
.errstr = "!read_ok",
|
||||
.result = REJECT,
|
||||
},
|
||||
/* Make sure that verifier.c:states_equal() considers IDs from all
|
||||
* frames when building 'idmap' for check_ids().
|
||||
*/
|
||||
{
|
||||
"calls: check_ids() across call boundary",
|
||||
.insns = {
|
||||
/* Function main() */
|
||||
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
|
||||
/* fp[-24] = map_lookup_elem(...) ; get a MAP_VALUE_PTR_OR_NULL with some ID */
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
|
||||
BPF_LD_MAP_FD(BPF_REG_1,
|
||||
0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
|
||||
BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_0, -24),
|
||||
/* fp[-32] = map_lookup_elem(...) ; get a MAP_VALUE_PTR_OR_NULL with some ID */
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
|
||||
BPF_LD_MAP_FD(BPF_REG_1,
|
||||
0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
|
||||
BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_0, -32),
|
||||
/* call foo(&fp[-24], &fp[-32]) ; both arguments have IDs in the current
|
||||
* ; stack frame
|
||||
*/
|
||||
BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -24),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -32),
|
||||
BPF_CALL_REL(2),
|
||||
/* exit 0 */
|
||||
BPF_MOV64_IMM(BPF_REG_0, 0),
|
||||
BPF_EXIT_INSN(),
|
||||
/* Function foo()
|
||||
*
|
||||
* r9 = &frame[0].fp[-24] ; save arguments in the callee saved registers,
|
||||
* r8 = &frame[0].fp[-32] ; arguments are pointers to pointers to map value
|
||||
*/
|
||||
BPF_MOV64_REG(BPF_REG_9, BPF_REG_1),
|
||||
BPF_MOV64_REG(BPF_REG_8, BPF_REG_2),
|
||||
/* r7 = ktime_get_ns() */
|
||||
BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
|
||||
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
|
||||
/* r6 = ktime_get_ns() */
|
||||
BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
|
||||
BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
|
||||
/* if r6 > r7 goto +1 ; no new information about the state is derived from
|
||||
* ; this check, thus produced verifier states differ
|
||||
* ; only in 'insn_idx'
|
||||
* r9 = r8
|
||||
*/
|
||||
BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
|
||||
BPF_MOV64_REG(BPF_REG_9, BPF_REG_8),
|
||||
/* r9 = *r9 ; verifier gets to this point via two paths:
|
||||
* ; (I) one including r9 = r8, verified first;
|
||||
* ; (II) one excluding r9 = r8, verified next.
|
||||
* ; After load of *r9 to r9 the frame[0].fp[-24].id == r9.id.
|
||||
* ; Suppose that checkpoint is created here via path (I).
|
||||
* ; When verifying via (II) the r9.id must be compared against
|
||||
* ; frame[0].fp[-24].id, otherwise (I) and (II) would be
|
||||
* ; incorrectly deemed equivalent.
|
||||
* if r9 == 0 goto <exit>
|
||||
*/
|
||||
BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_9, 0),
|
||||
BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1),
|
||||
/* r8 = *r8 ; read map value via r8, this is not safe
|
||||
* r0 = *r8 ; because r8 might be not equal to r9.
|
||||
*/
|
||||
BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_8, 0),
|
||||
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_8, 0),
|
||||
/* exit 0 */
|
||||
BPF_MOV64_IMM(BPF_REG_0, 0),
|
||||
BPF_EXIT_INSN(),
|
||||
},
|
||||
.flags = BPF_F_TEST_STATE_FREQ,
|
||||
.fixup_map_hash_8b = { 3, 9 },
|
||||
.result = REJECT,
|
||||
.errstr = "R8 invalid mem access 'map_value_or_null'",
|
||||
.result_unpriv = REJECT,
|
||||
.errstr_unpriv = "",
|
||||
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
|
||||
},
|
||||
|
@ -654,3 +654,57 @@
|
||||
.result = ACCEPT,
|
||||
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
|
||||
},
|
||||
{
|
||||
"direct packet access: test30 (check_id() in regsafe(), bad access)",
|
||||
.insns = {
|
||||
/* r9 = ctx */
|
||||
BPF_MOV64_REG(BPF_REG_9, BPF_REG_1),
|
||||
/* r7 = ktime_get_ns() */
|
||||
BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
|
||||
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
|
||||
/* r6 = ktime_get_ns() */
|
||||
BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
|
||||
BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
|
||||
/* r2 = ctx->data
|
||||
* r3 = ctx->data
|
||||
* r4 = ctx->data_end
|
||||
*/
|
||||
BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_9, offsetof(struct __sk_buff, data)),
|
||||
BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_9, offsetof(struct __sk_buff, data)),
|
||||
BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_9, offsetof(struct __sk_buff, data_end)),
|
||||
/* if r6 > 100 goto exit
|
||||
* if r7 > 100 goto exit
|
||||
*/
|
||||
BPF_JMP_IMM(BPF_JGT, BPF_REG_6, 100, 9),
|
||||
BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 100, 8),
|
||||
/* r2 += r6 ; this forces assignment of ID to r2
|
||||
* r2 += 1 ; get some fixed off for r2
|
||||
* r3 += r7 ; this forces assignment of ID to r3
|
||||
* r3 += 1 ; get some fixed off for r3
|
||||
*/
|
||||
BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_6),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
|
||||
BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_7),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 1),
|
||||
/* if r6 != r7 goto +1 ; no new information about the state is derived from
|
||||
* ; this check, thus produced verifier states differ
|
||||
* ; only in 'insn_idx'
|
||||
* r2 = r3 ; optionally share ID between r2 and r3
|
||||
*/
|
||||
BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_7, 1),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_3),
|
||||
/* if r3 > ctx->data_end goto exit */
|
||||
BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_4, 1),
|
||||
/* r5 = *(u8 *) (r2 - 1) ; access packet memory using r2,
|
||||
* ; this is not always safe
|
||||
*/
|
||||
BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, -1),
|
||||
/* exit(0) */
|
||||
BPF_MOV64_IMM(BPF_REG_0, 0),
|
||||
BPF_EXIT_INSN(),
|
||||
},
|
||||
.flags = BPF_F_TEST_STATE_FREQ,
|
||||
.result = REJECT,
|
||||
.errstr = "invalid access to packet, off=0 size=1, R2",
|
||||
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
|
||||
},
|
||||
|
@ -9,7 +9,7 @@
|
||||
},
|
||||
.fixup_map_array_48b = { 1 },
|
||||
.result_unpriv = REJECT,
|
||||
.errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
|
||||
.errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
|
||||
.result = REJECT,
|
||||
.errstr = "R1 is bpf_array invalid negative access: off=-8",
|
||||
},
|
||||
@ -26,7 +26,7 @@
|
||||
},
|
||||
.fixup_map_array_48b = { 3 },
|
||||
.result_unpriv = REJECT,
|
||||
.errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
|
||||
.errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
|
||||
.result = REJECT,
|
||||
.errstr = "only read from bpf_array is supported",
|
||||
},
|
||||
@ -41,7 +41,7 @@
|
||||
},
|
||||
.fixup_map_array_48b = { 1 },
|
||||
.result_unpriv = REJECT,
|
||||
.errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
|
||||
.errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
|
||||
.result = REJECT,
|
||||
.errstr = "cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4",
|
||||
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
|
||||
@ -57,7 +57,7 @@
|
||||
},
|
||||
.fixup_map_array_48b = { 1 },
|
||||
.result_unpriv = REJECT,
|
||||
.errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
|
||||
.errstr_unpriv = "access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
|
||||
.result = ACCEPT,
|
||||
.retval = 1,
|
||||
},
|
||||
|
@ -28,7 +28,7 @@
|
||||
},
|
||||
.fixup_map_ringbuf = { 1 },
|
||||
.result = REJECT,
|
||||
.errstr = "dereference of modified ringbuf_mem ptr R1",
|
||||
.errstr = "R1 must have zero offset when passed to release func",
|
||||
},
|
||||
{
|
||||
"ringbuf: invalid reservation offset 2",
|
||||
|
@ -331,3 +331,117 @@
|
||||
.errstr = "inside bpf_spin_lock",
|
||||
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
|
||||
},
|
||||
{
|
||||
"spin_lock: regsafe compare reg->id for map value",
|
||||
.insns = {
|
||||
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
|
||||
BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_6, offsetof(struct __sk_buff, mark)),
|
||||
BPF_LD_MAP_FD(BPF_REG_1, 0),
|
||||
BPF_MOV64_REG(BPF_REG_9, BPF_REG_1),
|
||||
BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
|
||||
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
|
||||
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
|
||||
BPF_EXIT_INSN(),
|
||||
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
|
||||
BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
|
||||
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
|
||||
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
|
||||
BPF_EXIT_INSN(),
|
||||
BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
|
||||
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
|
||||
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
|
||||
BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 1),
|
||||
BPF_JMP_IMM(BPF_JA, 0, 0, 1),
|
||||
BPF_MOV64_REG(BPF_REG_7, BPF_REG_8),
|
||||
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
|
||||
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
|
||||
BPF_MOV64_IMM(BPF_REG_0, 0),
|
||||
BPF_EXIT_INSN(),
|
||||
},
|
||||
.fixup_map_spin_lock = { 2 },
|
||||
.result = REJECT,
|
||||
.errstr = "bpf_spin_unlock of different lock",
|
||||
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
|
||||
.flags = BPF_F_TEST_STATE_FREQ,
|
||||
},
|
||||
/* Make sure that regsafe() compares ids for spin lock records using
|
||||
* check_ids():
|
||||
* 1: r9 = map_lookup_elem(...) ; r9.id == 1
|
||||
* 2: r8 = map_lookup_elem(...) ; r8.id == 2
|
||||
* 3: r7 = ktime_get_ns()
|
||||
* 4: r6 = ktime_get_ns()
|
||||
* 5: if r6 > r7 goto <9>
|
||||
* 6: spin_lock(r8)
|
||||
* 7: r9 = r8
|
||||
* 8: goto <10>
|
||||
* 9: spin_lock(r9)
|
||||
* 10: spin_unlock(r9) ; r9.id == 1 || r9.id == 2 and lock is active,
|
||||
* ; second visit to (10) should be considered safe
|
||||
* ; if check_ids() is used.
|
||||
* 11: exit(0)
|
||||
*/
|
||||
{
|
||||
"spin_lock: regsafe() check_ids() similar id mappings",
|
||||
.insns = {
|
||||
BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
|
||||
/* r9 = map_lookup_elem(...) */
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
|
||||
BPF_LD_MAP_FD(BPF_REG_1,
|
||||
0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
|
||||
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 24),
|
||||
BPF_MOV64_REG(BPF_REG_9, BPF_REG_0),
|
||||
/* r8 = map_lookup_elem(...) */
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
|
||||
BPF_LD_MAP_FD(BPF_REG_1,
|
||||
0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
|
||||
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 18),
|
||||
BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
|
||||
/* r7 = ktime_get_ns() */
|
||||
BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
|
||||
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
|
||||
/* r6 = ktime_get_ns() */
|
||||
BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
|
||||
BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
|
||||
/* if r6 > r7 goto +5 ; no new information about the state is derived from
|
||||
* ; this check, thus produced verifier states differ
|
||||
* ; only in 'insn_idx'
|
||||
* spin_lock(r8)
|
||||
* r9 = r8
|
||||
* goto unlock
|
||||
*/
|
||||
BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 5),
|
||||
BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
|
||||
BPF_EMIT_CALL(BPF_FUNC_spin_lock),
|
||||
BPF_MOV64_REG(BPF_REG_9, BPF_REG_8),
|
||||
BPF_JMP_A(3),
|
||||
/* spin_lock(r9) */
|
||||
BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
|
||||
BPF_EMIT_CALL(BPF_FUNC_spin_lock),
|
||||
/* spin_unlock(r9) */
|
||||
BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
|
||||
BPF_EMIT_CALL(BPF_FUNC_spin_unlock),
|
||||
/* exit(0) */
|
||||
BPF_MOV64_IMM(BPF_REG_0, 0),
|
||||
BPF_EXIT_INSN(),
|
||||
},
|
||||
.fixup_map_spin_lock = { 3, 10 },
|
||||
.result = VERBOSE_ACCEPT,
|
||||
.errstr = "28: safe",
|
||||
.result_unpriv = REJECT,
|
||||
.errstr_unpriv = "",
|
||||
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
|
||||
.flags = BPF_F_TEST_STATE_FREQ,
|
||||
},
|
||||
|
@ -169,3 +169,52 @@
|
||||
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
|
||||
.result = ACCEPT,
|
||||
},
|
||||
{
|
||||
"MAP_VALUE_OR_NULL check_ids() in regsafe()",
|
||||
.insns = {
|
||||
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
|
||||
/* r9 = map_lookup_elem(...) */
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
|
||||
BPF_LD_MAP_FD(BPF_REG_1,
|
||||
0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
|
||||
BPF_MOV64_REG(BPF_REG_9, BPF_REG_0),
|
||||
/* r8 = map_lookup_elem(...) */
|
||||
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
|
||||
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
|
||||
BPF_LD_MAP_FD(BPF_REG_1,
|
||||
0),
|
||||
BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
|
||||
BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
|
||||
/* r7 = ktime_get_ns() */
|
||||
BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
|
||||
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
|
||||
/* r6 = ktime_get_ns() */
|
||||
BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
|
||||
BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
|
||||
/* if r6 > r7 goto +1 ; no new information about the state is derived from
|
||||
* ; this check, thus produced verifier states differ
|
||||
* ; only in 'insn_idx'
|
||||
* r9 = r8 ; optionally share ID between r9 and r8
|
||||
*/
|
||||
BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
|
||||
BPF_MOV64_REG(BPF_REG_9, BPF_REG_8),
|
||||
/* if r9 == 0 goto <exit> */
|
||||
BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1),
|
||||
/* read map value via r8, this is not always
|
||||
* safe because r8 might not be equal to r9.
|
||||
*/
|
||||
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_8, 0),
|
||||
/* exit 0 */
|
||||
BPF_MOV64_IMM(BPF_REG_0, 0),
|
||||
BPF_EXIT_INSN(),
|
||||
},
|
||||
.flags = BPF_F_TEST_STATE_FREQ,
|
||||
.fixup_map_hash_8b = { 3, 9 },
|
||||
.result = REJECT,
|
||||
.errstr = "R8 invalid mem access 'map_value_or_null'",
|
||||
.result_unpriv = REJECT,
|
||||
.errstr_unpriv = "",
|
||||
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
|
||||
},
|
||||
|
Loading…
x
Reference in New Issue
Block a user