bpf-next-for-netdev
-----BEGIN PGP SIGNATURE-----
iHUEABYIAB0WIQTFp0I1jqZrAX+hPRXbK58LschIgwUCY9RqJgAKCRDbK58LschI
gw2IAP9G5uhFO5abBzYLupp6SY3T5j97MUvPwLfFqUEt7EXmuwEA2lCUEWeW0KtR
QX+QmzCa6iHxrW7WzP4DUYLue//FJQY=
=yYqA
-----END PGP SIGNATURE-----

Merge tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
bpf-next 2023-01-28

We've added 124 non-merge commits during the last 22 day(s) which contain a total of 124 files changed, 6386 insertions(+), 1827 deletions(-).

The main changes are:

1) Implement XDP hints via kfuncs with initial support for RX hash and timestamp metadata kfuncs, from Stanislav Fomichev and Toke Høiland-Jørgensen. Measurements on overhead: https://lore.kernel.org/bpf/875yellcx6.fsf@toke.dk

2) Extend libbpf's bpf_tracing.h support for tracing arguments of kprobes/uprobes and syscall as a special case, from Andrii Nakryiko.

3) Significantly reduce the search time for module symbols by livepatch and BPF, from Jiri Olsa and Zhen Lei.

4) Enable cpumasks to be used as kptrs, which is useful for tracing programs tracking which tasks end up running on which CPUs in different time intervals, from David Vernet.

5) Fix several issues in the dynptr processing such as stack slot liveness propagation, missing checks for PTR_TO_STACK variable offset, etc, from Kumar Kartikeya Dwivedi.

6) Various performance improvements, fixes, and introduction of more than just one XDP program to XSK selftests, from Magnus Karlsson.

7) Big batch to BPF samples to reduce deprecated functionality, from Daniel T. Lee.

8) Enable struct_ops programs to be sleepable in verifier, from David Vernet.

9) Reduce pr_warn() noise on BTF mismatches when they are expected under the CONFIG_MODULE_ALLOW_BTF_MISMATCH config anyway, from Connor O'Brien.

10) Describe modulo and division by zero behavior of the BPF runtime in BPF's instruction specification document, from Dave Thaler.

11) Several improvements to libbpf API documentation in libbpf.h, from Grant Seltzer.

12) Improve resolve_btfids header dependencies related to subcmd and add proper support for HOSTCC, from Ian Rogers.

13) Add ipip6 and ip6ip decapsulation support for bpf_skb_adjust_room() helper along with BPF selftests, from Ziyang Xuan.

14) Simplify the parsing logic of structure parameters for BPF trampoline in the x86-64 JIT compiler, from Pu Lehui.

15) Get BTF working for kernels with CONFIG_RUST enabled by excluding Rust compilation units with pahole, from Martin Rodriguez Reboredo.

16) Get bpf_setsockopt() working for kTLS on top of TCP sockets, from Kui-Feng Lee.

17) Disable stack protection for BPF objects in bpftool given BPF backends don't support it, from Holger Hoffstätte.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (124 commits)
  selftest/bpf: Make crashes more debuggable in test_progs
  libbpf: Add documentation to map pinning API functions
  libbpf: Fix malformed documentation formatting
  selftests/bpf: Properly enable hwtstamp in xdp_hw_metadata
  selftests/bpf: Calls bpf_setsockopt() on a ktls enabled socket.
  bpf: Check the protocol of a sock to agree the calls to bpf_setsockopt().
  bpf/selftests: Verify struct_ops prog sleepable behavior
  bpf: Pass const struct bpf_prog * to .check_member
  libbpf: Support sleepable struct_ops.s section
  bpf: Allow BPF_PROG_TYPE_STRUCT_OPS programs to be sleepable
  selftests/bpf: Fix vmtest static compilation error
  tools/resolve_btfids: Alter how HOSTCC is forced
  tools/resolve_btfids: Install subcmd headers
  bpf/docs: Document the nocast aliasing behavior of ___init
  bpf/docs: Document how nested trusted fields may be defined
  bpf/docs: Document cpumask kfuncs in a new file
  selftests/bpf: Add selftest suite for cpumask kfuncs
  selftests/bpf: Add nested trust selftests suite
  bpf: Enable cpumasks to be queried and used as kptrs
  bpf: Disallow NULLable pointers for trusted kfuncs
  ...
====================

Link: https://lore.kernel.org/r/20230128004827.21371-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
commit 2d104c390f

Documentation/bpf/cpumasks.rst (new file, 393 lines)

@@ -0,0 +1,393 @@
.. SPDX-License-Identifier: GPL-2.0

.. _cpumasks-header-label:

==================
BPF cpumask kfuncs
==================

1. Introduction
===============

``struct cpumask`` is a bitmap data structure in the kernel whose indices
reflect the CPUs on the system. Commonly, cpumasks are used to track which CPUs
a task is affinitized to, but they can also be used to e.g. track which cores
are associated with a scheduling domain, which cores on a machine are idle,
etc.

BPF provides programs with a set of :ref:`kfuncs-header-label` that can be
used to allocate, mutate, query, and free cpumasks.

2. BPF cpumask objects
======================

There are two different types of cpumasks that can be used by BPF programs.

2.1 ``struct bpf_cpumask *``
----------------------------

``struct bpf_cpumask *`` is a cpumask that is allocated by BPF, on behalf of a
BPF program, and whose lifecycle is entirely controlled by BPF. These cpumasks
are RCU-protected, can be mutated, can be used as kptrs, and can be safely cast
to a ``struct cpumask *``.

2.1.1 ``struct bpf_cpumask *`` lifecycle
----------------------------------------

A ``struct bpf_cpumask *`` is allocated, acquired, and released, using the
following functions:

.. kernel-doc:: kernel/bpf/cpumask.c
   :identifiers: bpf_cpumask_create

.. kernel-doc:: kernel/bpf/cpumask.c
   :identifiers: bpf_cpumask_acquire

.. kernel-doc:: kernel/bpf/cpumask.c
   :identifiers: bpf_cpumask_release

For example:

.. code-block:: c

	struct cpumask_map_value {
		struct bpf_cpumask __kptr_ref * cpumask;
	};

	struct array_map {
		__uint(type, BPF_MAP_TYPE_ARRAY);
		__type(key, int);
		__type(value, struct cpumask_map_value);
		__uint(max_entries, 65536);
	} cpumask_map SEC(".maps");

	static int cpumask_map_insert(struct bpf_cpumask *mask, u32 pid)
	{
		struct cpumask_map_value local, *v;
		long status;
		struct bpf_cpumask *old;
		u32 key = pid;

		local.cpumask = NULL;
		status = bpf_map_update_elem(&cpumask_map, &key, &local, 0);
		if (status) {
			bpf_cpumask_release(mask);
			return status;
		}

		v = bpf_map_lookup_elem(&cpumask_map, &key);
		if (!v) {
			bpf_cpumask_release(mask);
			return -ENOENT;
		}

		old = bpf_kptr_xchg(&v->cpumask, mask);
		if (old)
			bpf_cpumask_release(old);

		return 0;
	}

	/**
	 * A sample tracepoint showing how a task's cpumask can be queried and
	 * recorded as a kptr.
	 */
	SEC("tp_btf/task_newtask")
	int BPF_PROG(record_task_cpumask, struct task_struct *task, u64 clone_flags)
	{
		struct bpf_cpumask *cpumask;
		int ret;

		cpumask = bpf_cpumask_create();
		if (!cpumask)
			return -ENOMEM;

		if (!bpf_cpumask_full(task->cpus_ptr))
			bpf_printk("task %s has CPU affinity", task->comm);

		bpf_cpumask_copy(cpumask, task->cpus_ptr);
		return cpumask_map_insert(cpumask, task->pid);
	}

----
2.1.2 ``struct bpf_cpumask *`` as kptrs
---------------------------------------

As mentioned and illustrated above, these ``struct bpf_cpumask *`` objects can
also be stored in a map and used as kptrs. If a ``struct bpf_cpumask *`` is in
a map, the reference can be removed from the map with bpf_kptr_xchg(), or
opportunistically acquired with bpf_cpumask_kptr_get():

.. kernel-doc:: kernel/bpf/cpumask.c
   :identifiers: bpf_cpumask_kptr_get

Here is an example of a ``struct bpf_cpumask *`` being retrieved from a map:

.. code-block:: c

	/* struct containing the struct bpf_cpumask kptr which is stored in the map. */
	struct cpumasks_kfunc_map_value {
		struct bpf_cpumask __kptr_ref * bpf_cpumask;
	};

	/* The map containing struct cpumasks_kfunc_map_value entries. */
	struct {
		__uint(type, BPF_MAP_TYPE_ARRAY);
		__type(key, int);
		__type(value, struct cpumasks_kfunc_map_value);
		__uint(max_entries, 1);
	} cpumasks_kfunc_map SEC(".maps");

	/* ... */

	/**
	 * A simple example tracepoint program showing how a
	 * struct bpf_cpumask * kptr that is stored in a map can
	 * be acquired using the bpf_cpumask_kptr_get() kfunc.
	 */
	SEC("tp_btf/cgroup_mkdir")
	int BPF_PROG(cgrp_ancestor_example, struct cgroup *cgrp, const char *path)
	{
		struct bpf_cpumask *kptr;
		struct cpumasks_kfunc_map_value *v;
		u32 key = 0;

		/* Assume a bpf_cpumask * kptr was previously stored in the map. */
		v = bpf_map_lookup_elem(&cpumasks_kfunc_map, &key);
		if (!v)
			return -ENOENT;

		/* Acquire a reference to the bpf_cpumask * kptr that's already stored in the map. */
		kptr = bpf_cpumask_kptr_get(&v->bpf_cpumask);
		if (!kptr)
			/* If no bpf_cpumask was present in the map, it's because
			 * we're racing with another CPU that removed it with
			 * bpf_kptr_xchg() between the bpf_map_lookup_elem()
			 * above, and our call to bpf_cpumask_kptr_get().
			 * bpf_cpumask_kptr_get() internally safely handles this
			 * race, and will return NULL if the cpumask is no longer
			 * present in the map by the time we invoke the kfunc.
			 */
			return -EBUSY;

		/* Free the reference we just took above. Note that the
		 * original struct bpf_cpumask * kptr is still in the map. It will
		 * be freed either at a later time if another context deletes
		 * it from the map, or automatically by the BPF subsystem if
		 * it's still present when the map is destroyed.
		 */
		bpf_cpumask_release(kptr);

		return 0;
	}

----
2.2 ``struct cpumask``
----------------------

``struct cpumask`` is the object that actually contains the cpumask bitmap
being queried, mutated, etc. A ``struct bpf_cpumask`` wraps a ``struct
cpumask``, which is why it's safe to cast it as such (note however that it is
**not** safe to cast a ``struct cpumask *`` to a ``struct bpf_cpumask *``, and
the verifier will reject any program that tries to do so).

As we'll see below, any kfunc that mutates its cpumask argument will take a
``struct bpf_cpumask *`` as that argument. Any kfunc that simply queries the
cpumask will instead take a ``struct cpumask *``.

3. cpumask kfuncs
=================

Above, we described the kfuncs that can be used to allocate, acquire, release,
etc a ``struct bpf_cpumask *``. This section of the document will describe the
kfuncs for mutating and querying cpumasks.

3.1 Mutating cpumasks
---------------------

Some cpumask kfuncs are "read-only" in that they don't mutate any of their
arguments, whereas others mutate at least one argument (which means that the
argument must be a ``struct bpf_cpumask *``, as described above).

This section will describe all of the cpumask kfuncs which mutate at least one
argument. :ref:`cpumasks-querying-label` below describes the read-only kfuncs.

3.1.1 Setting and clearing CPUs
-------------------------------

bpf_cpumask_set_cpu() and bpf_cpumask_clear_cpu() can be used to set and clear
a CPU in a ``struct bpf_cpumask`` respectively:

.. kernel-doc:: kernel/bpf/cpumask.c
   :identifiers: bpf_cpumask_set_cpu bpf_cpumask_clear_cpu

These kfuncs are pretty straightforward, and can be used, for example, as
follows:

.. code-block:: c

	/**
	 * A sample tracepoint showing how a cpumask can be queried.
	 */
	SEC("tp_btf/task_newtask")
	int BPF_PROG(test_set_clear_cpu, struct task_struct *task, u64 clone_flags)
	{
		struct bpf_cpumask *cpumask;

		cpumask = bpf_cpumask_create();
		if (!cpumask)
			return -ENOMEM;

		bpf_cpumask_set_cpu(0, cpumask);
		if (!bpf_cpumask_test_cpu(0, (const struct cpumask *)cpumask))
			/* Should never happen. */
			goto release_exit;

		bpf_cpumask_clear_cpu(0, cpumask);
		if (bpf_cpumask_test_cpu(0, (const struct cpumask *)cpumask))
			/* Should never happen. */
			goto release_exit;

		/* struct cpumask * pointers such as task->cpus_ptr can also be queried. */
		if (bpf_cpumask_test_cpu(0, task->cpus_ptr))
			bpf_printk("task %s can use CPU %d", task->comm, 0);

	release_exit:
		bpf_cpumask_release(cpumask);
		return 0;
	}

----

bpf_cpumask_test_and_set_cpu() and bpf_cpumask_test_and_clear_cpu() are
complementary kfuncs that allow callers to atomically test and set (or clear)
CPUs:

.. kernel-doc:: kernel/bpf/cpumask.c
   :identifiers: bpf_cpumask_test_and_set_cpu bpf_cpumask_test_and_clear_cpu

----

We can also set and clear entire ``struct bpf_cpumask *`` objects in one
operation using bpf_cpumask_setall() and bpf_cpumask_clear():

.. kernel-doc:: kernel/bpf/cpumask.c
   :identifiers: bpf_cpumask_setall bpf_cpumask_clear
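
For illustration, a minimal sketch combining these mutating kfuncs. The program
name ``test_mutators`` is hypothetical; the sketch follows the same pattern as
the examples above:

.. code-block:: c

	SEC("tp_btf/task_newtask")
	int BPF_PROG(test_mutators, struct task_struct *task, u64 clone_flags)
	{
		struct bpf_cpumask *cpumask;

		cpumask = bpf_cpumask_create();
		if (!cpumask)
			return -ENOMEM;

		/* Atomically test-and-set CPU 1; the return value is the previous bit. */
		if (bpf_cpumask_test_and_set_cpu(1, cpumask))
			/* Should never happen: the mask was just created empty. */
			goto release_exit;

		/* Set every CPU in the mask, then clear them all again. */
		bpf_cpumask_setall(cpumask);
		bpf_cpumask_clear(cpumask);

		if (bpf_cpumask_test_and_clear_cpu(1, cpumask))
			/* Should never happen: bpf_cpumask_clear() removed CPU 1. */
			goto release_exit;

	release_exit:
		bpf_cpumask_release(cpumask);
		return 0;
	}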
3.1.2 Operations between cpumasks
---------------------------------

In addition to setting and clearing individual CPUs in a single cpumask,
callers can also perform bitwise operations between multiple cpumasks using
bpf_cpumask_and(), bpf_cpumask_or(), and bpf_cpumask_xor():

.. kernel-doc:: kernel/bpf/cpumask.c
   :identifiers: bpf_cpumask_and bpf_cpumask_or bpf_cpumask_xor

The following is an example of how they may be used. Note that some of the
kfuncs shown in this example will be covered in more detail below.

.. code-block:: c

	/**
	 * A sample tracepoint showing how a cpumask can be mutated using
	 * bitwise operators (and queried).
	 */
	SEC("tp_btf/task_newtask")
	int BPF_PROG(test_and_or_xor, struct task_struct *task, u64 clone_flags)
	{
		struct bpf_cpumask *mask1, *mask2, *dst1, *dst2;

		mask1 = bpf_cpumask_create();
		if (!mask1)
			return -ENOMEM;

		mask2 = bpf_cpumask_create();
		if (!mask2) {
			bpf_cpumask_release(mask1);
			return -ENOMEM;
		}

		/* ...Safely create the other two masks... */

		bpf_cpumask_set_cpu(0, mask1);
		bpf_cpumask_set_cpu(1, mask2);
		bpf_cpumask_and(dst1, (const struct cpumask *)mask1, (const struct cpumask *)mask2);
		if (!bpf_cpumask_empty((const struct cpumask *)dst1))
			/* Should never happen. */
			goto release_exit;

		bpf_cpumask_or(dst1, (const struct cpumask *)mask1, (const struct cpumask *)mask2);
		if (!bpf_cpumask_test_cpu(0, (const struct cpumask *)dst1))
			/* Should never happen. */
			goto release_exit;

		if (!bpf_cpumask_test_cpu(1, (const struct cpumask *)dst1))
			/* Should never happen. */
			goto release_exit;

		bpf_cpumask_xor(dst2, (const struct cpumask *)mask1, (const struct cpumask *)mask2);
		if (!bpf_cpumask_equal((const struct cpumask *)dst1,
				       (const struct cpumask *)dst2))
			/* Should never happen. */
			goto release_exit;

	release_exit:
		bpf_cpumask_release(mask1);
		bpf_cpumask_release(mask2);
		bpf_cpumask_release(dst1);
		bpf_cpumask_release(dst2);
		return 0;
	}

----

The contents of an entire cpumask may be copied to another using
bpf_cpumask_copy():

.. kernel-doc:: kernel/bpf/cpumask.c
   :identifiers: bpf_cpumask_copy
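
As a short, hypothetical sketch in the same style as the examples above,
bpf_cpumask_copy() can be paired with bpf_cpumask_equal() to snapshot a task's
affinity mask (the program name ``test_copy`` is arbitrary):

.. code-block:: c

	SEC("tp_btf/task_newtask")
	int BPF_PROG(test_copy, struct task_struct *task, u64 clone_flags)
	{
		struct bpf_cpumask *cpumask;

		cpumask = bpf_cpumask_create();
		if (!cpumask)
			return -ENOMEM;

		/* Copy the task's cpumask into the newly allocated bpf_cpumask. */
		bpf_cpumask_copy(cpumask, task->cpus_ptr);

		if (!bpf_cpumask_equal((const struct cpumask *)cpumask, task->cpus_ptr))
			/* Should never happen: the copy must match its source. */
			bpf_printk("copy of %s's cpumask diverged", task->comm);

		bpf_cpumask_release(cpumask);
		return 0;
	}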
----

.. _cpumasks-querying-label:

3.2 Querying cpumasks
---------------------

In addition to the above kfuncs, there is also a set of read-only kfuncs that
can be used to query the contents of cpumasks.

.. kernel-doc:: kernel/bpf/cpumask.c
   :identifiers: bpf_cpumask_first bpf_cpumask_first_zero bpf_cpumask_test_cpu

.. kernel-doc:: kernel/bpf/cpumask.c
   :identifiers: bpf_cpumask_equal bpf_cpumask_intersects bpf_cpumask_subset
                 bpf_cpumask_empty bpf_cpumask_full

.. kernel-doc:: kernel/bpf/cpumask.c
   :identifiers: bpf_cpumask_any bpf_cpumask_any_and

----

Some example usages of these querying kfuncs were shown above. We will not
replicate those examples here. Note, however, that all of the aforementioned
kfuncs are tested in `tools/testing/selftests/bpf/progs/cpumask_success.c`_, so
please take a look there if you're looking for more examples of how they can be
used.

.. _tools/testing/selftests/bpf/progs/cpumask_success.c:
   https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/tools/testing/selftests/bpf/progs/cpumask_success.c


4. Adding BPF cpumask kfuncs
============================

The set of supported BPF cpumask kfuncs is not (yet) a 1-1 match with the
cpumask operations in include/linux/cpumask.h. Any of those cpumask operations
could easily be encapsulated in a new kfunc if and when required. If you'd like
to support a new cpumask operation, please feel free to submit a patch. If you
do add a new cpumask kfunc, please document it here, and add any relevant
selftest testcases to the cpumask selftest suite.
@@ -20,6 +20,7 @@ that goes into great technical depth about the BPF Architecture.

   syscall_api
   helpers
   kfuncs
   cpumasks
   programs
   maps
   bpf_prog_run
@@ -99,19 +99,26 @@ code value description
BPF_ADD   0x00   dst += src
BPF_SUB   0x10   dst -= src
BPF_MUL   0x20   dst \*= src
BPF_DIV   0x30   dst /= src
BPF_DIV   0x30   dst = (src != 0) ? (dst / src) : 0
BPF_OR    0x40   dst \|= src
BPF_AND   0x50   dst &= src
BPF_LSH   0x60   dst <<= src
BPF_RSH   0x70   dst >>= src
BPF_NEG   0x80   dst = ~src
BPF_MOD   0x90   dst %= src
BPF_MOD   0x90   dst = (src != 0) ? (dst % src) : dst
BPF_XOR   0xa0   dst ^= src
BPF_MOV   0xb0   dst = src
BPF_ARSH  0xc0   sign extending shift right
BPF_END   0xd0   byte swap operations (see `Byte swap instructions`_ below)
========  =====  ==========================================================

Underflow and overflow are allowed during arithmetic operations, meaning
the 64-bit or 32-bit value will wrap. If eBPF program execution would
result in division by zero, the destination register is instead set to zero.
If execution would result in modulo by zero, for ``BPF_ALU64`` the value of
the destination register is unchanged whereas for ``BPF_ALU`` the upper
32 bits of the destination register are zeroed.
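
For ``BPF_ALU64``, an illustrative C-style sketch of these division and modulo
rules (a reading aid only, not normative text of the specification) is::

  dst_reg = (src_reg == 0) ? 0 : dst_reg / src_reg;        /* BPF_DIV */
  dst_reg = (src_reg == 0) ? dst_reg : dst_reg % src_reg;  /* BPF_MOD */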
``BPF_ADD | BPF_X | BPF_ALU`` means::

  dst_reg = (u32) dst_reg + (u32) src_reg;

@@ -128,6 +135,11 @@ BPF_END 0xd0 byte swap operations (see `Byte swap instructions`_ below)

  dst_reg = dst_reg ^ imm32

Also note that the division and modulo operations are unsigned. Thus, for
``BPF_ALU``, 'imm' is first interpreted as an unsigned 32-bit value, whereas
for ``BPF_ALU64``, 'imm' is first sign extended to 64 bits and the result
interpreted as an unsigned 64-bit value. There are no instructions for
signed division or modulo.

Byte swap instructions
~~~~~~~~~~~~~~~~~~~~~~
@@ -1,3 +1,7 @@
.. SPDX-License-Identifier: GPL-2.0

.. _kfuncs-header-label:

=============================
BPF Kernel Functions (kfuncs)
=============================

@@ -163,7 +167,8 @@ KF_ACQUIRE and KF_RET_NULL flags.

The KF_TRUSTED_ARGS flag is used for kfuncs taking pointer arguments. It
indicates that all pointer arguments are valid, and that all pointers to
BTF objects have been passed in their unmodified form (that is, at a zero
offset, and without having been obtained from walking another pointer).
offset, and without having been obtained from walking another pointer, with one
exception described below).

There are two types of pointers to kernel objects which are considered "valid":

@@ -176,6 +181,25 @@ KF_TRUSTED_ARGS kfuncs, and may have a non-zero offset.

The definition of "valid" pointers is subject to change at any time, and has
absolutely no ABI stability guarantees.

As mentioned above, a nested pointer obtained from walking a trusted pointer is
no longer trusted, with one exception. If a struct type has a field that is
guaranteed to be valid as long as its parent pointer is trusted, the
``BTF_TYPE_SAFE_NESTED`` macro can be used to express that to the verifier as
follows:

.. code-block:: c

	BTF_TYPE_SAFE_NESTED(struct task_struct) {
		const cpumask_t *cpus_ptr;
	};

In other words, you must:

1. Wrap the trusted pointer type in the ``BTF_TYPE_SAFE_NESTED`` macro.

2. Specify the type and name of the trusted nested field. This field must match
   the field in the original type definition exactly.

2.4.6 KF_SLEEPABLE flag
-----------------------

@@ -223,6 +247,49 @@ type. An example is shown below::

        }
        late_initcall(init_subsystem);

2.6 Specifying no-cast aliases with ___init
--------------------------------------------

The verifier will always enforce that the BTF type of a pointer passed to a
kfunc by a BPF program matches the type of pointer specified in the kfunc
definition. The verifier does, however, allow types that are equivalent
according to the C standard to be passed to the same kfunc arg, even if their
BTF_IDs differ.

For example, for the following type definition:

.. code-block:: c

	struct bpf_cpumask {
		cpumask_t cpumask;
		refcount_t usage;
	};

The verifier would allow a ``struct bpf_cpumask *`` to be passed to a kfunc
taking a ``cpumask_t *`` (which is a typedef of ``struct cpumask *``). For
instance, both ``struct cpumask *`` and ``struct bpf_cpumask *`` can be passed
to bpf_cpumask_test_cpu().

In some cases, this type-aliasing behavior is not desired. ``struct
nf_conn___init`` is one such example:

.. code-block:: c

	struct nf_conn___init {
		struct nf_conn ct;
	};

The C standard would consider these types to be equivalent, but it would not
always be safe to pass either type to a trusted kfunc. ``struct
nf_conn___init`` represents an allocated ``struct nf_conn`` object that has
*not yet been initialized*, so it would therefore be unsafe to pass a ``struct
nf_conn___init *`` to a kfunc that's expecting a fully initialized ``struct
nf_conn *`` (e.g. ``bpf_ct_change_timeout()``).

In order to accommodate such requirements, the verifier will enforce strict
PTR_TO_BTF_ID type matching if two types have the exact same name, with one
being suffixed with ``___init``.

3. Core kfuncs
==============

@@ -420,3 +487,10 @@ the verifier. bpf_cgroup_ancestor() can be used as follows:

		bpf_cgroup_release(parent);
		return 0;
	}

3.3 struct cpumask * kfuncs
---------------------------

BPF provides a set of kfuncs that can be used to query, allocate, mutate, and
destroy struct cpumask * objects. Please refer to :ref:`cpumasks-header-label`
for more details.
@@ -120,6 +120,7 @@ Contents:

   xfrm_proc
   xfrm_sync
   xfrm_sysctl
   xdp-rx-metadata

.. only:: subproject and html
Documentation/networking/xdp-rx-metadata.rst (new file, 110 lines)

@@ -0,0 +1,110 @@
===============
XDP RX Metadata
===============

This document describes how an eXpress Data Path (XDP) program can access
hardware metadata related to a packet using a set of helper functions,
and how it can pass that metadata on to other consumers.

General Design
==============

XDP has access to a set of kfuncs to manipulate the metadata in an XDP frame.
Every device driver that wishes to expose additional packet metadata can
implement these kfuncs. The set of kfuncs is declared in ``include/net/xdp.h``
via ``XDP_METADATA_KFUNC_xxx``.

Currently, the following kfuncs are supported. In the future, as more
metadata is supported, this set will grow:

.. kernel-doc:: net/core/xdp.c
   :identifiers: bpf_xdp_metadata_rx_timestamp bpf_xdp_metadata_rx_hash

An XDP program can use these kfuncs to read the metadata into stack
variables for its own consumption. Or, to pass the metadata on to other
consumers, an XDP program can store it into the metadata area carried
ahead of the packet.

Not all kfuncs have to be implemented by the device driver; when not
implemented, the default ones that return ``-EOPNOTSUPP`` will be used.

Within an XDP frame, the metadata layout (accessed via ``xdp_buff``) is
as follows::

  +----------+-----------------+------+
  | headroom | custom metadata | data |
  +----------+-----------------+------+
             ^                 ^
             |                 |
   xdp_buff->data_meta   xdp_buff->data

An XDP program can store individual metadata items into this ``data_meta``
area in whichever format it chooses. Later consumers of the metadata
will have to agree on the format by some out of band contract (like for
the AF_XDP use case, see below).
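
As an illustrative sketch, an XDP program might reserve space with
``bpf_xdp_adjust_meta()`` and populate it from the kfuncs above. The ``struct
meta`` layout, the program name, and helper names other than the kfuncs are
arbitrary choices for this example, not something mandated by the kernel:

.. code-block:: c

	#include <linux/bpf.h>
	#include <bpf/bpf_helpers.h>

	/* Program-defined metadata layout; consumers must agree on it out of band. */
	struct meta {
		__u64 rx_timestamp;
		__u32 rx_hash;
	};

	extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
						 __u64 *timestamp) __ksym;
	extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx,
					    __u32 *hash) __ksym;

	SEC("xdp")
	int rx_with_metadata(struct xdp_md *ctx)
	{
		struct meta *meta;
		void *data;

		/* Reserve space in front of the packet for the metadata. */
		if (bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)))
			return XDP_PASS;

		data = (void *)(long)ctx->data;
		meta = (void *)(long)ctx->data_meta;
		if ((void *)(meta + 1) > data)
			return XDP_PASS;

		/* Drivers that don't implement a kfunc return -EOPNOTSUPP. */
		if (bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp))
			meta->rx_timestamp = 0;
		if (bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash))
			meta->rx_hash = 0;

		return XDP_PASS;
	}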
AF_XDP
======

:doc:`af_xdp` use-case implies that there is a contract between the BPF
program that redirects XDP frames into the ``AF_XDP`` socket (``XSK``) and
the final consumer. Thus the BPF program manually allocates a fixed number of
bytes out of metadata via ``bpf_xdp_adjust_meta`` and calls a subset
of kfuncs to populate it. The userspace ``XSK`` consumer computes
``xsk_umem__get_data() - METADATA_SIZE`` to locate that metadata.
Note, ``xsk_umem__get_data`` is defined in ``libxdp`` and
``METADATA_SIZE`` is an application-specific constant (``AF_XDP`` receive
descriptor does _not_ explicitly carry the size of the metadata).

Here is the ``AF_XDP`` consumer layout (note missing ``data_meta`` pointer)::

  +----------+-----------------+------+
  | headroom | custom metadata | data |
  +----------+-----------------+------+
                               ^
                               |
                        rx_desc->address
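
A short userspace sketch of that consumer computation. ``METADATA_SIZE`` and
``struct meta`` are application-defined and assumed to match whatever the
producing XDP program wrote; the header paths are assumptions based on libxdp:

.. code-block:: c

	#include <stdio.h>
	#include <linux/types.h>
	#include <linux/if_xdp.h>  /* struct xdp_desc */
	#include <xdp/xsk.h>       /* libxdp; provides xsk_umem__get_data() */

	/* Must match the layout written by the XDP program. */
	struct meta {
		__u64 rx_timestamp;
		__u32 rx_hash;
	};

	#define METADATA_SIZE sizeof(struct meta)

	static void handle_rx_desc(void *umem_area, const struct xdp_desc *rx_desc)
	{
		/* rx_desc->addr points at the packet data; the metadata written
		 * by the XDP program sits immediately in front of it.
		 */
		void *data = xsk_umem__get_data(umem_area, rx_desc->addr);
		struct meta *meta = (struct meta *)(data - METADATA_SIZE);

		printf("hash: 0x%x timestamp: %llu\n", meta->rx_hash,
		       (unsigned long long)meta->rx_timestamp);
	}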
XDP_PASS
========

This is the path where the packets processed by the XDP program are passed
into the kernel. The kernel creates the ``skb`` out of the ``xdp_buff``
contents. Currently, every driver has custom kernel code to parse
the descriptors and populate ``skb`` metadata when doing this ``xdp_buff->skb``
conversion, and the XDP metadata is not used by the kernel when building
``skbs``. However, TC-BPF programs can access the XDP metadata area using
the ``data_meta`` pointer.

In the future, we'd like to support a case where an XDP program
can override some of the metadata used for building ``skbs``.

bpf_redirect_map
================

``bpf_redirect_map`` can redirect the frame to a different device.
Some devices (like virtual ethernet links) support running a second XDP
program after the redirect. However, the final consumer doesn't have
access to the original hardware descriptor and can't access any of
the original metadata. The same applies to XDP programs installed
into devmaps and cpumaps.

This means that for redirected packets only custom metadata is
currently supported, which has to be prepared by the initial XDP program
before redirect. If the frame is eventually passed to the kernel, the
``skb`` created from such a frame won't have any hardware metadata populated
in its ``skb``. If such a packet is later redirected into an ``XSK``,
that will also only have access to the custom metadata.

bpf_tail_call
=============

Adding programs that access metadata kfuncs to the ``BPF_MAP_TYPE_PROG_ARRAY``
is currently not supported.

Example
=======

See ``tools/testing/selftests/bpf/progs/xdp_metadata.c`` and
``tools/testing/selftests/bpf/prog_tests/xdp_metadata.c`` for an example of
a BPF program that handles XDP metadata.
@ -1857,62 +1857,59 @@ emit_jmp:
|
||||
return proglen;
|
||||
}
|
||||
|
||||
static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
|
||||
static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
|
||||
int stack_size)
|
||||
{
|
||||
int i, j, arg_size, nr_regs;
|
||||
int i, j, arg_size;
|
||||
bool next_same_struct = false;
|
||||
|
||||
/* Store function arguments to stack.
|
||||
* For a function that accepts two pointers the sequence will be:
|
||||
* mov QWORD PTR [rbp-0x10],rdi
|
||||
* mov QWORD PTR [rbp-0x8],rsi
|
||||
*/
|
||||
for (i = 0, j = 0; i < min(nr_args, 6); i++) {
|
||||
if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) {
|
||||
nr_regs = (m->arg_size[i] + 7) / 8;
|
||||
for (i = 0, j = 0; i < min(nr_regs, 6); i++) {
|
||||
/* The arg_size is at most 16 bytes, enforced by the verifier. */
|
||||
arg_size = m->arg_size[j];
|
||||
if (arg_size > 8) {
|
||||
arg_size = 8;
|
||||
} else {
|
||||
nr_regs = 1;
|
||||
arg_size = m->arg_size[i];
|
||||
next_same_struct = !next_same_struct;
|
||||
}
|
||||
|
||||
while (nr_regs) {
|
||||
emit_stx(prog, bytes_to_bpf_size(arg_size),
|
||||
BPF_REG_FP,
|
||||
j == 5 ? X86_REG_R9 : BPF_REG_1 + j,
|
||||
-(stack_size - j * 8));
|
||||
nr_regs--;
|
||||
j++;
|
||||
}
|
||||
emit_stx(prog, bytes_to_bpf_size(arg_size),
|
||||
BPF_REG_FP,
|
||||
i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
|
||||
-(stack_size - i * 8));
|
||||
|
||||
j = next_same_struct ? j : j + 1;
|
||||
}
|
||||
}
|
||||
|
||||
static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args,
|
||||
static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_regs,
|
||||
int stack_size)
|
||||
{
|
||||
int i, j, arg_size, nr_regs;
|
||||
int i, j, arg_size;
|
||||
bool next_same_struct = false;
|
||||
|
||||
/* Restore function arguments from stack.
|
||||
* For a function that accepts two pointers the sequence will be:
|
||||
* EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10]
|
||||
* EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8]
|
||||
*/
|
||||
for (i = 0, j = 0; i < min(nr_args, 6); i++) {
|
||||
if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) {
|
||||
nr_regs = (m->arg_size[i] + 7) / 8;
|
||||
for (i = 0, j = 0; i < min(nr_regs, 6); i++) {
|
||||
/* The arg_size is at most 16 bytes, enforced by the verifier. */
|
||||
arg_size = m->arg_size[j];
|
||||
if (arg_size > 8) {
|
||||
arg_size = 8;
|
||||
} else {
|
||||
nr_regs = 1;
|
||||
arg_size = m->arg_size[i];
|
||||
next_same_struct = !next_same_struct;
|
||||
}
|
||||
|
||||
while (nr_regs) {
|
||||
emit_ldx(prog, bytes_to_bpf_size(arg_size),
|
||||
j == 5 ? X86_REG_R9 : BPF_REG_1 + j,
|
||||
BPF_REG_FP,
|
||||
-(stack_size - j * 8));
|
||||
nr_regs--;
|
||||
j++;
|
||||
}
|
||||
emit_ldx(prog, bytes_to_bpf_size(arg_size),
|
||||
i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
|
||||
BPF_REG_FP,
|
||||
-(stack_size - i * 8));
|
||||
|
||||
j = next_same_struct ? j : j + 1;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2138,8 +2135,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
|
||||
struct bpf_tramp_links *tlinks,
|
||||
void *func_addr)
|
||||
{
|
||||
int ret, i, nr_args = m->nr_args, extra_nregs = 0;
|
||||
int regs_off, ip_off, args_off, stack_size = nr_args * 8, run_ctx_off;
|
||||
int i, ret, nr_regs = m->nr_args, stack_size = 0;
|
||||
int regs_off, nregs_off, ip_off, run_ctx_off;
|
||||
struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
|
||||
struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
|
||||
struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
|
||||
@ -2148,17 +2145,14 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
|
||||
u8 *prog;
|
||||
bool save_ret;
|
||||
|
||||
/* x86-64 supports up to 6 arguments. 7+ can be added in the future */
|
||||
if (nr_args > 6)
|
||||
return -ENOTSUPP;
|
||||
|
||||
for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) {
|
||||
/* extra registers for struct arguments */
|
||||
for (i = 0; i < m->nr_args; i++)
|
||||
if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
|
||||
extra_nregs += (m->arg_size[i] + 7) / 8 - 1;
|
||||
}
|
||||
if (nr_args + extra_nregs > 6)
|
||||
nr_regs += (m->arg_size[i] + 7) / 8 - 1;
|
||||
|
||||
/* x86-64 supports up to 6 arguments. 7+ can be added in the future */
|
||||
if (nr_regs > 6)
|
||||
return -ENOTSUPP;
|
||||
stack_size += extra_nregs * 8;
|
||||
|
||||
/* Generated trampoline stack layout:
|
||||
*
|
||||
@ -2172,7 +2166,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
|
||||
* [ ... ]
|
||||
* RBP - regs_off [ reg_arg1 ] program's ctx pointer
|
||||
*
|
||||
* RBP - args_off [ arg regs count ] always
|
||||
* RBP - nregs_off [ regs count ] always
|
||||
*
|
||||
* RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag
|
||||
*
|
||||
@ -2184,11 +2178,12 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
|
||||
if (save_ret)
|
||||
stack_size += 8;
|
||||
|
||||
stack_size += nr_regs * 8;
|
||||
regs_off = stack_size;
|
||||
|
||||
/* args count */
|
||||
/* regs count */
|
||||
stack_size += 8;
|
||||
args_off = stack_size;
|
||||
nregs_off = stack_size;
|
||||
|
||||
if (flags & BPF_TRAMP_F_IP_ARG)
|
||||
stack_size += 8; /* room for IP address argument */
|
||||
@ -2221,11 +2216,11 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
|
||||
EMIT1(0x53); /* push rbx */
|
||||
|
||||
/* Store number of argument registers of the traced function:
|
||||
* mov rax, nr_args + extra_nregs
|
||||
* mov QWORD PTR [rbp - args_off], rax
|
||||
* mov rax, nr_regs
|
||||
* mov QWORD PTR [rbp - nregs_off], rax
|
||||
*/
|
||||
emit_mov_imm64(&prog, BPF_REG_0, 0, (u32) nr_args + extra_nregs);
|
||||
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -args_off);
|
||||
emit_mov_imm64(&prog, BPF_REG_0, 0, (u32) nr_regs);
|
||||
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -nregs_off);
|
||||
|
||||
if (flags & BPF_TRAMP_F_IP_ARG) {
|
||||
/* Store IP address of the traced function:
|
||||
@ -2236,7 +2231,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
|
||||
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ip_off);
|
||||
}
|
||||
|
||||
save_regs(m, &prog, nr_args, regs_off);
|
||||
save_regs(m, &prog, nr_regs, regs_off);
|
||||
|
||||
if (flags & BPF_TRAMP_F_CALL_ORIG) {
|
||||
/* arg1: mov rdi, im */
|
||||
@ -2266,7 +2261,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
|
||||
}
|
||||
|
||||
if (flags & BPF_TRAMP_F_CALL_ORIG) {
|
||||
restore_regs(m, &prog, nr_args, regs_off);
|
||||
restore_regs(m, &prog, nr_regs, regs_off);
|
||||
|
||||
if (flags & BPF_TRAMP_F_ORIG_STACK) {
|
||||
emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8);
|
||||
@ -2307,7 +2302,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
|
||||
}
|
||||
|
||||
if (flags & BPF_TRAMP_F_RESTORE_REGS)
|
||||
restore_regs(m, &prog, nr_args, regs_off);
|
||||
restore_regs(m, &prog, nr_regs, regs_off);
|
||||
|
||||
/* This needs to be done regardless. If there were fmod_ret programs,
|
||||
* the return value is only updated on the stack and still needs to be
|
||||
|
@ -58,9 +58,7 @@ u64 mlx4_en_get_cqe_ts(struct mlx4_cqe *cqe)
|
||||
return hi | lo;
|
||||
}
|
||||
|
||||
void mlx4_en_fill_hwtstamps(struct mlx4_en_dev *mdev,
|
||||
struct skb_shared_hwtstamps *hwts,
|
||||
u64 timestamp)
|
||||
u64 mlx4_en_get_hwtstamp(struct mlx4_en_dev *mdev, u64 timestamp)
|
||||
{
|
||||
unsigned int seq;
|
||||
u64 nsec;
|
||||
@ -70,8 +68,15 @@ void mlx4_en_fill_hwtstamps(struct mlx4_en_dev *mdev,
|
||||
nsec = timecounter_cyc2time(&mdev->clock, timestamp);
|
||||
} while (read_seqretry(&mdev->clock_lock, seq));
|
||||
|
||||
return ns_to_ktime(nsec);
|
||||
}
|
||||
|
||||
void mlx4_en_fill_hwtstamps(struct mlx4_en_dev *mdev,
|
||||
struct skb_shared_hwtstamps *hwts,
|
||||
u64 timestamp)
|
||||
{
|
||||
memset(hwts, 0, sizeof(struct skb_shared_hwtstamps));
|
||||
hwts->hwtstamp = ns_to_ktime(nsec);
|
||||
hwts->hwtstamp = mlx4_en_get_hwtstamp(mdev, timestamp);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2889,6 +2889,11 @@ static const struct net_device_ops mlx4_netdev_ops_master = {
|
||||
.ndo_bpf = mlx4_xdp,
|
||||
};
|
||||
|
||||
static const struct xdp_metadata_ops mlx4_xdp_metadata_ops = {
|
||||
.xmo_rx_timestamp = mlx4_en_xdp_rx_timestamp,
|
||||
.xmo_rx_hash = mlx4_en_xdp_rx_hash,
|
||||
};
|
||||
|
||||
struct mlx4_en_bond {
|
||||
struct work_struct work;
|
||||
struct mlx4_en_priv *priv;
|
||||
@ -3310,6 +3315,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
|
||||
dev->netdev_ops = &mlx4_netdev_ops_master;
|
||||
else
|
||||
dev->netdev_ops = &mlx4_netdev_ops;
|
||||
dev->xdp_metadata_ops = &mlx4_xdp_metadata_ops;
|
||||
dev->watchdog_timeo = MLX4_EN_WATCHDOG_TIMEOUT;
|
||||
netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]);
|
||||
netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
|
||||
|
@ -661,9 +661,41 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
|
||||
#define MLX4_CQE_STATUS_IP_ANY (MLX4_CQE_STATUS_IPV4)
|
||||
#endif
|
||||
|
||||
struct mlx4_en_xdp_buff {
|
||||
struct xdp_buff xdp;
|
||||
struct mlx4_cqe *cqe;
|
||||
struct mlx4_en_dev *mdev;
|
||||
struct mlx4_en_rx_ring *ring;
|
||||
struct net_device *dev;
|
||||
};
|
||||
|
||||
int mlx4_en_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
|
||||
{
|
||||
struct mlx4_en_xdp_buff *_ctx = (void *)ctx;
|
||||
|
||||
if (unlikely(_ctx->ring->hwtstamp_rx_filter != HWTSTAMP_FILTER_ALL))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
*timestamp = mlx4_en_get_hwtstamp(_ctx->mdev,
|
||||
mlx4_en_get_cqe_ts(_ctx->cqe));
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash)
|
||||
{
|
||||
struct mlx4_en_xdp_buff *_ctx = (void *)ctx;
|
||||
|
||||
if (unlikely(!(_ctx->dev->features & NETIF_F_RXHASH)))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
*hash = be32_to_cpu(_ctx->cqe->immed_rss_invalid);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
|
||||
{
|
||||
struct mlx4_en_priv *priv = netdev_priv(dev);
|
||||
struct mlx4_en_xdp_buff mxbuf = {};
|
||||
int factor = priv->cqe_factor;
|
||||
struct mlx4_en_rx_ring *ring;
|
||||
struct bpf_prog *xdp_prog;
|
||||
@ -671,7 +703,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
|
||||
bool doorbell_pending;
|
||||
bool xdp_redir_flush;
|
||||
struct mlx4_cqe *cqe;
|
||||
struct xdp_buff xdp;
|
||||
int polled = 0;
|
||||
int index;
|
||||
|
||||
@ -681,7 +712,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
|
||||
ring = priv->rx_ring[cq_ring];
|
||||
|
||||
xdp_prog = rcu_dereference_bh(ring->xdp_prog);
|
||||
xdp_init_buff(&xdp, priv->frag_info[0].frag_stride, &ring->xdp_rxq);
|
||||
xdp_init_buff(&mxbuf.xdp, priv->frag_info[0].frag_stride, &ring->xdp_rxq);
|
||||
doorbell_pending = false;
|
||||
xdp_redir_flush = false;
|
||||
|
||||
@ -776,24 +807,28 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
|
||||
priv->frag_info[0].frag_size,
|
||||
DMA_FROM_DEVICE);
|
||||
|
||||
xdp_prepare_buff(&xdp, va - frags[0].page_offset,
|
||||
frags[0].page_offset, length, false);
|
||||
orig_data = xdp.data;
|
||||
xdp_prepare_buff(&mxbuf.xdp, va - frags[0].page_offset,
|
||||
frags[0].page_offset, length, true);
|
||||
orig_data = mxbuf.xdp.data;
|
||||
mxbuf.cqe = cqe;
|
||||
mxbuf.mdev = priv->mdev;
|
||||
mxbuf.ring = ring;
|
||||
mxbuf.dev = dev;
|
||||
|
||||
act = bpf_prog_run_xdp(xdp_prog, &xdp);
|
||||
act = bpf_prog_run_xdp(xdp_prog, &mxbuf.xdp);
|
||||
|
||||
length = xdp.data_end - xdp.data;
|
||||
if (xdp.data != orig_data) {
|
||||
frags[0].page_offset = xdp.data -
|
||||
xdp.data_hard_start;
|
||||
va = xdp.data;
|
||||
length = mxbuf.xdp.data_end - mxbuf.xdp.data;
|
||||
if (mxbuf.xdp.data != orig_data) {
|
||||
frags[0].page_offset = mxbuf.xdp.data -
|
||||
mxbuf.xdp.data_hard_start;
|
||||
va = mxbuf.xdp.data;
|
||||
}
|
||||
|
||||
switch (act) {
|
||||
case XDP_PASS:
|
||||
break;
|
||||
case XDP_REDIRECT:
|
||||
if (likely(!xdp_do_redirect(dev, &xdp, xdp_prog))) {
|
||||
if (likely(!xdp_do_redirect(dev, &mxbuf.xdp, xdp_prog))) {
|
||||
ring->xdp_redirect++;
|
||||
xdp_redir_flush = true;
|
||||
frags[0].page = NULL;
|
||||
|
@ -796,10 +796,15 @@ void mlx4_en_update_pfc_stats_bitmap(struct mlx4_dev *dev,
|
||||
int mlx4_en_netdev_event(struct notifier_block *this,
|
||||
unsigned long event, void *ptr);
|
||||
|
||||
struct xdp_md;
|
||||
int mlx4_en_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp);
|
||||
int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash);
|
||||
|
||||
/*
|
||||
* Functions for time stamping
|
||||
*/
|
||||
u64 mlx4_en_get_cqe_ts(struct mlx4_cqe *cqe);
|
||||
u64 mlx4_en_get_hwtstamp(struct mlx4_en_dev *mdev, u64 timestamp);
|
||||
void mlx4_en_fill_hwtstamps(struct mlx4_en_dev *mdev,
|
||||
struct skb_shared_hwtstamps *hwts,
|
||||
u64 timestamp);
|
||||
|
@ -626,10 +626,11 @@ struct mlx5e_rq;
|
||||
typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*);
|
||||
typedef struct sk_buff *
|
||||
(*mlx5e_fp_skb_from_cqe_mpwrq)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
|
||||
u16 cqe_bcnt, u32 head_offset, u32 page_idx);
|
||||
struct mlx5_cqe64 *cqe, u16 cqe_bcnt,
|
||||
u32 head_offset, u32 page_idx);
|
||||
typedef struct sk_buff *
|
||||
(*mlx5e_fp_skb_from_cqe)(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
|
||||
u32 cqe_bcnt);
|
||||
struct mlx5_cqe64 *cqe, u32 cqe_bcnt);
|
||||
typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq);
|
||||
typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16);
|
||||
typedef void (*mlx5e_fp_shampo_dealloc_hd)(struct mlx5e_rq*, u16, u16, bool);
|
||||
|
@ -73,6 +73,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);
|
||||
void mlx5e_free_rx_descs(struct mlx5e_rq *rq);
|
||||
void mlx5e_free_rx_in_progress_descs(struct mlx5e_rq *rq);
|
||||
|
||||
static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
|
||||
{
|
||||
return config->rx_filter == HWTSTAMP_FILTER_ALL;
|
||||
}
|
||||
|
||||
/* TX */
|
||||
netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
|
||||
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
|
||||
|
@ -156,10 +156,39 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
|
||||
return true;
|
||||
}
|
||||
|
||||
static int mlx5e_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
|
||||
{
|
||||
const struct mlx5e_xdp_buff *_ctx = (void *)ctx;
|
||||
|
||||
if (unlikely(!mlx5e_rx_hw_stamp(_ctx->rq->tstamp)))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
*timestamp = mlx5e_cqe_ts_to_ns(_ctx->rq->ptp_cyc2time,
|
||||
_ctx->rq->clock, get_cqe_ts(_ctx->cqe));
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mlx5e_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash)
|
||||
{
|
||||
const struct mlx5e_xdp_buff *_ctx = (void *)ctx;
|
||||
|
||||
if (unlikely(!(_ctx->xdp.rxq->dev->features & NETIF_F_RXHASH)))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
*hash = be32_to_cpu(_ctx->cqe->rss_hash_result);
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct xdp_metadata_ops mlx5e_xdp_metadata_ops = {
|
||||
.xmo_rx_timestamp = mlx5e_xdp_rx_timestamp,
|
||||
.xmo_rx_hash = mlx5e_xdp_rx_hash,
|
||||
};
|
||||
|
||||
/* returns true if packet was consumed by xdp */
|
||||
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page,
|
||||
struct bpf_prog *prog, struct xdp_buff *xdp)
|
||||
struct bpf_prog *prog, struct mlx5e_xdp_buff *mxbuf)
|
||||
{
|
||||
struct xdp_buff *xdp = &mxbuf->xdp;
|
||||
u32 act;
|
||||
int err;
|
||||
|
||||
|
@ -44,10 +44,16 @@
|
||||
(MLX5E_XDP_INLINE_WQE_MAX_DS_CNT * MLX5_SEND_WQE_DS - \
|
||||
sizeof(struct mlx5_wqe_inline_seg))
|
||||
|
||||
struct mlx5e_xdp_buff {
|
||||
struct xdp_buff xdp;
|
||||
struct mlx5_cqe64 *cqe;
|
||||
struct mlx5e_rq *rq;
|
||||
};
|
||||
|
||||
struct mlx5e_xsk_param;
|
||||
int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
|
||||
bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct page *page,
|
||||
struct bpf_prog *prog, struct xdp_buff *xdp);
|
||||
struct bpf_prog *prog, struct mlx5e_xdp_buff *mlctx);
|
||||
void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
|
||||
bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
|
||||
void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
|
||||
@ -56,6 +62,8 @@ void mlx5e_xdp_rx_poll_complete(struct mlx5e_rq *rq);
|
||||
int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
|
||||
u32 flags);
|
||||
|
||||
extern const struct xdp_metadata_ops mlx5e_xdp_metadata_ops;
|
||||
|
||||
INDIRECT_CALLABLE_DECLARE(bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
|
||||
struct mlx5e_xmit_data *xdptxd,
|
||||
struct skb_shared_info *sinfo,
|
||||
|
@ -8,6 +8,14 @@
|
||||
|
||||
/* RX data path */
|
||||
|
||||
static struct mlx5e_xdp_buff *xsk_buff_to_mxbuf(struct xdp_buff *xdp)
|
||||
{
|
||||
/* mlx5e_xdp_buff shares its layout with xdp_buff_xsk
|
||||
* and private mlx5e_xdp_buff fields fall into xdp_buff_xsk->cb
|
||||
*/
|
||||
return (struct mlx5e_xdp_buff *)xdp;
|
||||
}
|
||||
|
||||
int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
|
||||
{
|
||||
struct mlx5e_mpw_info *wi = mlx5e_get_mpw_info(rq, ix);
|
||||
@ -22,6 +30,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
|
||||
goto err;
|
||||
|
||||
BUILD_BUG_ON(sizeof(wi->alloc_units[0]) != sizeof(wi->alloc_units[0].xsk));
|
||||
XSK_CHECK_PRIV_TYPE(struct mlx5e_xdp_buff);
|
||||
batch = xsk_buff_alloc_batch(rq->xsk_pool, (struct xdp_buff **)wi->alloc_units,
|
||||
rq->mpwqe.pages_per_wqe);
|
||||
|
||||
@ -43,25 +52,30 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
|
||||
|
||||
if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_ALIGNED)) {
|
||||
for (i = 0; i < batch; i++) {
|
||||
struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk);
|
||||
dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
|
||||
|
||||
umr_wqe->inline_mtts[i] = (struct mlx5_mtt) {
|
||||
.ptag = cpu_to_be64(addr | MLX5_EN_WR),
|
||||
};
|
||||
mxbuf->rq = rq;
|
||||
}
|
||||
} else if (unlikely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_UNALIGNED)) {
|
||||
for (i = 0; i < batch; i++) {
|
||||
struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk);
|
||||
dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
|
||||
|
||||
umr_wqe->inline_ksms[i] = (struct mlx5_ksm) {
|
||||
.key = rq->mkey_be,
|
||||
.va = cpu_to_be64(addr),
|
||||
};
|
||||
mxbuf->rq = rq;
|
||||
}
|
||||
} else if (likely(rq->mpwqe.umr_mode == MLX5E_MPWRQ_UMR_MODE_TRIPLE)) {
|
||||
u32 mapping_size = 1 << (rq->mpwqe.page_shift - 2);
|
||||
|
||||
for (i = 0; i < batch; i++) {
|
||||
struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk);
|
||||
dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
|
||||
|
||||
umr_wqe->inline_ksms[i << 2] = (struct mlx5_ksm) {
|
||||
@ -80,6 +94,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
|
||||
.key = rq->mkey_be,
|
||||
.va = cpu_to_be64(rq->wqe_overflow.addr),
|
||||
};
|
||||
mxbuf->rq = rq;
|
||||
}
|
||||
} else {
|
||||
__be32 pad_size = cpu_to_be32((1 << rq->mpwqe.page_shift) -
|
||||
@ -87,6 +102,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
|
||||
__be32 frame_size = cpu_to_be32(rq->xsk_pool->chunk_size);
|
||||
|
||||
for (i = 0; i < batch; i++) {
|
||||
struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[i].xsk);
|
||||
dma_addr_t addr = xsk_buff_xdp_get_frame_dma(wi->alloc_units[i].xsk);
|
||||
|
||||
umr_wqe->inline_klms[i << 1] = (struct mlx5_klm) {
|
||||
@ -99,6 +115,7 @@ int mlx5e_xsk_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
|
||||
.va = cpu_to_be64(rq->wqe_overflow.addr),
|
||||
.bcount = pad_size,
|
||||
};
|
||||
mxbuf->rq = rq;
|
||||
}
|
||||
}
|
||||
|
||||
@ -229,11 +246,12 @@ static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, struct xdp_b
|
||||
|
||||
struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
|
||||
struct mlx5e_mpw_info *wi,
|
||||
struct mlx5_cqe64 *cqe,
|
||||
u16 cqe_bcnt,
|
||||
u32 head_offset,
|
||||
u32 page_idx)
|
||||
{
|
||||
struct xdp_buff *xdp = wi->alloc_units[page_idx].xsk;
|
||||
struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->alloc_units[page_idx].xsk);
|
||||
struct bpf_prog *prog;
|
||||
|
||||
/* Check packet size. Note LRO doesn't use linear SKB */
|
||||
@ -249,9 +267,11 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
|
||||
*/
|
||||
WARN_ON_ONCE(head_offset);
|
||||
|
||||
xsk_buff_set_size(xdp, cqe_bcnt);
|
||||
xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
|
||||
net_prefetch(xdp->data);
|
||||
/* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */
|
||||
mxbuf->cqe = cqe;
|
||||
xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt);
|
||||
xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool);
|
||||
net_prefetch(mxbuf->xdp.data);
|
||||
|
||||
/* Possible flows:
|
||||
* - XDP_REDIRECT to XSKMAP:
|
||||
@ -269,7 +289,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
|
||||
*/
|
||||
|
||||
prog = rcu_dereference(rq->xdp_prog);
|
||||
if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp))) {
|
||||
if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, mxbuf))) {
|
||||
if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
|
||||
__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
|
||||
return NULL; /* page/packet was consumed by XDP */
|
||||
@ -278,14 +298,15 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
|
||||
/* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
|
||||
* frame. On SKB allocation failure, NULL is returned.
|
||||
*/
|
||||
return mlx5e_xsk_construct_skb(rq, xdp);
|
||||
return mlx5e_xsk_construct_skb(rq, &mxbuf->xdp);
|
||||
}
|
||||
|
||||
struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
|
||||
struct mlx5e_wqe_frag_info *wi,
|
||||
struct mlx5_cqe64 *cqe,
|
||||
u32 cqe_bcnt)
|
||||
{
|
||||
struct xdp_buff *xdp = wi->au->xsk;
|
||||
struct mlx5e_xdp_buff *mxbuf = xsk_buff_to_mxbuf(wi->au->xsk);
|
||||
struct bpf_prog *prog;
|
||||
|
||||
/* wi->offset is not used in this function, because xdp->data and the
|
||||
@ -295,17 +316,19 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
|
||||
*/
|
||||
WARN_ON_ONCE(wi->offset);
|
||||
|
||||
xsk_buff_set_size(xdp, cqe_bcnt);
|
||||
xsk_buff_dma_sync_for_cpu(xdp, rq->xsk_pool);
|
||||
net_prefetch(xdp->data);
|
||||
/* mxbuf->rq is set on allocation, but cqe is per-packet so set it here */
|
||||
mxbuf->cqe = cqe;
|
||||
xsk_buff_set_size(&mxbuf->xdp, cqe_bcnt);
|
||||
xsk_buff_dma_sync_for_cpu(&mxbuf->xdp, rq->xsk_pool);
|
||||
net_prefetch(mxbuf->xdp.data);
|
||||
|
||||
prog = rcu_dereference(rq->xdp_prog);
|
||||
if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, xdp)))
|
||||
if (likely(prog && mlx5e_xdp_handle(rq, NULL, prog, mxbuf)))
|
||||
return NULL; /* page/packet was consumed by XDP */
|
||||
|
||||
/* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
|
||||
* will be handled by mlx5e_free_rx_wqe.
|
||||
* On SKB allocation failure, NULL is returned.
|
||||
*/
|
||||
return mlx5e_xsk_construct_skb(rq, xdp);
|
||||
return mlx5e_xsk_construct_skb(rq, &mxbuf->xdp);
|
||||
}
|
||||
|
@ -13,11 +13,13 @@ int mlx5e_xsk_alloc_rx_wqes_batched(struct mlx5e_rq *rq, u16 ix, int wqe_bulk);
|
||||
int mlx5e_xsk_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk);
|
||||
struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
|
||||
struct mlx5e_mpw_info *wi,
|
||||
struct mlx5_cqe64 *cqe,
|
||||
u16 cqe_bcnt,
|
||||
u32 head_offset,
|
||||
u32 page_idx);
|
||||
struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
|
||||
struct mlx5e_wqe_frag_info *wi,
|
||||
struct mlx5_cqe64 *cqe,
|
||||
u32 cqe_bcnt);
|
||||
|
||||
#endif /* __MLX5_EN_XSK_RX_H__ */
|
||||
|
@ -5057,6 +5057,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
|
||||
SET_NETDEV_DEV(netdev, mdev->device);
|
||||
|
||||
netdev->netdev_ops = &mlx5e_netdev_ops;
|
||||
netdev->xdp_metadata_ops = &mlx5e_xdp_metadata_ops;
|
||||
|
||||
mlx5e_dcbnl_build_netdev(netdev);
|
||||
|
||||
|
@ -62,10 +62,12 @@
|
||||
|
||||
static struct sk_buff *
|
||||
mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
|
||||
u16 cqe_bcnt, u32 head_offset, u32 page_idx);
|
||||
struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset,
|
||||
u32 page_idx);
|
||||
static struct sk_buff *
|
||||
mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
|
||||
u16 cqe_bcnt, u32 head_offset, u32 page_idx);
|
||||
struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset,
|
||||
u32 page_idx);
|
||||
static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
|
||||
static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
|
||||
static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
|
||||
@ -76,11 +78,6 @@ const struct mlx5e_rx_handlers mlx5e_rx_handlers_nic = {
|
||||
.handle_rx_cqe_mpwqe_shampo = mlx5e_handle_rx_cqe_mpwrq_shampo,
|
||||
};
|
||||
|
||||
static inline bool mlx5e_rx_hw_stamp(struct hwtstamp_config *config)
|
||||
{
|
||||
return config->rx_filter == HWTSTAMP_FILTER_ALL;
|
||||
}
|
||||
|
||||
static inline void mlx5e_read_cqe_slot(struct mlx5_cqwq *wq,
|
||||
u32 cqcc, void *data)
|
||||
{
|
||||
@ -1575,16 +1572,19 @@ struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va,
|
||||
return skb;
|
||||
}
|
||||
|
||||
static void mlx5e_fill_xdp_buff(struct mlx5e_rq *rq, void *va, u16 headroom,
|
||||
u32 len, struct xdp_buff *xdp)
|
||||
static void mlx5e_fill_mxbuf(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
|
||||
void *va, u16 headroom, u32 len,
|
||||
struct mlx5e_xdp_buff *mxbuf)
|
||||
{
|
||||
xdp_init_buff(xdp, rq->buff.frame0_sz, &rq->xdp_rxq);
|
||||
xdp_prepare_buff(xdp, va, headroom, len, true);
|
||||
xdp_init_buff(&mxbuf->xdp, rq->buff.frame0_sz, &rq->xdp_rxq);
|
||||
xdp_prepare_buff(&mxbuf->xdp, va, headroom, len, true);
|
||||
mxbuf->cqe = cqe;
|
||||
mxbuf->rq = rq;
|
||||
}
|
||||
|
||||
static struct sk_buff *
|
||||
mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
|
||||
u32 cqe_bcnt)
|
||||
struct mlx5_cqe64 *cqe, u32 cqe_bcnt)
|
||||
{
|
||||
union mlx5e_alloc_unit *au = wi->au;
|
||||
u16 rx_headroom = rq->buff.headroom;
|
||||
@ -1606,16 +1606,16 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
|
||||
|
||||
prog = rcu_dereference(rq->xdp_prog);
|
||||
if (prog) {
|
||||
struct xdp_buff xdp;
|
||||
struct mlx5e_xdp_buff mxbuf;
|
||||
|
||||
net_prefetchw(va); /* xdp_frame data area */
|
||||
mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
|
||||
if (mlx5e_xdp_handle(rq, au->page, prog, &xdp))
|
||||
mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, cqe_bcnt, &mxbuf);
|
||||
if (mlx5e_xdp_handle(rq, au->page, prog, &mxbuf))
|
||||
return NULL; /* page/packet was consumed by XDP */
|
||||
|
||||
rx_headroom = xdp.data - xdp.data_hard_start;
|
||||
metasize = xdp.data - xdp.data_meta;
|
||||
cqe_bcnt = xdp.data_end - xdp.data;
|
||||
rx_headroom = mxbuf.xdp.data - mxbuf.xdp.data_hard_start;
|
||||
metasize = mxbuf.xdp.data - mxbuf.xdp.data_meta;
|
||||
cqe_bcnt = mxbuf.xdp.data_end - mxbuf.xdp.data;
|
||||
}
|
||||
frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
|
||||
skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);
|
||||
@ -1630,16 +1630,16 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
|
||||
|
||||
static struct sk_buff *
|
||||
mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi,
|
||||
u32 cqe_bcnt)
|
||||
struct mlx5_cqe64 *cqe, u32 cqe_bcnt)
|
||||
{
|
||||
struct mlx5e_rq_frag_info *frag_info = &rq->wqe.info.arr[0];
|
||||
struct mlx5e_wqe_frag_info *head_wi = wi;
|
||||
union mlx5e_alloc_unit *au = wi->au;
|
||||
u16 rx_headroom = rq->buff.headroom;
|
||||
struct skb_shared_info *sinfo;
|
||||
struct mlx5e_xdp_buff mxbuf;
|
||||
u32 frag_consumed_bytes;
|
||||
struct bpf_prog *prog;
|
||||
struct xdp_buff xdp;
|
||||
struct sk_buff *skb;
|
||||
dma_addr_t addr;
|
||||
u32 truesize;
|
||||
@ -1654,8 +1654,8 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
|
||||
net_prefetchw(va); /* xdp_frame data area */
|
||||
net_prefetch(va + rx_headroom);
|
||||
|
||||
mlx5e_fill_xdp_buff(rq, va, rx_headroom, frag_consumed_bytes, &xdp);
|
||||
sinfo = xdp_get_shared_info_from_buff(&xdp);
|
||||
mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, frag_consumed_bytes, &mxbuf);
|
||||
sinfo = xdp_get_shared_info_from_buff(&mxbuf.xdp);
|
||||
truesize = 0;
|
||||
|
||||
cqe_bcnt -= frag_consumed_bytes;
|
||||
@ -1673,13 +1673,13 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
|
||||
dma_sync_single_for_cpu(rq->pdev, addr + wi->offset,
|
||||
frag_consumed_bytes, rq->buff.map_dir);
|
||||
|
||||
if (!xdp_buff_has_frags(&xdp)) {
|
||||
if (!xdp_buff_has_frags(&mxbuf.xdp)) {
|
||||
/* Init on the first fragment to avoid cold cache access
|
||||
* when possible.
|
||||
*/
|
||||
sinfo->nr_frags = 0;
|
||||
sinfo->xdp_frags_size = 0;
|
||||
xdp_buff_set_frags_flag(&xdp);
|
||||
xdp_buff_set_frags_flag(&mxbuf.xdp);
|
||||
}
|
||||
|
||||
frag = &sinfo->frags[sinfo->nr_frags++];
|
||||
@ -1688,7 +1688,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
|
||||
skb_frag_size_set(frag, frag_consumed_bytes);
|
||||
|
||||
if (page_is_pfmemalloc(au->page))
|
||||
xdp_buff_set_frag_pfmemalloc(&xdp);
|
||||
xdp_buff_set_frag_pfmemalloc(&mxbuf.xdp);
|
||||
|
||||
sinfo->xdp_frags_size += frag_consumed_bytes;
|
||||
truesize += frag_info->frag_stride;
|
||||
@ -1701,7 +1701,7 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
|
||||
au = head_wi->au;
|
||||
|
||||
prog = rcu_dereference(rq->xdp_prog);
|
||||
if (prog && mlx5e_xdp_handle(rq, au->page, prog, &xdp)) {
|
||||
if (prog && mlx5e_xdp_handle(rq, au->page, prog, &mxbuf)) {
|
||||
if (test_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
|
||||
int i;
|
||||
|
||||
@ -1711,22 +1711,22 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi
|
||||
return NULL; /* page/packet was consumed by XDP */
|
||||
}
|
||||
|
||||
skb = mlx5e_build_linear_skb(rq, xdp.data_hard_start, rq->buff.frame0_sz,
|
||||
xdp.data - xdp.data_hard_start,
|
||||
xdp.data_end - xdp.data,
|
||||
xdp.data - xdp.data_meta);
|
||||
skb = mlx5e_build_linear_skb(rq, mxbuf.xdp.data_hard_start, rq->buff.frame0_sz,
|
||||
mxbuf.xdp.data - mxbuf.xdp.data_hard_start,
|
||||
mxbuf.xdp.data_end - mxbuf.xdp.data,
|
||||
mxbuf.xdp.data - mxbuf.xdp.data_meta);
|
||||
if (unlikely(!skb))
|
||||
return NULL;
|
||||
|
||||
page_ref_inc(au->page);
|
||||
|
||||
if (unlikely(xdp_buff_has_frags(&xdp))) {
|
||||
if (unlikely(xdp_buff_has_frags(&mxbuf.xdp))) {
|
||||
int i;
|
||||
|
||||
/* sinfo->nr_frags is reset by build_skb, calculate again. */
|
||||
xdp_update_skb_shared_info(skb, wi - head_wi - 1,
|
||||
sinfo->xdp_frags_size, truesize,
|
||||
xdp_buff_is_frag_pfmemalloc(&xdp));
|
||||
xdp_buff_is_frag_pfmemalloc(&mxbuf.xdp));
|
||||
|
||||
for (i = 0; i < sinfo->nr_frags; i++) {
|
||||
skb_frag_t *frag = &sinfo->frags[i];
|
||||
@ -1777,7 +1777,7 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
|
||||
mlx5e_skb_from_cqe_linear,
|
||||
mlx5e_skb_from_cqe_nonlinear,
|
||||
mlx5e_xsk_skb_from_cqe_linear,
|
||||
rq, wi, cqe_bcnt);
|
||||
rq, wi, cqe, cqe_bcnt);
|
||||
if (!skb) {
|
||||
/* probably for XDP */
|
||||
if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
|
||||
@ -1830,7 +1830,7 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
|
||||
skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
|
||||
mlx5e_skb_from_cqe_linear,
|
||||
mlx5e_skb_from_cqe_nonlinear,
|
||||
rq, wi, cqe_bcnt);
|
||||
rq, wi, cqe, cqe_bcnt);
|
||||
if (!skb) {
|
||||
/* probably for XDP */
|
||||
if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)) {
|
||||
@ -1889,7 +1889,7 @@ static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64
|
||||
skb = INDIRECT_CALL_2(rq->mpwqe.skb_from_cqe_mpwrq,
|
||||
mlx5e_skb_from_cqe_mpwrq_linear,
|
||||
mlx5e_skb_from_cqe_mpwrq_nonlinear,
|
||||
rq, wi, cqe_bcnt, head_offset, page_idx);
|
||||
rq, wi, cqe, cqe_bcnt, head_offset, page_idx);
|
||||
if (!skb)
|
||||
goto mpwrq_cqe_out;
|
||||
|
||||
@ -1940,7 +1940,8 @@ mlx5e_fill_skb_data(struct sk_buff *skb, struct mlx5e_rq *rq,
|
||||
|
||||
static struct sk_buff *
|
||||
mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
|
||||
u16 cqe_bcnt, u32 head_offset, u32 page_idx)
|
||||
struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset,
|
||||
u32 page_idx)
|
||||
{
|
||||
union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx];
|
||||
u16 headlen = min_t(u16, MLX5E_RX_MAX_HEAD, cqe_bcnt);
|
||||
@ -1979,7 +1980,8 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w
|
||||
|
||||
static struct sk_buff *
|
||||
mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
|
||||
u16 cqe_bcnt, u32 head_offset, u32 page_idx)
|
||||
struct mlx5_cqe64 *cqe, u16 cqe_bcnt, u32 head_offset,
|
||||
u32 page_idx)
|
||||
{
|
||||
union mlx5e_alloc_unit *au = &wi->alloc_units[page_idx];
|
||||
u16 rx_headroom = rq->buff.headroom;
|
||||
@ -2007,19 +2009,19 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
|
||||
|
||||
prog = rcu_dereference(rq->xdp_prog);
|
||||
if (prog) {
|
||||
struct xdp_buff xdp;
|
||||
struct mlx5e_xdp_buff mxbuf;
|
||||
|
||||
net_prefetchw(va); /* xdp_frame data area */
|
||||
mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
|
||||
if (mlx5e_xdp_handle(rq, au->page, prog, &xdp)) {
|
||||
mlx5e_fill_mxbuf(rq, cqe, va, rx_headroom, cqe_bcnt, &mxbuf);
|
||||
if (mlx5e_xdp_handle(rq, au->page, prog, &mxbuf)) {
|
||||
if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
|
||||
__set_bit(page_idx, wi->xdp_xmit_bitmap); /* non-atomic */
|
||||
return NULL; /* page/packet was consumed by XDP */
|
||||
}
|
||||
|
||||
rx_headroom = xdp.data - xdp.data_hard_start;
|
||||
metasize = xdp.data - xdp.data_meta;
|
||||
cqe_bcnt = xdp.data_end - xdp.data;
|
||||
rx_headroom = mxbuf.xdp.data - mxbuf.xdp.data_hard_start;
|
||||
metasize = mxbuf.xdp.data - mxbuf.xdp.data_meta;
|
||||
cqe_bcnt = mxbuf.xdp.data_end - mxbuf.xdp.data;
|
||||
}
|
||||
frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
|
||||
skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt, metasize);
|
||||
@ -2174,8 +2176,8 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq
|
||||
if (likely(head_size))
|
||||
*skb = mlx5e_skb_from_cqe_shampo(rq, wi, cqe, header_index);
|
||||
else
|
||||
*skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe_bcnt, data_offset,
|
||||
page_idx);
|
||||
*skb = mlx5e_skb_from_cqe_mpwrq_nonlinear(rq, wi, cqe, cqe_bcnt,
|
||||
data_offset, page_idx);
|
||||
if (unlikely(!*skb))
|
||||
goto free_hd_entry;
|
||||
|
||||
@ -2249,7 +2251,8 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq
|
||||
mlx5e_skb_from_cqe_mpwrq_linear,
|
||||
mlx5e_skb_from_cqe_mpwrq_nonlinear,
|
||||
mlx5e_xsk_skb_from_cqe_mpwrq_linear,
|
||||
rq, wi, cqe_bcnt, head_offset, page_idx);
|
||||
rq, wi, cqe, cqe_bcnt, head_offset,
|
||||
page_idx);
|
||||
if (!skb)
|
||||
goto mpwrq_cqe_out;
|
||||
|
||||
@ -2494,7 +2497,7 @@ static void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
|
||||
skb = INDIRECT_CALL_2(rq->wqe.skb_from_cqe,
|
||||
mlx5e_skb_from_cqe_linear,
|
||||
mlx5e_skb_from_cqe_nonlinear,
|
||||
rq, wi, cqe_bcnt);
|
||||
rq, wi, cqe, cqe_bcnt);
|
||||
if (!skb)
|
||||
goto wq_free_wqe;
|
||||
|
||||
@ -2586,7 +2589,7 @@ static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe
|
||||
goto free_wqe;
|
||||
}
|
||||
|
||||
skb = mlx5e_skb_from_cqe_nonlinear(rq, wi, cqe_bcnt);
|
||||
skb = mlx5e_skb_from_cqe_nonlinear(rq, wi, cqe, cqe_bcnt);
|
||||
if (!skb)
|
||||
goto free_wqe;
@ -315,10 +315,6 @@ nsim_setup_prog_hw_checks(struct netdevsim *ns, struct netdev_bpf *bpf)
|
||||
NSIM_EA(bpf->extack, "xdpoffload of non-bound program");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (!bpf_offload_dev_match(bpf->prog, ns->netdev)) {
|
||||
NSIM_EA(bpf->extack, "program bound to different dev");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
state = bpf->prog->aux->offload->dev_priv;
|
||||
if (WARN_ON(strcmp(state->state, "xlated"))) {
|
||||
|
@ -116,6 +116,11 @@ static struct {
|
||||
{ "peer_ifindex" },
|
||||
};
|
||||
|
||||
struct veth_xdp_buff {
|
||||
struct xdp_buff xdp;
|
||||
struct sk_buff *skb;
|
||||
};
|
||||
|
||||
static int veth_get_link_ksettings(struct net_device *dev,
|
||||
struct ethtool_link_ksettings *cmd)
|
||||
{
|
||||
@ -592,23 +597,25 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
|
||||
rcu_read_lock();
|
||||
xdp_prog = rcu_dereference(rq->xdp_prog);
|
||||
if (likely(xdp_prog)) {
|
||||
struct xdp_buff xdp;
|
||||
struct veth_xdp_buff vxbuf;
|
||||
struct xdp_buff *xdp = &vxbuf.xdp;
|
||||
u32 act;
|
||||
|
||||
xdp_convert_frame_to_buff(frame, &xdp);
|
||||
xdp.rxq = &rq->xdp_rxq;
|
||||
xdp_convert_frame_to_buff(frame, xdp);
|
||||
xdp->rxq = &rq->xdp_rxq;
|
||||
vxbuf.skb = NULL;
|
||||
|
||||
act = bpf_prog_run_xdp(xdp_prog, &xdp);
|
||||
act = bpf_prog_run_xdp(xdp_prog, xdp);
|
||||
|
||||
switch (act) {
|
||||
case XDP_PASS:
|
||||
if (xdp_update_frame_from_buff(&xdp, frame))
|
||||
if (xdp_update_frame_from_buff(xdp, frame))
|
||||
goto err_xdp;
|
||||
break;
|
||||
case XDP_TX:
|
||||
orig_frame = *frame;
|
||||
xdp.rxq->mem = frame->mem;
|
||||
if (unlikely(veth_xdp_tx(rq, &xdp, bq) < 0)) {
|
||||
xdp->rxq->mem = frame->mem;
|
||||
if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) {
|
||||
trace_xdp_exception(rq->dev, xdp_prog, act);
|
||||
frame = &orig_frame;
|
||||
stats->rx_drops++;
|
||||
@ -619,8 +626,8 @@ static struct xdp_frame *veth_xdp_rcv_one(struct veth_rq *rq,
|
||||
goto xdp_xmit;
|
||||
case XDP_REDIRECT:
|
||||
orig_frame = *frame;
|
||||
xdp.rxq->mem = frame->mem;
|
||||
if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
|
||||
xdp->rxq->mem = frame->mem;
|
||||
if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
|
||||
frame = &orig_frame;
|
||||
stats->rx_drops++;
|
||||
goto err_xdp;
|
||||
@ -801,7 +808,8 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
|
||||
{
|
||||
void *orig_data, *orig_data_end;
|
||||
struct bpf_prog *xdp_prog;
|
||||
struct xdp_buff xdp;
|
||||
struct veth_xdp_buff vxbuf;
|
||||
struct xdp_buff *xdp = &vxbuf.xdp;
|
||||
u32 act, metalen;
|
||||
int off;
|
||||
|
||||
@ -815,22 +823,23 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
|
||||
}
|
||||
|
||||
__skb_push(skb, skb->data - skb_mac_header(skb));
|
||||
if (veth_convert_skb_to_xdp_buff(rq, &xdp, &skb))
|
||||
if (veth_convert_skb_to_xdp_buff(rq, xdp, &skb))
|
||||
goto drop;
|
||||
vxbuf.skb = skb;
|
||||
|
||||
orig_data = xdp.data;
|
||||
orig_data_end = xdp.data_end;
|
||||
orig_data = xdp->data;
|
||||
orig_data_end = xdp->data_end;
|
||||
|
||||
act = bpf_prog_run_xdp(xdp_prog, &xdp);
|
||||
act = bpf_prog_run_xdp(xdp_prog, xdp);
|
||||
|
||||
switch (act) {
|
||||
case XDP_PASS:
|
||||
break;
|
||||
case XDP_TX:
|
||||
veth_xdp_get(&xdp);
|
||||
veth_xdp_get(xdp);
|
||||
consume_skb(skb);
|
||||
xdp.rxq->mem = rq->xdp_mem;
|
||||
if (unlikely(veth_xdp_tx(rq, &xdp, bq) < 0)) {
|
||||
xdp->rxq->mem = rq->xdp_mem;
|
||||
if (unlikely(veth_xdp_tx(rq, xdp, bq) < 0)) {
|
||||
trace_xdp_exception(rq->dev, xdp_prog, act);
|
||||
stats->rx_drops++;
|
||||
goto err_xdp;
|
||||
@ -839,10 +848,10 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
|
||||
rcu_read_unlock();
|
||||
goto xdp_xmit;
|
||||
case XDP_REDIRECT:
|
||||
veth_xdp_get(&xdp);
|
||||
veth_xdp_get(xdp);
|
||||
consume_skb(skb);
|
||||
xdp.rxq->mem = rq->xdp_mem;
|
||||
if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
|
||||
xdp->rxq->mem = rq->xdp_mem;
|
||||
if (xdp_do_redirect(rq->dev, xdp, xdp_prog)) {
|
||||
stats->rx_drops++;
|
||||
goto err_xdp;
|
||||
}
|
||||
@ -862,7 +871,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
|
||||
rcu_read_unlock();
|
||||
|
||||
/* check if bpf_xdp_adjust_head was used */
|
||||
off = orig_data - xdp.data;
|
||||
off = orig_data - xdp->data;
|
||||
if (off > 0)
|
||||
__skb_push(skb, off);
|
||||
else if (off < 0)
|
||||
@ -871,21 +880,21 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
|
||||
skb_reset_mac_header(skb);
|
||||
|
||||
/* check if bpf_xdp_adjust_tail was used */
|
||||
off = xdp.data_end - orig_data_end;
|
||||
off = xdp->data_end - orig_data_end;
|
||||
if (off != 0)
|
||||
__skb_put(skb, off); /* positive on grow, negative on shrink */
|
||||
|
||||
/* XDP frag metadata (e.g. nr_frags) are updated in eBPF helpers
|
||||
* (e.g. bpf_xdp_adjust_tail), we need to update data_len here.
|
||||
*/
|
||||
if (xdp_buff_has_frags(&xdp))
|
||||
if (xdp_buff_has_frags(xdp))
|
||||
skb->data_len = skb_shinfo(skb)->xdp_frags_size;
|
||||
else
|
||||
skb->data_len = 0;
|
||||
|
||||
skb->protocol = eth_type_trans(skb, rq->dev);
|
||||
|
||||
metalen = xdp.data - xdp.data_meta;
|
||||
metalen = xdp->data - xdp->data_meta;
|
||||
if (metalen)
|
||||
skb_metadata_set(skb, metalen);
|
||||
out:
|
||||
@ -898,7 +907,7 @@ xdp_drop:
|
||||
return NULL;
|
||||
err_xdp:
|
||||
rcu_read_unlock();
|
||||
xdp_return_buff(&xdp);
|
||||
xdp_return_buff(xdp);
|
||||
xdp_xmit:
|
||||
return NULL;
|
||||
}
|
||||
@ -1596,6 +1605,28 @@ static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
|
||||
}
|
||||
}
|
||||
|
||||
static int veth_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
|
||||
{
|
||||
struct veth_xdp_buff *_ctx = (void *)ctx;
|
||||
|
||||
if (!_ctx->skb)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
*timestamp = skb_hwtstamps(_ctx->skb)->hwtstamp;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash)
|
||||
{
|
||||
struct veth_xdp_buff *_ctx = (void *)ctx;
|
||||
|
||||
if (!_ctx->skb)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
*hash = skb_get_hash(_ctx->skb);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct net_device_ops veth_netdev_ops = {
|
||||
.ndo_init = veth_dev_init,
|
||||
.ndo_open = veth_open,
|
||||
@ -1617,6 +1648,11 @@ static const struct net_device_ops veth_netdev_ops = {
|
||||
.ndo_get_peer_dev = veth_peer_dev,
|
||||
};
|
||||
|
||||
static const struct xdp_metadata_ops veth_xdp_metadata_ops = {
|
||||
.xmo_rx_timestamp = veth_xdp_rx_timestamp,
|
||||
.xmo_rx_hash = veth_xdp_rx_hash,
|
||||
};
#define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
|
||||
NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \
|
||||
NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \
|
||||
@ -1633,6 +1669,7 @@ static void veth_setup(struct net_device *dev)
|
||||
dev->priv_flags |= IFF_PHONY_HEADROOM;
|
||||
|
||||
dev->netdev_ops = &veth_netdev_ops;
|
||||
dev->xdp_metadata_ops = &veth_xdp_metadata_ops;
|
||||
dev->ethtool_ops = &veth_ethtool_ops;
|
||||
dev->features |= NETIF_F_LLTX;
|
||||
dev->features |= VETH_FEATURES;
@ -1261,7 +1261,8 @@ struct bpf_prog_aux {
|
||||
enum bpf_prog_type saved_dst_prog_type;
|
||||
enum bpf_attach_type saved_dst_attach_type;
|
||||
bool verifier_zext; /* Zero extensions has been inserted by verifier. */
|
||||
bool offload_requested;
|
||||
bool dev_bound; /* Program is bound to the netdev. */
|
||||
bool offload_requested; /* Program is bound and offloaded to the netdev. */
|
||||
bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
|
||||
bool func_proto_unreliable;
|
||||
bool sleepable;
|
||||
@ -1421,7 +1422,8 @@ struct bpf_struct_ops {
|
||||
const struct bpf_verifier_ops *verifier_ops;
|
||||
int (*init)(struct btf *btf);
|
||||
int (*check_member)(const struct btf_type *t,
|
||||
const struct btf_member *member);
|
||||
const struct btf_member *member,
|
||||
const struct bpf_prog *prog);
|
||||
int (*init_member)(const struct btf_type *t,
|
||||
const struct btf_member *member,
|
||||
void *kdata, const void *udata);
|
||||
@ -1472,6 +1474,7 @@ struct bpf_dummy_ops {
|
||||
int (*test_1)(struct bpf_dummy_ops_state *cb);
|
||||
int (*test_2)(struct bpf_dummy_ops_state *cb, int a1, unsigned short a2,
|
||||
char a3, unsigned long a4);
|
||||
int (*test_sleepable)(struct bpf_dummy_ops_state *cb);
|
||||
};
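The new test_sleepable member gives selftests a hook for sleepable struct_ops programs; on the BPF side such a program would be declared in the sleepable section variant. A sketch only, assuming the usual vmlinux.h/bpf_tracing.h setup and libbpf's struct_ops section naming:

SEC("struct_ops.s/test_sleepable")
int BPF_PROG(test_sleepable, struct bpf_dummy_ops_state *state)
{
        return 0;
}

SEC(".struct_ops")
struct bpf_dummy_ops dummy_ops = {
        .test_sleepable = (void *)test_sleepable,
};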
int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
|
||||
@ -1523,9 +1526,9 @@ struct bpf_array {
|
||||
u32 index_mask;
|
||||
struct bpf_array_aux *aux;
|
||||
union {
|
||||
char value[0] __aligned(8);
|
||||
void *ptrs[0] __aligned(8);
|
||||
void __percpu *pptrs[0] __aligned(8);
|
||||
DECLARE_FLEX_ARRAY(char, value) __aligned(8);
|
||||
DECLARE_FLEX_ARRAY(void *, ptrs) __aligned(8);
|
||||
DECLARE_FLEX_ARRAY(void __percpu *, pptrs) __aligned(8);
|
||||
};
|
||||
};
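DECLARE_FLEX_ARRAY() (from <linux/stddef.h>) exists because a bare flexible array member is not allowed inside a union; the wrapper makes the layout above legal. A minimal illustration of the same pattern, not taken from this patch:

struct blob {
        u32 len;
        union {                                  /* two views of the same tail */
                DECLARE_FLEX_ARRAY(char, bytes);
                DECLARE_FLEX_ARRAY(u64, words);
        };
};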
||||
@ -2186,6 +2189,14 @@ struct bpf_core_ctx {
|
||||
const struct btf *btf;
|
||||
};
|
||||
|
||||
bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
|
||||
const struct bpf_reg_state *reg,
|
||||
int off);
|
||||
|
||||
bool btf_type_ids_nocast_alias(struct bpf_verifier_log *log,
|
||||
const struct btf *reg_btf, u32 reg_id,
|
||||
const struct btf *arg_btf, u32 arg_id);
|
||||
|
||||
int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
|
||||
int relo_idx, void *insn);
|
||||
|
||||
@ -2451,7 +2462,7 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux,
|
||||
bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool);
|
||||
|
||||
int bpf_prog_offload_compile(struct bpf_prog *prog);
|
||||
void bpf_prog_offload_destroy(struct bpf_prog *prog);
|
||||
void bpf_prog_dev_bound_destroy(struct bpf_prog *prog);
|
||||
int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
|
||||
struct bpf_prog *prog);
|
||||
|
||||
@ -2479,14 +2490,26 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev);
|
||||
void unpriv_ebpf_notify(int new_state);
|
||||
|
||||
#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL)
|
||||
int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr);
|
||||
int bpf_dev_bound_kfunc_check(struct bpf_verifier_log *log,
|
||||
struct bpf_prog_aux *prog_aux);
|
||||
void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id);
|
||||
int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr);
|
||||
int bpf_prog_dev_bound_inherit(struct bpf_prog *new_prog, struct bpf_prog *old_prog);
|
||||
void bpf_dev_bound_netdev_unregister(struct net_device *dev);
|
||||
|
||||
static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux)
|
||||
{
|
||||
return aux->dev_bound;
|
||||
}
|
||||
|
||||
static inline bool bpf_prog_is_offloaded(const struct bpf_prog_aux *aux)
|
||||
{
|
||||
return aux->offload_requested;
|
||||
}
|
||||
|
||||
static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
|
||||
bool bpf_prog_dev_bound_match(const struct bpf_prog *lhs, const struct bpf_prog *rhs);
|
||||
|
||||
static inline bool bpf_map_is_offloaded(struct bpf_map *map)
|
||||
{
|
||||
return unlikely(map->ops == &bpf_map_offload_ops);
|
||||
}
|
||||
@ -2507,18 +2530,50 @@ void sock_map_unhash(struct sock *sk);
|
||||
void sock_map_destroy(struct sock *sk);
|
||||
void sock_map_close(struct sock *sk, long timeout);
|
||||
#else
|
||||
static inline int bpf_prog_offload_init(struct bpf_prog *prog,
|
||||
union bpf_attr *attr)
|
||||
static inline int bpf_dev_bound_kfunc_check(struct bpf_verifier_log *log,
|
||||
struct bpf_prog_aux *prog_aux)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux)
|
||||
static inline void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog,
|
||||
u32 func_id)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline int bpf_prog_dev_bound_init(struct bpf_prog *prog,
|
||||
union bpf_attr *attr)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static inline int bpf_prog_dev_bound_inherit(struct bpf_prog *new_prog,
|
||||
struct bpf_prog *old_prog)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static inline void bpf_dev_bound_netdev_unregister(struct net_device *dev)
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool bpf_prog_is_dev_bound(const struct bpf_prog_aux *aux)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool bpf_map_is_dev_bound(struct bpf_map *map)
|
||||
static inline bool bpf_prog_is_offloaded(struct bpf_prog_aux *aux)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool bpf_prog_dev_bound_match(const struct bpf_prog *lhs, const struct bpf_prog *rhs)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool bpf_map_is_offloaded(struct bpf_map *map)
|
||||
{
|
||||
return false;
|
||||
}
@ -70,7 +70,10 @@ struct bpf_reg_state {
|
||||
u32 btf_id;
|
||||
};
|
||||
|
||||
u32 mem_size; /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */
|
||||
struct { /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */
|
||||
u32 mem_size;
|
||||
u32 dynptr_id; /* for dynptr slices */
|
||||
};
|
||||
|
||||
/* For dynptr stack slots */
|
||||
struct {
|
||||
|
@ -879,11 +879,13 @@ static inline bool module_sig_ok(struct module *module)
|
||||
#endif /* CONFIG_MODULE_SIG */
|
||||
|
||||
#if defined(CONFIG_MODULES) && defined(CONFIG_KALLSYMS)
|
||||
int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
|
||||
int module_kallsyms_on_each_symbol(const char *modname,
|
||||
int (*fn)(void *, const char *,
|
||||
struct module *, unsigned long),
|
||||
void *data);
|
||||
#else
|
||||
static inline int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
|
||||
static inline int module_kallsyms_on_each_symbol(const char *modname,
|
||||
int (*fn)(void *, const char *,
|
||||
struct module *, unsigned long),
|
||||
void *data)
|
||||
{
|
||||
|
@ -74,6 +74,7 @@ struct udp_tunnel_nic_info;
|
||||
struct udp_tunnel_nic;
|
||||
struct bpf_prog;
|
||||
struct xdp_buff;
|
||||
struct xdp_md;
|
||||
|
||||
void synchronize_net(void);
|
||||
void netdev_set_default_ethtool_ops(struct net_device *dev,
|
||||
@ -1618,6 +1619,11 @@ struct net_device_ops {
|
||||
bool cycles);
|
||||
};
|
||||
|
||||
struct xdp_metadata_ops {
|
||||
int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
|
||||
int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash);
|
||||
};
|
||||
|
||||
/**
|
||||
* enum netdev_priv_flags - &struct net_device priv_flags
|
||||
*
|
||||
@ -1801,6 +1807,7 @@ enum netdev_ml_priv_type {
|
||||
*
|
||||
* @netdev_ops: Includes several pointers to callbacks,
|
||||
* if one wants to override the ndo_*() functions
|
||||
* @xdp_metadata_ops: Includes pointers to XDP metadata callbacks.
|
||||
* @ethtool_ops: Management operations
|
||||
* @l3mdev_ops: Layer 3 master device operations
|
||||
* @ndisc_ops: Includes callbacks for different IPv6 neighbour
|
||||
@ -2050,6 +2057,7 @@ struct net_device {
|
||||
unsigned int flags;
|
||||
unsigned long long priv_flags;
|
||||
const struct net_device_ops *netdev_ops;
|
||||
const struct xdp_metadata_ops *xdp_metadata_ops;
|
||||
int ifindex;
|
||||
unsigned short gflags;
|
||||
unsigned short hard_header_len;
|
||||
|
@ -409,4 +409,25 @@ void xdp_attachment_setup(struct xdp_attachment_info *info,
|
||||
|
||||
#define DEV_MAP_BULK_SIZE XDP_BULK_QUEUE_SIZE
|
||||
|
||||
#define XDP_METADATA_KFUNC_xxx \
|
||||
XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_TIMESTAMP, \
|
||||
bpf_xdp_metadata_rx_timestamp) \
|
||||
XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \
|
||||
bpf_xdp_metadata_rx_hash) \
|
||||
|
||||
enum {
|
||||
#define XDP_METADATA_KFUNC(name, _) name,
|
||||
XDP_METADATA_KFUNC_xxx
|
||||
#undef XDP_METADATA_KFUNC
|
||||
MAX_XDP_METADATA_KFUNC,
|
||||
};
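The kfunc IDs generated above back bpf_xdp_metadata_rx_timestamp() and bpf_xdp_metadata_rx_hash(). A minimal consumer sketch in BPF C, assuming the usual vmlinux.h/bpf_helpers.h setup and that the externs resolve against kernel BTF:

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

/* Hedged sketch only: read the RX hints and pass the packet on. A nonzero
 * return from a kfunc simply means the device does not provide that hint.
 */
extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, __u64 *timestamp) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash) __ksym;

SEC("xdp")
int rx_meta(struct xdp_md *ctx)
{
        __u64 ts = 0;
        __u32 hash = 0;

        if (!bpf_xdp_metadata_rx_timestamp(ctx, &ts))
                bpf_printk("hw rx timestamp: %llu", ts);
        if (!bpf_xdp_metadata_rx_hash(ctx, &hash))
                bpf_printk("rx hash: %u", hash);
        return XDP_PASS;
}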
#ifdef CONFIG_NET
|
||||
u32 bpf_xdp_metadata_kfunc_id(int id);
|
||||
bool bpf_dev_bound_kfunc_id(u32 btf_id);
|
||||
#else
|
||||
static inline u32 bpf_xdp_metadata_kfunc_id(int id) { return 0; }
|
||||
static inline bool bpf_dev_bound_kfunc_id(u32 btf_id) { return false; }
|
||||
#endif
|
||||
|
||||
#endif /* __LINUX_NET_XDP_H__ */
|
||||
|
@ -19,8 +19,11 @@ struct xdp_sock;
|
||||
struct device;
|
||||
struct page;
|
||||
|
||||
#define XSK_PRIV_MAX 24
|
||||
|
||||
struct xdp_buff_xsk {
|
||||
struct xdp_buff xdp;
|
||||
u8 cb[XSK_PRIV_MAX];
|
||||
dma_addr_t dma;
|
||||
dma_addr_t frame_dma;
|
||||
struct xsk_buff_pool *pool;
|
||||
@ -28,6 +31,8 @@ struct xdp_buff_xsk {
|
||||
struct list_head free_list_node;
|
||||
};
|
||||
|
||||
#define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb))
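The cb[] scratch space plus this check let a driver wrap the XSK xdp_buff in its own per-packet context without growing xdp_buff_xsk. A hypothetical wrapper (names are illustrative, not from this series):

struct mydrv_xdp_buff {
        struct xdp_buff xdp;      /* must stay first so xdp_buff casts work */
        struct mydrv_cqe *cqe;    /* hypothetical completion descriptor */
        struct mydrv_rq *rq;      /* hypothetical receive queue */
};

/* Compile-time guarantee that the private fields fit inside cb[]: */
XSK_CHECK_PRIV_TYPE(struct mydrv_xdp_buff);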
struct xsk_dma_map {
|
||||
dma_addr_t *dma_pages;
|
||||
struct device *dev;
|
||||
|
@ -1156,6 +1156,11 @@ enum bpf_link_type {
|
||||
*/
|
||||
#define BPF_F_XDP_HAS_FRAGS (1U << 5)
|
||||
|
||||
/* If BPF_F_XDP_DEV_BOUND_ONLY is used in BPF_PROG_LOAD command, the loaded
|
||||
* program becomes device-bound but can access XDP metadata.
|
||||
*/
|
||||
#define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6)
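User space requests this at load time together with the target ifindex; a hedged sketch using libbpf's low-level loader, where ifindex, insns and insn_cnt are placeholders:

LIBBPF_OPTS(bpf_prog_load_opts, opts,
            .prog_ifindex = ifindex,                  /* netdev to bind to */
            .prog_flags = BPF_F_XDP_DEV_BOUND_ONLY);  /* dev-bound, not offloaded */

int prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "rx_meta", "GPL",
                            insns, insn_cnt, &opts);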
/* link_create.kprobe_multi.flags used in LINK_CREATE command for
|
||||
* BPF_TRACE_KPROBE_MULTI attach type to create return probe.
|
||||
*/
|
||||
@ -2647,6 +2652,11 @@ union bpf_attr {
|
||||
* Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
|
||||
* L2 type as Ethernet.
|
||||
*
|
||||
* * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**,
|
||||
* **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**:
|
||||
* Indicate the new IP header version after decapsulating the outer
|
||||
* IP header. Used when the inner and outer IP versions are different.
|
||||
*
|
||||
* A call to this helper is susceptible to change the underlying
|
||||
* packet buffer. Therefore, at load time, all checks on pointers
|
||||
* previously done by the verifier are invalidated and must be
|
||||
@ -5807,6 +5817,8 @@ enum {
|
||||
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
|
||||
BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
|
||||
BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
|
||||
BPF_F_ADJ_ROOM_DECAP_L3_IPV4 = (1ULL << 7),
|
||||
BPF_F_ADJ_ROOM_DECAP_L3_IPV6 = (1ULL << 8),
|
||||
};
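A TC program would use the new decap flags roughly as follows when stripping an outer IPv6 header from an IPv4-in-IPv6 packet. Sketch only; header validation is omitted and TC_ACT_* come from the usual uapi headers:

SEC("tc")
int decap_ip4ip6(struct __sk_buff *skb)
{
        /* Shrink at the MAC layer and tell the stack the inner L3 is IPv4. */
        if (bpf_skb_adjust_room(skb, -(__s32)sizeof(struct ipv6hdr),
                                BPF_ADJ_ROOM_MAC, BPF_F_ADJ_ROOM_DECAP_L3_IPV4))
                return TC_ACT_SHOT;
        return TC_ACT_OK;
}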
enum {
|
||||
|
@ -1939,7 +1939,7 @@ config RUST
|
||||
depends on !MODVERSIONS
|
||||
depends on !GCC_PLUGINS
|
||||
depends on !RANDSTRUCT
|
||||
depends on !DEBUG_INFO_BTF
|
||||
depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE
|
||||
select CONSTRUCTORS
|
||||
help
|
||||
Enables Rust support in the kernel.
|
||||
|
@ -36,6 +36,7 @@ obj-$(CONFIG_DEBUG_INFO_BTF) += sysfs_btf.o
|
||||
endif
|
||||
ifeq ($(CONFIG_BPF_JIT),y)
|
||||
obj-$(CONFIG_BPF_SYSCALL) += bpf_struct_ops.o
|
||||
obj-$(CONFIG_BPF_SYSCALL) += cpumask.o
|
||||
obj-${CONFIG_BPF_LSM} += bpf_lsm.o
|
||||
endif
|
||||
obj-$(CONFIG_BPF_PRELOAD) += preload/
|
||||
|
kernel/bpf/btf.c (160 changes)
@ -336,6 +336,12 @@ const char *btf_type_str(const struct btf_type *t)
|
||||
/* Type name size */
|
||||
#define BTF_SHOW_NAME_SIZE 80
|
||||
|
||||
/*
|
||||
* The suffix of a type that indicates it cannot alias another type when
|
||||
* comparing BTF IDs for kfunc invocations.
|
||||
*/
|
||||
#define NOCAST_ALIAS_SUFFIX "___init"
|
||||
|
||||
/*
|
||||
* Common data to all BTF show operations. Private show functions can add
|
||||
* their own data to a structure containing a struct btf_show and consult it
|
||||
@ -1397,12 +1403,18 @@ __printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env,
|
||||
if (!bpf_verifier_log_needed(log))
|
||||
return;
|
||||
|
||||
/* btf verifier prints all types it is processing via
|
||||
* btf_verifier_log_type(..., fmt = NULL).
|
||||
* Skip those prints for in-kernel BTF verification.
|
||||
*/
|
||||
if (log->level == BPF_LOG_KERNEL && !fmt)
|
||||
return;
|
||||
if (log->level == BPF_LOG_KERNEL) {
|
||||
/* btf verifier prints all types it is processing via
|
||||
* btf_verifier_log_type(..., fmt = NULL).
|
||||
* Skip those prints for in-kernel BTF verification.
|
||||
*/
|
||||
if (!fmt)
|
||||
return;
|
||||
|
||||
/* Skip logging when loading module BTF with mismatches permitted */
|
||||
if (env->btf->base_btf && IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH))
|
||||
return;
|
||||
}
|
||||
|
||||
__btf_verifier_log(log, "[%u] %s %s%s",
|
||||
env->log_type_id,
|
||||
@ -1441,8 +1453,15 @@ static void btf_verifier_log_member(struct btf_verifier_env *env,
|
||||
if (!bpf_verifier_log_needed(log))
|
||||
return;
|
||||
|
||||
if (log->level == BPF_LOG_KERNEL && !fmt)
|
||||
return;
|
||||
if (log->level == BPF_LOG_KERNEL) {
|
||||
if (!fmt)
|
||||
return;
|
||||
|
||||
/* Skip logging when loading module BTF with mismatches permitted */
|
||||
if (env->btf->base_btf && IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH))
|
||||
return;
|
||||
}
|
||||
|
||||
/* The CHECK_META phase already did a btf dump.
|
||||
*
|
||||
* If member is logged again, it must hit an error in
|
||||
@ -7261,11 +7280,14 @@ static int btf_module_notify(struct notifier_block *nb, unsigned long op,
|
||||
}
|
||||
btf = btf_parse_module(mod->name, mod->btf_data, mod->btf_data_size);
|
||||
if (IS_ERR(btf)) {
|
||||
pr_warn("failed to validate module [%s] BTF: %ld\n",
|
||||
mod->name, PTR_ERR(btf));
|
||||
kfree(btf_mod);
|
||||
if (!IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH))
|
||||
if (!IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH)) {
|
||||
pr_warn("failed to validate module [%s] BTF: %ld\n",
|
||||
mod->name, PTR_ERR(btf));
|
||||
err = PTR_ERR(btf);
|
||||
} else {
|
||||
pr_warn_once("Kernel module BTF mismatch detected, BTF debug info may be unavailable for some modules\n");
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
err = btf_alloc_id(btf);
|
||||
@ -8211,3 +8233,119 @@ out:
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
bool btf_nested_type_is_trusted(struct bpf_verifier_log *log,
|
||||
const struct bpf_reg_state *reg,
|
||||
int off)
|
||||
{
|
||||
struct btf *btf = reg->btf;
|
||||
const struct btf_type *walk_type, *safe_type;
|
||||
const char *tname;
|
||||
char safe_tname[64];
|
||||
long ret, safe_id;
|
||||
const struct btf_member *member, *m_walk = NULL;
|
||||
u32 i;
|
||||
const char *walk_name;
|
||||
|
||||
walk_type = btf_type_by_id(btf, reg->btf_id);
|
||||
if (!walk_type)
|
||||
return false;
|
||||
|
||||
tname = btf_name_by_offset(btf, walk_type->name_off);
|
||||
|
||||
ret = snprintf(safe_tname, sizeof(safe_tname), "%s__safe_fields", tname);
|
||||
if (ret < 0)
|
||||
return false;
|
||||
|
||||
safe_id = btf_find_by_name_kind(btf, safe_tname, BTF_INFO_KIND(walk_type->info));
|
||||
if (safe_id < 0)
|
||||
return false;
|
||||
|
||||
safe_type = btf_type_by_id(btf, safe_id);
|
||||
if (!safe_type)
|
||||
return false;
|
||||
|
||||
for_each_member(i, walk_type, member) {
|
||||
u32 moff;
|
||||
|
||||
/* We're looking for the PTR_TO_BTF_ID member in the struct
|
||||
* type we're walking which matches the specified offset.
|
||||
* Below, we'll iterate over the fields in the safe variant of
|
||||
* the struct and see if any of them has a matching type /
|
||||
* name.
|
||||
*/
|
||||
moff = __btf_member_bit_offset(walk_type, member) / 8;
|
||||
if (off == moff) {
|
||||
m_walk = member;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (m_walk == NULL)
|
||||
return false;
|
||||
|
||||
walk_name = __btf_name_by_offset(btf, m_walk->name_off);
|
||||
for_each_member(i, safe_type, member) {
|
||||
const char *m_name = __btf_name_by_offset(btf, member->name_off);
|
||||
|
||||
/* If we match on both type and name, the field is considered trusted. */
|
||||
if (m_walk->type == member->type && !strcmp(walk_name, m_name))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
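The "%s__safe_fields" lookup above implies a companion struct whose members name the nested pointers that remain trusted; a hypothetical example of the convention, with the member chosen purely for illustration:

/* Hypothetical: would mark task->group_leader as trusted when a program
 * walks a trusted struct task_struct pointer.
 */
struct task_struct__safe_fields {
        struct task_struct *group_leader;
};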
bool btf_type_ids_nocast_alias(struct bpf_verifier_log *log,
|
||||
const struct btf *reg_btf, u32 reg_id,
|
||||
const struct btf *arg_btf, u32 arg_id)
|
||||
{
|
||||
const char *reg_name, *arg_name, *search_needle;
|
||||
const struct btf_type *reg_type, *arg_type;
|
||||
int reg_len, arg_len, cmp_len;
|
||||
size_t pattern_len = sizeof(NOCAST_ALIAS_SUFFIX) - sizeof(char);
|
||||
|
||||
reg_type = btf_type_by_id(reg_btf, reg_id);
|
||||
if (!reg_type)
|
||||
return false;
|
||||
|
||||
arg_type = btf_type_by_id(arg_btf, arg_id);
|
||||
if (!arg_type)
|
||||
return false;
|
||||
|
||||
reg_name = btf_name_by_offset(reg_btf, reg_type->name_off);
|
||||
arg_name = btf_name_by_offset(arg_btf, arg_type->name_off);
|
||||
|
||||
reg_len = strlen(reg_name);
|
||||
arg_len = strlen(arg_name);
|
||||
|
||||
/* Exactly one of the two type names may be suffixed with ___init, so
|
||||
* if the strings are the same size, they can't possibly be no-cast
|
||||
* aliases of one another. If you have two of the same type names, e.g.
|
||||
* they're both nf_conn___init, it would be improper to return true
|
||||
* because they are _not_ no-cast aliases, they are the same type.
|
||||
*/
|
||||
if (reg_len == arg_len)
|
||||
return false;
|
||||
|
||||
/* Either of the two names must be the other name, suffixed with ___init. */
|
||||
if ((reg_len != arg_len + pattern_len) &&
|
||||
(arg_len != reg_len + pattern_len))
|
||||
return false;
|
||||
|
||||
if (reg_len < arg_len) {
|
||||
search_needle = strstr(arg_name, NOCAST_ALIAS_SUFFIX);
|
||||
cmp_len = reg_len;
|
||||
} else {
|
||||
search_needle = strstr(reg_name, NOCAST_ALIAS_SUFFIX);
|
||||
cmp_len = arg_len;
|
||||
}
|
||||
|
||||
if (!search_needle)
|
||||
return false;
|
||||
|
||||
/* ___init suffix must come at the end of the name */
|
||||
if (*(search_needle + pattern_len) != '\0')
|
||||
return false;
|
||||
|
||||
return !strncmp(reg_name, arg_name, cmp_len);
|
||||
}
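The pairing this targets is an allocator kfunc that returns a ___init-suffixed object which must not be silently used as the base type; sketched below with the conntrack kfuncs as an assumed example:

/* Assumed illustration: the half-built object and the committed one are
 * no-cast aliases, so the verifier keeps them apart until insertion.
 */
struct nf_conn___init *tmpl = bpf_xdp_ct_alloc(ctx, &tuple, sizeof(tuple),
                                               &opts, sizeof(opts));
struct nf_conn *ct = tmpl ? bpf_ct_insert_entry(tmpl) : NULL;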
@ -2096,6 +2096,14 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
|
||||
if (fp->kprobe_override)
|
||||
return false;
|
||||
|
||||
/* XDP programs inserted into maps are not guaranteed to run on
|
||||
* a particular netdev (and can run outside driver context entirely
|
||||
* in the case of devmap and cpumap). Until device checks
|
||||
* are implemented, prohibit adding dev-bound programs to program maps.
|
||||
*/
|
||||
if (bpf_prog_is_dev_bound(fp->aux))
|
||||
return false;
|
||||
|
||||
spin_lock(&map->owner.lock);
|
||||
if (!map->owner.type) {
|
||||
/* There's no owner yet where we could check for
|
||||
@ -2182,7 +2190,7 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
|
||||
* valid program, which in this case would simply not
|
||||
* be JITed, but falls back to the interpreter.
|
||||
*/
|
||||
if (!bpf_prog_is_dev_bound(fp->aux)) {
|
||||
if (!bpf_prog_is_offloaded(fp->aux)) {
|
||||
*err = bpf_prog_alloc_jited_linfo(fp);
|
||||
if (*err)
|
||||
return fp;
|
||||
@ -2554,7 +2562,7 @@ static void bpf_prog_free_deferred(struct work_struct *work)
|
||||
bpf_free_used_maps(aux);
|
||||
bpf_free_used_btfs(aux);
|
||||
if (bpf_prog_is_dev_bound(aux))
|
||||
bpf_prog_offload_destroy(aux->prog);
|
||||
bpf_prog_dev_bound_destroy(aux->prog);
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
if (aux->prog->has_callchain_buf)
|
||||
put_callchain_buffers();
|
||||
|
kernel/bpf/cpumask.c (new file, 476 lines)
@ -0,0 +1,476 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/* Copyright (c) 2023 Meta, Inc */
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/bpf_mem_alloc.h>
|
||||
#include <linux/btf.h>
|
||||
#include <linux/btf_ids.h>
|
||||
#include <linux/cpumask.h>
|
||||
|
||||
/**
|
||||
* struct bpf_cpumask - refcounted BPF cpumask wrapper structure
|
||||
* @cpumask: The actual cpumask embedded in the struct.
|
||||
* @usage: Object reference counter. When the refcount goes to 0, the
|
||||
* memory is released back to the BPF allocator, which provides
|
||||
* RCU safety.
|
||||
*
|
||||
* Note that we explicitly embed a cpumask_t rather than a cpumask_var_t. This
|
||||
* is done to avoid confusing the verifier due to the typedef of cpumask_var_t
|
||||
* changing depending on whether CONFIG_CPUMASK_OFFSTACK is defined or not. See
|
||||
* the details in <linux/cpumask.h>. The consequence is that this structure is
|
||||
* likely a bit larger than it needs to be when CONFIG_CPUMASK_OFFSTACK is
|
||||
* defined due to embedding the whole NR_CPUS-size bitmap, but the extra memory
|
||||
* overhead is minimal. For the more typical case of CONFIG_CPUMASK_OFFSTACK
|
||||
* not being defined, the structure is the same size regardless.
|
||||
*/
|
||||
struct bpf_cpumask {
|
||||
cpumask_t cpumask;
|
||||
refcount_t usage;
|
||||
};
|
||||
|
||||
static struct bpf_mem_alloc bpf_cpumask_ma;
|
||||
|
||||
static bool cpu_valid(u32 cpu)
|
||||
{
|
||||
return cpu < nr_cpu_ids;
|
||||
}
|
||||
|
||||
__diag_push();
|
||||
__diag_ignore_all("-Wmissing-prototypes",
|
||||
"Global kfuncs as their definitions will be in BTF");
|
||||
|
||||
/**
|
||||
* bpf_cpumask_create() - Create a mutable BPF cpumask.
|
||||
*
|
||||
* Allocates a cpumask that can be queried, mutated, acquired, and released by
|
||||
* a BPF program. The cpumask returned by this function must either be embedded
|
||||
* in a map as a kptr, or freed with bpf_cpumask_release().
|
||||
*
|
||||
* bpf_cpumask_create() allocates memory using the BPF memory allocator, and
|
||||
* will not block. It may return NULL if no memory is available.
|
||||
*/
|
||||
struct bpf_cpumask *bpf_cpumask_create(void)
|
||||
{
|
||||
struct bpf_cpumask *cpumask;
|
||||
|
||||
cpumask = bpf_mem_alloc(&bpf_cpumask_ma, sizeof(*cpumask));
|
||||
if (!cpumask)
|
||||
return NULL;
|
||||
|
||||
memset(cpumask, 0, sizeof(*cpumask));
|
||||
refcount_set(&cpumask->usage, 1);
|
||||
|
||||
return cpumask;
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_acquire() - Acquire a reference to a BPF cpumask.
|
||||
* @cpumask: The BPF cpumask being acquired. The cpumask must be a trusted
|
||||
* pointer.
|
||||
*
|
||||
* Acquires a reference to a BPF cpumask. The cpumask returned by this function
|
||||
* must either be embedded in a map as a kptr, or freed with
|
||||
* bpf_cpumask_release().
|
||||
*/
|
||||
struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask)
|
||||
{
|
||||
refcount_inc(&cpumask->usage);
|
||||
return cpumask;
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_kptr_get() - Attempt to acquire a reference to a BPF cpumask
|
||||
* stored in a map.
|
||||
* @cpumaskp: A pointer to a BPF cpumask map value.
|
||||
*
|
||||
* Attempts to acquire a reference to a BPF cpumask stored in a map value. The
|
||||
* cpumask returned by this function must either be embedded in a map as a
|
||||
* kptr, or freed with bpf_cpumask_release(). This function may return NULL if
|
||||
* no BPF cpumask was found in the specified map value.
|
||||
*/
|
||||
struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumaskp)
|
||||
{
|
||||
struct bpf_cpumask *cpumask;
|
||||
|
||||
/* The BPF memory allocator frees memory backing its caches in an RCU
|
||||
* callback. Thus, we can safely use RCU to ensure that the cpumask is
|
||||
* safe to read.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
|
||||
cpumask = READ_ONCE(*cpumaskp);
|
||||
if (cpumask && !refcount_inc_not_zero(&cpumask->usage))
|
||||
cpumask = NULL;
|
||||
|
||||
rcu_read_unlock();
|
||||
return cpumask;
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_release() - Release a previously acquired BPF cpumask.
|
||||
* @cpumask: The cpumask being released.
|
||||
*
|
||||
* Releases a previously acquired reference to a BPF cpumask. When the final
|
||||
* reference of the BPF cpumask has been released, it is subsequently freed in
|
||||
* an RCU callback in the BPF memory allocator.
|
||||
*/
|
||||
void bpf_cpumask_release(struct bpf_cpumask *cpumask)
|
||||
{
|
||||
if (!cpumask)
|
||||
return;
|
||||
|
||||
if (refcount_dec_and_test(&cpumask->usage)) {
|
||||
migrate_disable();
|
||||
bpf_mem_free(&bpf_cpumask_ma, cpumask);
|
||||
migrate_enable();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_first() - Get the index of the first nonzero bit in the cpumask.
|
||||
* @cpumask: The cpumask being queried.
|
||||
*
|
||||
* Find the index of the first nonzero bit of the cpumask. A struct bpf_cpumask
|
||||
* pointer may be safely passed to this function.
|
||||
*/
|
||||
u32 bpf_cpumask_first(const struct cpumask *cpumask)
|
||||
{
|
||||
return cpumask_first(cpumask);
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_first_zero() - Get the index of the first unset bit in the
|
||||
* cpumask.
|
||||
* @cpumask: The cpumask being queried.
|
||||
*
|
||||
* Find the index of the first unset bit of the cpumask. A struct bpf_cpumask
|
||||
* pointer may be safely passed to this function.
|
||||
*/
|
||||
u32 bpf_cpumask_first_zero(const struct cpumask *cpumask)
|
||||
{
|
||||
return cpumask_first_zero(cpumask);
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_set_cpu() - Set a bit for a CPU in a BPF cpumask.
|
||||
* @cpu: The CPU to be set in the cpumask.
|
||||
* @cpumask: The BPF cpumask in which a bit is being set.
|
||||
*/
|
||||
void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask)
|
||||
{
|
||||
if (!cpu_valid(cpu))
|
||||
return;
|
||||
|
||||
cpumask_set_cpu(cpu, (struct cpumask *)cpumask);
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_clear_cpu() - Clear a bit for a CPU in a BPF cpumask.
|
||||
* @cpu: The CPU to be cleared from the cpumask.
|
||||
* @cpumask: The BPF cpumask in which a bit is being cleared.
|
||||
*/
|
||||
void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask)
|
||||
{
|
||||
if (!cpu_valid(cpu))
|
||||
return;
|
||||
|
||||
cpumask_clear_cpu(cpu, (struct cpumask *)cpumask);
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_test_cpu() - Test whether a CPU is set in a cpumask.
|
||||
* @cpu: The CPU being queried for.
|
||||
* @cpumask: The cpumask being queried for containing a CPU.
|
||||
*
|
||||
* Return:
|
||||
* * true - @cpu is set in the cpumask
|
||||
* * false - @cpu was not set in the cpumask, or @cpu is an invalid cpu.
|
||||
*/
|
||||
bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask)
|
||||
{
|
||||
if (!cpu_valid(cpu))
|
||||
return false;
|
||||
|
||||
return cpumask_test_cpu(cpu, (struct cpumask *)cpumask);
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_test_and_set_cpu() - Atomically test and set a CPU in a BPF cpumask.
|
||||
* @cpu: The CPU being set and queried for.
|
||||
* @cpumask: The BPF cpumask being set and queried for containing a CPU.
|
||||
*
|
||||
* Return:
|
||||
* * true - @cpu is set in the cpumask
|
||||
* * false - @cpu was not set in the cpumask, or @cpu is invalid.
|
||||
*/
|
||||
bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask)
|
||||
{
|
||||
if (!cpu_valid(cpu))
|
||||
return false;
|
||||
|
||||
return cpumask_test_and_set_cpu(cpu, (struct cpumask *)cpumask);
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_test_and_clear_cpu() - Atomically test and clear a CPU in a BPF
|
||||
* cpumask.
|
||||
* @cpu: The CPU being cleared and queried for.
|
||||
* @cpumask: The BPF cpumask being cleared and queried for containing a CPU.
|
||||
*
|
||||
* Return:
|
||||
* * true - @cpu is set in the cpumask
|
||||
* * false - @cpu was not set in the cpumask, or @cpu is invalid.
|
||||
*/
|
||||
bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask)
|
||||
{
|
||||
if (!cpu_valid(cpu))
|
||||
return false;
|
||||
|
||||
return cpumask_test_and_clear_cpu(cpu, (struct cpumask *)cpumask);
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_setall() - Set all of the bits in a BPF cpumask.
|
||||
* @cpumask: The BPF cpumask having all of its bits set.
|
||||
*/
|
||||
void bpf_cpumask_setall(struct bpf_cpumask *cpumask)
|
||||
{
|
||||
cpumask_setall((struct cpumask *)cpumask);
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_clear() - Clear all of the bits in a BPF cpumask.
|
||||
* @cpumask: The BPF cpumask being cleared.
|
||||
*/
|
||||
void bpf_cpumask_clear(struct bpf_cpumask *cpumask)
|
||||
{
|
||||
cpumask_clear((struct cpumask *)cpumask);
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_and() - AND two cpumasks and store the result.
|
||||
* @dst: The BPF cpumask where the result is being stored.
|
||||
* @src1: The first input.
|
||||
* @src2: The second input.
|
||||
*
|
||||
* Return:
|
||||
* * true - @dst has at least one bit set following the operation
|
||||
* * false - @dst is empty following the operation
|
||||
*
|
||||
* struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
|
||||
*/
|
||||
bool bpf_cpumask_and(struct bpf_cpumask *dst,
|
||||
const struct cpumask *src1,
|
||||
const struct cpumask *src2)
|
||||
{
|
||||
return cpumask_and((struct cpumask *)dst, src1, src2);
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_or() - OR two cpumasks and store the result.
|
||||
* @dst: The BPF cpumask where the result is being stored.
|
||||
* @src1: The first input.
|
||||
* @src2: The second input.
|
||||
*
|
||||
* struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
|
||||
*/
|
||||
void bpf_cpumask_or(struct bpf_cpumask *dst,
|
||||
const struct cpumask *src1,
|
||||
const struct cpumask *src2)
|
||||
{
|
||||
cpumask_or((struct cpumask *)dst, src1, src2);
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_xor() - XOR two cpumasks and store the result.
|
||||
* @dst: The BPF cpumask where the result is being stored.
|
||||
* @src1: The first input.
|
||||
* @src2: The second input.
|
||||
*
|
||||
* struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
|
||||
*/
|
||||
void bpf_cpumask_xor(struct bpf_cpumask *dst,
|
||||
const struct cpumask *src1,
|
||||
const struct cpumask *src2)
|
||||
{
|
||||
cpumask_xor((struct cpumask *)dst, src1, src2);
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_equal() - Check two cpumasks for equality.
|
||||
* @src1: The first input.
|
||||
* @src2: The second input.
|
||||
*
|
||||
* Return:
|
||||
* * true - @src1 and @src2 have the same bits set.
|
||||
* * false - @src1 and @src2 differ in at least one bit.
|
||||
*
|
||||
* struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
|
||||
*/
|
||||
bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2)
|
||||
{
|
||||
return cpumask_equal(src1, src2);
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_intersects() - Check two cpumasks for overlap.
|
||||
* @src1: The first input.
|
||||
* @src2: The second input.
|
||||
*
|
||||
* Return:
|
||||
* * true - @src1 and @src2 have at least one of the same bits set.
|
||||
* * false - @src1 and @src2 don't have any of the same bits set.
|
||||
*
|
||||
* struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
|
||||
*/
|
||||
bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *src2)
|
||||
{
|
||||
return cpumask_intersects(src1, src2);
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_subset() - Check if a cpumask is a subset of another.
|
||||
* @src1: The first cpumask being checked as a subset.
|
||||
* @src2: The second cpumask being checked as a superset.
|
||||
*
|
||||
* Return:
|
||||
* * true - All of the bits of @src1 are set in @src2.
|
||||
* * false - At least one bit in @src1 is not set in @src2.
|
||||
*
|
||||
* struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
|
||||
*/
|
||||
bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2)
|
||||
{
|
||||
return cpumask_subset(src1, src2);
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_empty() - Check if a cpumask is empty.
|
||||
* @cpumask: The cpumask being checked.
|
||||
*
|
||||
* Return:
|
||||
* * true - None of the bits in @cpumask are set.
|
||||
* * false - At least one bit in @cpumask is set.
|
||||
*
|
||||
* A struct bpf_cpumask pointer may be safely passed to @cpumask.
|
||||
*/
|
||||
bool bpf_cpumask_empty(const struct cpumask *cpumask)
|
||||
{
|
||||
return cpumask_empty(cpumask);
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_full() - Check if a cpumask has all bits set.
|
||||
* @cpumask: The cpumask being checked.
|
||||
*
|
||||
* Return:
|
||||
* * true - All of the bits in @cpumask are set.
|
||||
* * false - At least one bit in @cpumask is cleared.
|
||||
*
|
||||
* A struct bpf_cpumask pointer may be safely passed to @cpumask.
|
||||
*/
|
||||
bool bpf_cpumask_full(const struct cpumask *cpumask)
|
||||
{
|
||||
return cpumask_full(cpumask);
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_copy() - Copy the contents of a cpumask into a BPF cpumask.
|
||||
* @dst: The BPF cpumask being copied into.
|
||||
* @src: The cpumask being copied.
|
||||
*
|
||||
* A struct bpf_cpumask pointer may be safely passed to @src.
|
||||
*/
|
||||
void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src)
|
||||
{
|
||||
cpumask_copy((struct cpumask *)dst, src);
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_any() - Return a random set CPU from a cpumask.
|
||||
* @cpumask: The cpumask being queried.
|
||||
*
|
||||
* Return:
|
||||
* * A random set bit within [0, num_cpus) if at least one bit is set.
|
||||
* * >= num_cpus if no bit is set.
|
||||
*
|
||||
* A struct bpf_cpumask pointer may be safely passed to @cpumask.
|
||||
*/
|
||||
u32 bpf_cpumask_any(const struct cpumask *cpumask)
|
||||
{
|
||||
return cpumask_any(cpumask);
|
||||
}
|
||||
|
||||
/**
|
||||
* bpf_cpumask_any_and() - Return a random set CPU from the AND of two
|
||||
* cpumasks.
|
||||
* @src1: The first cpumask.
|
||||
* @src2: The second cpumask.
|
||||
*
|
||||
* Return:
|
||||
* * A random set bit within [0, num_cpus) if at least one bit is set.
|
||||
* * >= num_cpus if no bit is set.
|
||||
*
|
||||
* struct bpf_cpumask pointers may be safely passed to @src1 and @src2.
|
||||
*/
|
||||
u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2)
|
||||
{
|
||||
return cpumask_any_and(src1, src2);
|
||||
}
|
||||
|
||||
__diag_pop();
|
||||
|
||||
BTF_SET8_START(cpumask_kfunc_btf_ids)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_create, KF_ACQUIRE | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_release, KF_RELEASE | KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_first, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_first_zero, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_set_cpu, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_clear_cpu, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_test_cpu, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_test_and_set_cpu, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_test_and_clear_cpu, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_setall, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_clear, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_and, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_or, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_xor, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_equal, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_intersects, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_subset, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_empty, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_full, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_any, KF_TRUSTED_ARGS)
|
||||
BTF_ID_FLAGS(func, bpf_cpumask_any_and, KF_TRUSTED_ARGS)
|
||||
BTF_SET8_END(cpumask_kfunc_btf_ids)
|
||||
|
||||
static const struct btf_kfunc_id_set cpumask_kfunc_set = {
|
||||
.owner = THIS_MODULE,
|
||||
.set = &cpumask_kfunc_btf_ids,
|
||||
};
|
||||
|
||||
BTF_ID_LIST(cpumask_dtor_ids)
|
||||
BTF_ID(struct, bpf_cpumask)
|
||||
BTF_ID(func, bpf_cpumask_release)
|
||||
|
||||
static int __init cpumask_kfunc_init(void)
|
||||
{
|
||||
int ret;
|
||||
const struct btf_id_dtor_kfunc cpumask_dtors[] = {
|
||||
{
|
||||
.btf_id = cpumask_dtor_ids[0],
|
||||
.kfunc_btf_id = cpumask_dtor_ids[1]
|
||||
},
|
||||
};
|
||||
|
||||
ret = bpf_mem_alloc_init(&bpf_cpumask_ma, 0, false);
|
||||
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &cpumask_kfunc_set);
|
||||
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &cpumask_kfunc_set);
|
||||
return ret ?: register_btf_id_dtor_kfuncs(cpumask_dtors,
|
||||
ARRAY_SIZE(cpumask_dtors),
|
||||
THIS_MODULE);
|
||||
}
|
||||
|
||||
late_initcall(cpumask_kfunc_init);
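Taken together, the registrations above expose the cpumask kfuncs to tracing and struct_ops programs. A hedged BPF-side sketch, with the externs resolved via kernel BTF and the attach point chosen only for illustration:

extern struct bpf_cpumask *bpf_cpumask_create(void) __ksym;
extern void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
extern void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym;

SEC("tp_btf/sched_switch")
int BPF_PROG(note_cpu, bool preempt, struct task_struct *prev, struct task_struct *next)
{
        struct bpf_cpumask *mask;

        mask = bpf_cpumask_create();
        if (!mask)
                return 0;

        /* Record the CPU this switch happened on; a real tracker would
         * stash the mask in a map as a kptr instead of dropping it.
         */
        bpf_cpumask_set_cpu(bpf_get_smp_processor_id(), mask);
        bpf_cpumask_release(mask);
        return 0;
}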
@ -41,7 +41,7 @@ struct bpf_offload_dev {
|
||||
struct bpf_offload_netdev {
|
||||
struct rhash_head l;
|
||||
struct net_device *netdev;
|
||||
struct bpf_offload_dev *offdev;
|
||||
struct bpf_offload_dev *offdev; /* NULL when bound-only */
|
||||
struct list_head progs;
|
||||
struct list_head maps;
|
||||
struct list_head offdev_netdevs;
|
||||
@ -56,7 +56,6 @@ static const struct rhashtable_params offdevs_params = {
|
||||
};
|
||||
|
||||
static struct rhashtable offdevs;
|
||||
static bool offdevs_inited;
|
||||
|
||||
static int bpf_dev_offload_check(struct net_device *netdev)
|
||||
{
|
||||
@ -72,58 +71,218 @@ bpf_offload_find_netdev(struct net_device *netdev)
|
||||
{
|
||||
lockdep_assert_held(&bpf_devs_lock);
|
||||
|
||||
if (!offdevs_inited)
|
||||
return NULL;
|
||||
return rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params);
|
||||
}
|
||||
|
||||
int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr)
|
||||
static int __bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
|
||||
struct net_device *netdev)
|
||||
{
|
||||
struct bpf_offload_netdev *ondev;
|
||||
int err;
|
||||
|
||||
ondev = kzalloc(sizeof(*ondev), GFP_KERNEL);
|
||||
if (!ondev)
|
||||
return -ENOMEM;
|
||||
|
||||
ondev->netdev = netdev;
|
||||
ondev->offdev = offdev;
|
||||
INIT_LIST_HEAD(&ondev->progs);
|
||||
INIT_LIST_HEAD(&ondev->maps);
|
||||
|
||||
err = rhashtable_insert_fast(&offdevs, &ondev->l, offdevs_params);
|
||||
if (err) {
|
||||
netdev_warn(netdev, "failed to register for BPF offload\n");
|
||||
goto err_free;
|
||||
}
|
||||
|
||||
if (offdev)
|
||||
list_add(&ondev->offdev_netdevs, &offdev->netdevs);
|
||||
return 0;
|
||||
|
||||
err_free:
|
||||
kfree(ondev);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
|
||||
{
|
||||
struct bpf_prog_offload *offload = prog->aux->offload;
|
||||
|
||||
if (offload->dev_state)
|
||||
offload->offdev->ops->destroy(prog);
|
||||
|
||||
list_del_init(&offload->offloads);
|
||||
kfree(offload);
|
||||
prog->aux->offload = NULL;
|
||||
}
|
||||
|
||||
static int bpf_map_offload_ndo(struct bpf_offloaded_map *offmap,
|
||||
enum bpf_netdev_command cmd)
|
||||
{
|
||||
struct netdev_bpf data = {};
|
||||
struct net_device *netdev;
|
||||
|
||||
ASSERT_RTNL();
|
||||
|
||||
data.command = cmd;
|
||||
data.offmap = offmap;
|
||||
/* Caller must make sure netdev is valid */
|
||||
netdev = offmap->netdev;
|
||||
|
||||
return netdev->netdev_ops->ndo_bpf(netdev, &data);
|
||||
}
|
||||
|
||||
static void __bpf_map_offload_destroy(struct bpf_offloaded_map *offmap)
|
||||
{
|
||||
WARN_ON(bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_FREE));
|
||||
/* Make sure BPF_MAP_GET_NEXT_ID can't find this dead map */
|
||||
bpf_map_free_id(&offmap->map, true);
|
||||
list_del_init(&offmap->offloads);
|
||||
offmap->netdev = NULL;
|
||||
}
|
||||
|
||||
static void __bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
|
||||
struct net_device *netdev)
|
||||
{
|
||||
struct bpf_offload_netdev *ondev, *altdev = NULL;
|
||||
struct bpf_offloaded_map *offmap, *mtmp;
|
||||
struct bpf_prog_offload *offload, *ptmp;
|
||||
|
||||
ASSERT_RTNL();
|
||||
|
||||
ondev = rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params);
|
||||
if (WARN_ON(!ondev))
|
||||
return;
|
||||
|
||||
WARN_ON(rhashtable_remove_fast(&offdevs, &ondev->l, offdevs_params));
|
||||
|
||||
/* Try to move the objects to another netdev of the device */
|
||||
if (offdev) {
|
||||
list_del(&ondev->offdev_netdevs);
|
||||
altdev = list_first_entry_or_null(&offdev->netdevs,
|
||||
struct bpf_offload_netdev,
|
||||
offdev_netdevs);
|
||||
}
|
||||
|
||||
if (altdev) {
|
||||
list_for_each_entry(offload, &ondev->progs, offloads)
|
||||
offload->netdev = altdev->netdev;
|
||||
list_splice_init(&ondev->progs, &altdev->progs);
|
||||
|
||||
list_for_each_entry(offmap, &ondev->maps, offloads)
|
||||
offmap->netdev = altdev->netdev;
|
||||
list_splice_init(&ondev->maps, &altdev->maps);
|
||||
} else {
|
||||
list_for_each_entry_safe(offload, ptmp, &ondev->progs, offloads)
|
||||
__bpf_prog_offload_destroy(offload->prog);
|
||||
list_for_each_entry_safe(offmap, mtmp, &ondev->maps, offloads)
|
||||
__bpf_map_offload_destroy(offmap);
|
||||
}
|
||||
|
||||
WARN_ON(!list_empty(&ondev->progs));
|
||||
WARN_ON(!list_empty(&ondev->maps));
|
||||
kfree(ondev);
|
||||
}
|
||||
|
||||
static int __bpf_prog_dev_bound_init(struct bpf_prog *prog, struct net_device *netdev)
|
||||
{
|
||||
struct bpf_offload_netdev *ondev;
|
||||
struct bpf_prog_offload *offload;
|
||||
int err;
|
||||
|
||||
if (attr->prog_type != BPF_PROG_TYPE_SCHED_CLS &&
|
||||
attr->prog_type != BPF_PROG_TYPE_XDP)
|
||||
return -EINVAL;
|
||||
|
||||
if (attr->prog_flags)
|
||||
return -EINVAL;
|
||||
|
||||
offload = kzalloc(sizeof(*offload), GFP_USER);
|
||||
if (!offload)
|
||||
return -ENOMEM;
|
||||
|
||||
offload->prog = prog;
|
||||
offload->netdev = netdev;
|
||||
|
||||
offload->netdev = dev_get_by_index(current->nsproxy->net_ns,
|
||||
attr->prog_ifindex);
|
||||
err = bpf_dev_offload_check(offload->netdev);
|
||||
if (err)
|
||||
goto err_maybe_put;
|
||||
|
||||
down_write(&bpf_devs_lock);
|
||||
ondev = bpf_offload_find_netdev(offload->netdev);
|
||||
if (!ondev) {
|
||||
err = -EINVAL;
|
||||
goto err_unlock;
|
||||
if (bpf_prog_is_offloaded(prog->aux)) {
|
||||
err = -EINVAL;
|
||||
goto err_free;
|
||||
}
|
||||
|
||||
/* When only binding to the device, explicitly
|
||||
* create an entry in the hashtable.
|
||||
*/
|
||||
err = __bpf_offload_dev_netdev_register(NULL, offload->netdev);
|
||||
if (err)
|
||||
goto err_free;
|
||||
ondev = bpf_offload_find_netdev(offload->netdev);
|
||||
}
|
||||
offload->offdev = ondev->offdev;
|
||||
prog->aux->offload = offload;
|
||||
list_add_tail(&offload->offloads, &ondev->progs);
|
||||
dev_put(offload->netdev);
|
||||
up_write(&bpf_devs_lock);
|
||||
|
||||
return 0;
|
||||
err_unlock:
|
||||
up_write(&bpf_devs_lock);
|
||||
err_maybe_put:
|
||||
if (offload->netdev)
|
||||
dev_put(offload->netdev);
|
||||
err_free:
|
||||
kfree(offload);
|
||||
return err;
|
||||
}
|
||||
|
||||
int bpf_prog_dev_bound_init(struct bpf_prog *prog, union bpf_attr *attr)
|
||||
{
|
||||
struct net_device *netdev;
|
||||
int err;
|
||||
|
||||
if (attr->prog_type != BPF_PROG_TYPE_SCHED_CLS &&
|
||||
attr->prog_type != BPF_PROG_TYPE_XDP)
|
||||
return -EINVAL;
|
||||
|
||||
if (attr->prog_flags & ~BPF_F_XDP_DEV_BOUND_ONLY)
|
||||
return -EINVAL;
|
||||
|
||||
if (attr->prog_type == BPF_PROG_TYPE_SCHED_CLS &&
|
||||
attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY)
|
||||
return -EINVAL;
|
||||
|
||||
netdev = dev_get_by_index(current->nsproxy->net_ns, attr->prog_ifindex);
|
||||
if (!netdev)
|
||||
return -EINVAL;
|
||||
|
||||
err = bpf_dev_offload_check(netdev);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
prog->aux->offload_requested = !(attr->prog_flags & BPF_F_XDP_DEV_BOUND_ONLY);
|
||||
|
||||
down_write(&bpf_devs_lock);
|
||||
err = __bpf_prog_dev_bound_init(prog, netdev);
|
||||
up_write(&bpf_devs_lock);
|
||||
|
||||
out:
|
||||
dev_put(netdev);
|
||||
return err;
|
||||
}
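
For reference, a rough user-space sketch of the load path bpf_prog_dev_bound_init() services: an XDP program loaded with prog_ifindex set and the new BPF_F_XDP_DEV_BOUND_ONLY flag, so it is bound to the device without being offloaded. The libbpf option names are assumptions about bpf_prog_load_opts, and the uapi header must already define the new flag; none of this is taken from the patch itself.

#include <bpf/bpf.h>
#include <linux/bpf.h>
#include <net/if.h>
#include <stdio.h>

static int load_dev_bound_xdp(const char *ifname)
{
        /* mov r0, 2 (XDP_PASS); exit */
        struct bpf_insn insns[] = {
                { .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0, .imm = 2 },
                { .code = BPF_JMP | BPF_EXIT },
        };
        LIBBPF_OPTS(bpf_prog_load_opts, opts,
                .prog_ifindex = if_nametoindex(ifname),
                .prog_flags = BPF_F_XDP_DEV_BOUND_ONLY,
        );
        int fd;

        fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "dev_bound_pass", "GPL",
                           insns, sizeof(insns) / sizeof(insns[0]), &opts);
        if (fd < 0)
                perror("bpf_prog_load");
        return fd;
}
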
int bpf_prog_dev_bound_inherit(struct bpf_prog *new_prog, struct bpf_prog *old_prog)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (!bpf_prog_is_dev_bound(old_prog->aux))
|
||||
return 0;
|
||||
|
||||
if (bpf_prog_is_offloaded(old_prog->aux))
|
||||
return -EINVAL;
|
||||
|
||||
new_prog->aux->dev_bound = old_prog->aux->dev_bound;
|
||||
new_prog->aux->offload_requested = old_prog->aux->offload_requested;
|
||||
|
||||
down_write(&bpf_devs_lock);
|
||||
if (!old_prog->aux->offload) {
|
||||
err = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = __bpf_prog_dev_bound_init(new_prog, old_prog->aux->offload->netdev);
|
||||
|
||||
out:
|
||||
up_write(&bpf_devs_lock);
|
||||
return err;
|
||||
}
|
||||
|
||||
int bpf_prog_offload_verifier_prep(struct bpf_prog *prog)
|
||||
{
|
||||
struct bpf_prog_offload *offload;
|
||||
@ -209,24 +368,25 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
|
||||
up_read(&bpf_devs_lock);
|
||||
}
|
||||
|
||||
static void __bpf_prog_offload_destroy(struct bpf_prog *prog)
|
||||
void bpf_prog_dev_bound_destroy(struct bpf_prog *prog)
|
||||
{
|
||||
struct bpf_prog_offload *offload = prog->aux->offload;
|
||||
struct bpf_offload_netdev *ondev;
|
||||
struct net_device *netdev;
|
||||
|
||||
if (offload->dev_state)
|
||||
offload->offdev->ops->destroy(prog);
|
||||
|
||||
list_del_init(&offload->offloads);
|
||||
kfree(offload);
|
||||
prog->aux->offload = NULL;
|
||||
}
|
||||
|
||||
void bpf_prog_offload_destroy(struct bpf_prog *prog)
|
||||
{
|
||||
rtnl_lock();
|
||||
down_write(&bpf_devs_lock);
|
||||
if (prog->aux->offload)
|
||||
if (prog->aux->offload) {
|
||||
list_del_init(&prog->aux->offload->offloads);
|
||||
|
||||
netdev = prog->aux->offload->netdev;
|
||||
__bpf_prog_offload_destroy(prog);
|
||||
|
||||
ondev = bpf_offload_find_netdev(netdev);
|
||||
if (!ondev->offdev && list_empty(&ondev->progs))
|
||||
__bpf_offload_dev_netdev_unregister(NULL, netdev);
|
||||
}
|
||||
up_write(&bpf_devs_lock);
|
||||
rtnl_unlock();
|
||||
}
|
||||
|
||||
static int bpf_prog_offload_translate(struct bpf_prog *prog)
|
||||
@ -340,22 +500,6 @@ int bpf_prog_offload_info_fill(struct bpf_prog_info *info,
|
||||
const struct bpf_prog_ops bpf_offload_prog_ops = {
|
||||
};
|
||||
|
||||
static int bpf_map_offload_ndo(struct bpf_offloaded_map *offmap,
|
||||
enum bpf_netdev_command cmd)
|
||||
{
|
||||
struct netdev_bpf data = {};
|
||||
struct net_device *netdev;
|
||||
|
||||
ASSERT_RTNL();
|
||||
|
||||
data.command = cmd;
|
||||
data.offmap = offmap;
|
||||
/* Caller must make sure netdev is valid */
|
||||
netdev = offmap->netdev;
|
||||
|
||||
return netdev->netdev_ops->ndo_bpf(netdev, &data);
|
||||
}
|
||||
|
||||
struct bpf_map *bpf_map_offload_map_alloc(union bpf_attr *attr)
|
||||
{
|
||||
struct net *net = current->nsproxy->net_ns;
|
||||
@ -405,15 +549,6 @@ err_unlock:
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static void __bpf_map_offload_destroy(struct bpf_offloaded_map *offmap)
|
||||
{
|
||||
WARN_ON(bpf_map_offload_ndo(offmap, BPF_OFFLOAD_MAP_FREE));
|
||||
/* Make sure BPF_MAP_GET_NEXT_ID can't find this dead map */
|
||||
bpf_map_free_id(&offmap->map, true);
|
||||
list_del_init(&offmap->offloads);
|
||||
offmap->netdev = NULL;
|
||||
}
|
||||
|
||||
void bpf_map_offload_map_free(struct bpf_map *map)
|
||||
{
|
||||
struct bpf_offloaded_map *offmap = map_to_offmap(map);
|
||||
@ -573,12 +708,28 @@ bool bpf_offload_dev_match(struct bpf_prog *prog, struct net_device *netdev)
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bpf_offload_dev_match);
|
||||
|
||||
bool bpf_prog_dev_bound_match(const struct bpf_prog *lhs, const struct bpf_prog *rhs)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
if (bpf_prog_is_offloaded(lhs->aux) != bpf_prog_is_offloaded(rhs->aux))
|
||||
return false;
|
||||
|
||||
down_read(&bpf_devs_lock);
|
||||
ret = lhs->aux->offload && rhs->aux->offload &&
|
||||
lhs->aux->offload->netdev &&
|
||||
lhs->aux->offload->netdev == rhs->aux->offload->netdev;
|
||||
up_read(&bpf_devs_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map)
|
||||
{
|
||||
struct bpf_offloaded_map *offmap;
|
||||
bool ret;
|
||||
|
||||
if (!bpf_map_is_dev_bound(map))
|
||||
if (!bpf_map_is_offloaded(map))
|
||||
return bpf_map_offload_neutral(map);
|
||||
offmap = map_to_offmap(map);
|
||||
|
||||
@ -592,32 +743,11 @@ bool bpf_offload_prog_map_match(struct bpf_prog *prog, struct bpf_map *map)
|
||||
int bpf_offload_dev_netdev_register(struct bpf_offload_dev *offdev,
|
||||
struct net_device *netdev)
|
||||
{
|
||||
struct bpf_offload_netdev *ondev;
|
||||
int err;
|
||||
|
||||
ondev = kzalloc(sizeof(*ondev), GFP_KERNEL);
|
||||
if (!ondev)
|
||||
return -ENOMEM;
|
||||
|
||||
ondev->netdev = netdev;
|
||||
ondev->offdev = offdev;
|
||||
INIT_LIST_HEAD(&ondev->progs);
|
||||
INIT_LIST_HEAD(&ondev->maps);
|
||||
|
||||
down_write(&bpf_devs_lock);
|
||||
err = rhashtable_insert_fast(&offdevs, &ondev->l, offdevs_params);
|
||||
if (err) {
|
||||
netdev_warn(netdev, "failed to register for BPF offload\n");
|
||||
goto err_unlock_free;
|
||||
}
|
||||
|
||||
list_add(&ondev->offdev_netdevs, &offdev->netdevs);
|
||||
err = __bpf_offload_dev_netdev_register(offdev, netdev);
|
||||
up_write(&bpf_devs_lock);
|
||||
return 0;
|
||||
|
||||
err_unlock_free:
|
||||
up_write(&bpf_devs_lock);
|
||||
kfree(ondev);
|
||||
return err;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_register);
|
||||
@ -625,43 +755,8 @@ EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_register);
|
||||
void bpf_offload_dev_netdev_unregister(struct bpf_offload_dev *offdev,
|
||||
struct net_device *netdev)
|
||||
{
|
||||
struct bpf_offload_netdev *ondev, *altdev;
|
||||
struct bpf_offloaded_map *offmap, *mtmp;
|
||||
struct bpf_prog_offload *offload, *ptmp;
|
||||
|
||||
ASSERT_RTNL();
|
||||
|
||||
down_write(&bpf_devs_lock);
|
||||
ondev = rhashtable_lookup_fast(&offdevs, &netdev, offdevs_params);
|
||||
if (WARN_ON(!ondev))
|
||||
goto unlock;
|
||||
|
||||
WARN_ON(rhashtable_remove_fast(&offdevs, &ondev->l, offdevs_params));
|
||||
list_del(&ondev->offdev_netdevs);
|
||||
|
||||
/* Try to move the objects to another netdev of the device */
|
||||
altdev = list_first_entry_or_null(&offdev->netdevs,
|
||||
struct bpf_offload_netdev,
|
||||
offdev_netdevs);
|
||||
if (altdev) {
|
||||
list_for_each_entry(offload, &ondev->progs, offloads)
|
||||
offload->netdev = altdev->netdev;
|
||||
list_splice_init(&ondev->progs, &altdev->progs);
|
||||
|
||||
list_for_each_entry(offmap, &ondev->maps, offloads)
|
||||
offmap->netdev = altdev->netdev;
|
||||
list_splice_init(&ondev->maps, &altdev->maps);
|
||||
} else {
|
||||
list_for_each_entry_safe(offload, ptmp, &ondev->progs, offloads)
|
||||
__bpf_prog_offload_destroy(offload->prog);
|
||||
list_for_each_entry_safe(offmap, mtmp, &ondev->maps, offloads)
|
||||
__bpf_map_offload_destroy(offmap);
|
||||
}
|
||||
|
||||
WARN_ON(!list_empty(&ondev->progs));
|
||||
WARN_ON(!list_empty(&ondev->maps));
|
||||
kfree(ondev);
|
||||
unlock:
|
||||
__bpf_offload_dev_netdev_unregister(offdev, netdev);
|
||||
up_write(&bpf_devs_lock);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bpf_offload_dev_netdev_unregister);
|
||||
@ -670,18 +765,6 @@ struct bpf_offload_dev *
|
||||
bpf_offload_dev_create(const struct bpf_prog_offload_ops *ops, void *priv)
|
||||
{
|
||||
struct bpf_offload_dev *offdev;
|
||||
int err;
|
||||
|
||||
down_write(&bpf_devs_lock);
|
||||
if (!offdevs_inited) {
|
||||
err = rhashtable_init(&offdevs, &offdevs_params);
|
||||
if (err) {
|
||||
up_write(&bpf_devs_lock);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
offdevs_inited = true;
|
||||
}
|
||||
up_write(&bpf_devs_lock);
|
||||
|
||||
offdev = kzalloc(sizeof(*offdev), GFP_KERNEL);
|
||||
if (!offdev)
|
||||
@ -707,3 +790,67 @@ void *bpf_offload_dev_priv(struct bpf_offload_dev *offdev)
|
||||
return offdev->priv;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(bpf_offload_dev_priv);
|
||||
|
||||
void bpf_dev_bound_netdev_unregister(struct net_device *dev)
|
||||
{
|
||||
struct bpf_offload_netdev *ondev;
|
||||
|
||||
ASSERT_RTNL();
|
||||
|
||||
down_write(&bpf_devs_lock);
|
||||
ondev = bpf_offload_find_netdev(dev);
|
||||
if (ondev && !ondev->offdev)
|
||||
__bpf_offload_dev_netdev_unregister(NULL, ondev->netdev);
|
||||
up_write(&bpf_devs_lock);
|
||||
}
|
||||
|
||||
int bpf_dev_bound_kfunc_check(struct bpf_verifier_log *log,
|
||||
struct bpf_prog_aux *prog_aux)
|
||||
{
|
||||
if (!bpf_prog_is_dev_bound(prog_aux)) {
|
||||
bpf_log(log, "metadata kfuncs require device-bound program\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (bpf_prog_is_offloaded(prog_aux)) {
|
||||
bpf_log(log, "metadata kfuncs can't be offloaded\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id)
|
||||
{
|
||||
const struct xdp_metadata_ops *ops;
|
||||
void *p = NULL;
|
||||
|
||||
/* We don't hold bpf_devs_lock while resolving several
|
||||
* kfuncs and can race with the unregister_netdevice().
|
||||
* We rely on bpf_dev_bound_match() check at attach
|
||||
* to render this program unusable.
|
||||
*/
|
||||
down_read(&bpf_devs_lock);
|
||||
if (!prog->aux->offload)
|
||||
goto out;
|
||||
|
||||
ops = prog->aux->offload->netdev->xdp_metadata_ops;
|
||||
if (!ops)
|
||||
goto out;
|
||||
|
||||
if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_TIMESTAMP))
|
||||
p = ops->xmo_rx_timestamp;
|
||||
else if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_HASH))
|
||||
p = ops->xmo_rx_hash;
|
||||
out:
|
||||
up_read(&bpf_devs_lock);
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
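
A small BPF-side sketch of what bpf_dev_bound_resolve_kfunc() above resolves: the RX metadata kfuncs. The prototypes are written out by hand here and assumed to match the kfuncs this code looks up; the program has to be loaded dev-bound (prog_ifindex set) for the verifier to accept these calls, and if the bound netdev lacks xdp_metadata_ops the calls fall back to the default kfuncs, as the fixup in the verifier changes below shows.

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

/* Assumed prototypes for the RX metadata kfuncs. */
extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, __u64 *timestamp) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash) __ksym;

SEC("xdp")
int rx_hints(struct xdp_md *ctx)
{
        __u64 ts = 0;
        __u32 hash = 0;

        if (!bpf_xdp_metadata_rx_timestamp(ctx, &ts))
                bpf_printk("rx timestamp: %llu", ts);
        if (!bpf_xdp_metadata_rx_hash(ctx, &hash))
                bpf_printk("rx hash: %u", hash);

        return XDP_PASS;
}

char _license[] SEC("license") = "GPL";
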
static int __init bpf_offload_init(void)
|
||||
{
|
||||
return rhashtable_init(&offdevs, &offdevs_params);
|
||||
}
|
||||
|
||||
late_initcall(bpf_offload_init);
|
||||
|
@ -181,7 +181,7 @@ static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
|
||||
int err;
|
||||
|
||||
/* Need to create a kthread, thus must support schedule */
|
||||
if (bpf_map_is_dev_bound(map)) {
|
||||
if (bpf_map_is_offloaded(map)) {
|
||||
return bpf_map_offload_update_elem(map, key, value, flags);
|
||||
} else if (map->map_type == BPF_MAP_TYPE_CPUMAP ||
|
||||
map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
|
||||
@ -238,7 +238,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
|
||||
void *ptr;
|
||||
int err;
|
||||
|
||||
if (bpf_map_is_dev_bound(map))
|
||||
if (bpf_map_is_offloaded(map))
|
||||
return bpf_map_offload_lookup_elem(map, key, value);
|
||||
|
||||
bpf_disable_instrumentation();
|
||||
@ -1483,7 +1483,7 @@ static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr)
|
||||
goto err_put;
|
||||
}
|
||||
|
||||
if (bpf_map_is_dev_bound(map)) {
|
||||
if (bpf_map_is_offloaded(map)) {
|
||||
err = bpf_map_offload_delete_elem(map, key);
|
||||
goto out;
|
||||
} else if (IS_FD_PROG_ARRAY(map) ||
|
||||
@ -1547,7 +1547,7 @@ static int map_get_next_key(union bpf_attr *attr)
|
||||
if (!next_key)
|
||||
goto free_key;
|
||||
|
||||
if (bpf_map_is_dev_bound(map)) {
|
||||
if (bpf_map_is_offloaded(map)) {
|
||||
err = bpf_map_offload_get_next_key(map, key, next_key);
|
||||
goto out;
|
||||
}
|
||||
@ -1605,7 +1605,7 @@ int generic_map_delete_batch(struct bpf_map *map,
|
||||
map->key_size))
|
||||
break;
|
||||
|
||||
if (bpf_map_is_dev_bound(map)) {
|
||||
if (bpf_map_is_offloaded(map)) {
|
||||
err = bpf_map_offload_delete_elem(map, key);
|
||||
break;
|
||||
}
|
||||
@ -1851,7 +1851,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
|
||||
map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
|
||||
map->map_type == BPF_MAP_TYPE_LRU_HASH ||
|
||||
map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
|
||||
if (!bpf_map_is_dev_bound(map)) {
|
||||
if (!bpf_map_is_offloaded(map)) {
|
||||
bpf_disable_instrumentation();
|
||||
rcu_read_lock();
|
||||
err = map->ops->map_lookup_and_delete_elem(map, key, value, attr->flags);
|
||||
@ -1944,7 +1944,7 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
|
||||
if (!ops)
|
||||
return -EINVAL;
|
||||
|
||||
if (!bpf_prog_is_dev_bound(prog->aux))
|
||||
if (!bpf_prog_is_offloaded(prog->aux))
|
||||
prog->aux->ops = ops;
|
||||
else
|
||||
prog->aux->ops = &bpf_offload_prog_ops;
|
||||
@ -2245,7 +2245,7 @@ bool bpf_prog_get_ok(struct bpf_prog *prog,
|
||||
|
||||
if (prog->type != *attach_type)
|
||||
return false;
|
||||
if (bpf_prog_is_dev_bound(prog->aux) && !attach_drv)
|
||||
if (bpf_prog_is_offloaded(prog->aux) && !attach_drv)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
@ -2481,7 +2481,8 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
|
||||
BPF_F_TEST_STATE_FREQ |
|
||||
BPF_F_SLEEPABLE |
|
||||
BPF_F_TEST_RND_HI32 |
|
||||
BPF_F_XDP_HAS_FRAGS))
|
||||
BPF_F_XDP_HAS_FRAGS |
|
||||
BPF_F_XDP_DEV_BOUND_ONLY))
|
||||
return -EINVAL;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
|
||||
@ -2565,7 +2566,7 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
|
||||
prog->aux->attach_btf = attach_btf;
|
||||
prog->aux->attach_btf_id = attr->attach_btf_id;
|
||||
prog->aux->dst_prog = dst_prog;
|
||||
prog->aux->offload_requested = !!attr->prog_ifindex;
|
||||
prog->aux->dev_bound = !!attr->prog_ifindex;
|
||||
prog->aux->sleepable = attr->prog_flags & BPF_F_SLEEPABLE;
|
||||
prog->aux->xdp_has_frags = attr->prog_flags & BPF_F_XDP_HAS_FRAGS;
|
||||
|
||||
@ -2589,7 +2590,14 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr)
|
||||
prog->gpl_compatible = is_gpl ? 1 : 0;
|
||||
|
||||
if (bpf_prog_is_dev_bound(prog->aux)) {
|
||||
err = bpf_prog_offload_init(prog, attr);
|
||||
err = bpf_prog_dev_bound_init(prog, attr);
|
||||
if (err)
|
||||
goto free_prog_sec;
|
||||
}
|
||||
|
||||
if (type == BPF_PROG_TYPE_EXT && dst_prog &&
|
||||
bpf_prog_is_dev_bound(dst_prog->aux)) {
|
||||
err = bpf_prog_dev_bound_inherit(prog, dst_prog);
|
||||
if (err)
|
||||
goto free_prog_sec;
|
||||
}
|
||||
@ -3987,7 +3995,7 @@ static int bpf_prog_get_info_by_fd(struct file *file,
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (bpf_prog_is_dev_bound(prog->aux)) {
|
||||
if (bpf_prog_is_offloaded(prog->aux)) {
|
||||
err = bpf_prog_offload_info_fill(&info, prog);
|
||||
if (err)
|
||||
return err;
|
||||
@ -4215,7 +4223,7 @@ static int bpf_map_get_info_by_fd(struct file *file,
|
||||
}
|
||||
info.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
|
||||
|
||||
if (bpf_map_is_dev_bound(map)) {
|
||||
if (bpf_map_is_offloaded(map)) {
|
||||
err = bpf_map_offload_info_fill(&info, map);
|
||||
if (err)
|
||||
return err;
|
||||
|
@ -255,6 +255,7 @@ struct bpf_call_arg_meta {
|
||||
int mem_size;
|
||||
u64 msize_max_value;
|
||||
int ref_obj_id;
|
||||
int dynptr_id;
|
||||
int map_uid;
|
||||
int func_id;
|
||||
struct btf *btf;
|
||||
@ -638,25 +639,11 @@ static void print_liveness(struct bpf_verifier_env *env,
|
||||
verbose(env, "D");
|
||||
}
|
||||
|
||||
static int get_spi(s32 off)
|
||||
static int __get_spi(s32 off)
|
||||
{
|
||||
return (-off - 1) / BPF_REG_SIZE;
|
||||
}
|
||||
|
||||
static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
|
||||
{
|
||||
int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
|
||||
|
||||
/* We need to check that slots between [spi - nr_slots + 1, spi] are
|
||||
* within [0, allocated_stack).
|
||||
*
|
||||
* Please note that the spi grows downwards. For example, a dynptr
|
||||
* takes the size of two stack slots; the first slot will be at
|
||||
* spi and the second slot will be at spi - 1.
|
||||
*/
|
||||
return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
|
||||
}
|
||||
|
||||
static struct bpf_func_state *func(struct bpf_verifier_env *env,
|
||||
const struct bpf_reg_state *reg)
|
||||
{
|
||||
@ -665,6 +652,46 @@ static struct bpf_func_state *func(struct bpf_verifier_env *env,
|
||||
return cur->frame[reg->frameno];
|
||||
}
|
||||
|
||||
static bool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
|
||||
{
|
||||
int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
|
||||
|
||||
/* We need to check that slots between [spi - nr_slots + 1, spi] are
|
||||
* within [0, allocated_stack).
|
||||
*
|
||||
* Please note that the spi grows downwards. For example, a dynptr
|
||||
* takes the size of two stack slots; the first slot will be at
|
||||
* spi and the second slot will be at spi - 1.
|
||||
*/
|
||||
return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
|
||||
}
|
||||
|
||||
static int dynptr_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
|
||||
{
|
||||
int off, spi;
|
||||
|
||||
if (!tnum_is_const(reg->var_off)) {
|
||||
verbose(env, "dynptr has to be at a constant offset\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
off = reg->off + reg->var_off.value;
|
||||
if (off % BPF_REG_SIZE) {
|
||||
verbose(env, "cannot pass in dynptr at an offset=%d\n", off);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
spi = __get_spi(off);
|
||||
if (spi < 1) {
|
||||
verbose(env, "cannot pass in dynptr at an offset=%d\n", off);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!is_spi_bounds_valid(func(env, reg), spi, BPF_DYNPTR_NR_SLOTS))
|
||||
return -ERANGE;
|
||||
return spi;
|
||||
}
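
A standalone illustration (not kernel code) of the slot arithmetic used by __get_spi() and validated by dynptr_get_spi() above, assuming BPF_REG_SIZE is 8.

#include <stdio.h>

#define BPF_REG_SIZE 8

static int get_spi(int off)
{
        return (-off - 1) / BPF_REG_SIZE;
}

int main(void)
{
        /* off = -16: spi = (16 - 1) / 8 = 1; the dynptr occupies spi 1 and spi 0. */
        printf("off=-16 -> spi=%d\n", get_spi(-16));
        /* off = -8: spi = 0, so spi - 1 is out of range and dynptr_get_spi() rejects it. */
        printf("off=-8  -> spi=%d\n", get_spi(-8));
        return 0;
}
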
|
||||
|
||||
static const char *kernel_type_name(const struct btf* btf, u32 id)
|
||||
{
|
||||
return btf_name_by_offset(btf, btf_type_by_id(btf, id)->name_off);
|
||||
@ -727,37 +754,58 @@ static bool dynptr_type_refcounted(enum bpf_dynptr_type type)
|
||||
|
||||
static void __mark_dynptr_reg(struct bpf_reg_state *reg,
|
||||
enum bpf_dynptr_type type,
|
||||
bool first_slot);
|
||||
bool first_slot, int dynptr_id);
|
||||
|
||||
static void __mark_reg_not_init(const struct bpf_verifier_env *env,
|
||||
struct bpf_reg_state *reg);
|
||||
|
||||
static void mark_dynptr_stack_regs(struct bpf_reg_state *sreg1,
|
||||
static void mark_dynptr_stack_regs(struct bpf_verifier_env *env,
|
||||
struct bpf_reg_state *sreg1,
|
||||
struct bpf_reg_state *sreg2,
|
||||
enum bpf_dynptr_type type)
|
||||
{
|
||||
__mark_dynptr_reg(sreg1, type, true);
|
||||
__mark_dynptr_reg(sreg2, type, false);
|
||||
int id = ++env->id_gen;
|
||||
|
||||
__mark_dynptr_reg(sreg1, type, true, id);
|
||||
__mark_dynptr_reg(sreg2, type, false, id);
|
||||
}
|
||||
|
||||
static void mark_dynptr_cb_reg(struct bpf_reg_state *reg,
|
||||
static void mark_dynptr_cb_reg(struct bpf_verifier_env *env,
|
||||
struct bpf_reg_state *reg,
|
||||
enum bpf_dynptr_type type)
|
||||
{
|
||||
__mark_dynptr_reg(reg, type, true);
|
||||
__mark_dynptr_reg(reg, type, true, ++env->id_gen);
|
||||
}
|
||||
|
||||
static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
|
||||
struct bpf_func_state *state, int spi);
|
||||
|
||||
static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
|
||||
enum bpf_arg_type arg_type, int insn_idx)
|
||||
{
|
||||
struct bpf_func_state *state = func(env, reg);
|
||||
enum bpf_dynptr_type type;
|
||||
int spi, i, id;
|
||||
int spi, i, id, err;
|
||||
|
||||
spi = get_spi(reg->off);
|
||||
spi = dynptr_get_spi(env, reg);
|
||||
if (spi < 0)
|
||||
return spi;
|
||||
|
||||
if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
|
||||
return -EINVAL;
|
||||
/* We cannot assume both spi and spi - 1 belong to the same dynptr,
|
||||
* hence we need to call destroy_if_dynptr_stack_slot twice for both,
|
||||
* to ensure that for the following example:
|
||||
* [d1][d1][d2][d2]
|
||||
* spi 3 2 1 0
|
||||
* So marking spi = 2 should lead to destruction of both d1 and d2. In
|
||||
* case they do belong to same dynptr, second call won't see slot_type
|
||||
* as STACK_DYNPTR and will simply skip destruction.
|
||||
*/
|
||||
err = destroy_if_dynptr_stack_slot(env, state, spi);
|
||||
if (err)
|
||||
return err;
|
||||
err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
for (i = 0; i < BPF_REG_SIZE; i++) {
|
||||
state->stack[spi].slot_type[i] = STACK_DYNPTR;
|
||||
@ -768,7 +816,7 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
|
||||
if (type == BPF_DYNPTR_TYPE_INVALID)
|
||||
return -EINVAL;
|
||||
|
||||
mark_dynptr_stack_regs(&state->stack[spi].spilled_ptr,
|
||||
mark_dynptr_stack_regs(env, &state->stack[spi].spilled_ptr,
|
||||
&state->stack[spi - 1].spilled_ptr, type);
|
||||
|
||||
if (dynptr_type_refcounted(type)) {
|
||||
@ -781,6 +829,9 @@ static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_
|
||||
state->stack[spi - 1].spilled_ptr.ref_obj_id = id;
|
||||
}
|
||||
|
||||
state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
|
||||
state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
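
To make the destroy-on-overwrite behaviour concrete, a hedged BPF-side sketch: clobbering the stack slots of an unreferenced dynptr is accepted and simply invalidates it (and any derived slices), while doing the same to a referenced one, e.g. a ringbuf dynptr, is rejected with "cannot overwrite referenced dynptr". The helper used is the standard bpf_dynptr_from_mem() and is assumed available.

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

char buf[16];

SEC("tp_btf/sys_enter")
int overwrite_local_dynptr(void *ctx)
{
        struct bpf_dynptr ptr;
        __u64 scratch[2] = {};

        bpf_dynptr_from_mem(buf, sizeof(buf), 0, &ptr);

        /* Clobbering the slots of this unreferenced dynptr is allowed; the
         * verifier marks the dynptr and any derived slices as invalid.
         */
        __builtin_memcpy(&ptr, scratch, sizeof(scratch));

        return 0;
}

char _license[] SEC("license") = "GPL";
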
@ -789,10 +840,9 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re
|
||||
struct bpf_func_state *state = func(env, reg);
|
||||
int spi, i;
|
||||
|
||||
spi = get_spi(reg->off);
|
||||
|
||||
if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
|
||||
return -EINVAL;
|
||||
spi = dynptr_get_spi(env, reg);
|
||||
if (spi < 0)
|
||||
return spi;
|
||||
|
||||
for (i = 0; i < BPF_REG_SIZE; i++) {
|
||||
state->stack[spi].slot_type[i] = STACK_INVALID;
|
||||
@ -805,43 +855,133 @@ static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_re
|
||||
|
||||
__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
|
||||
__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
|
||||
|
||||
/* Why do we need to set REG_LIVE_WRITTEN for STACK_INVALID slot?
|
||||
*
|
||||
* While we don't allow reading STACK_INVALID, it is still possible to
|
||||
* do <8 byte writes marking some but not all slots as STACK_MISC. Then,
|
||||
* helpers or insns can do partial read of that part without failing,
|
||||
* but check_stack_range_initialized, check_stack_read_var_off, and
|
||||
* check_stack_read_fixed_off will do mark_reg_read for all 8-bytes of
|
||||
* the slot conservatively. Hence we need to prevent those liveness
|
||||
* marking walks.
|
||||
*
|
||||
* This was not a problem before because STACK_INVALID is only set by
|
||||
* default (where the default reg state has its reg->parent as NULL), or
|
||||
* in clean_live_states after REG_LIVE_DONE (at which point
|
||||
* mark_reg_read won't walk reg->parent chain), but not randomly during
|
||||
* verifier state exploration (like we did above). Hence, for our case
|
||||
* parentage chain will still be live (i.e. reg->parent may be
|
||||
* non-NULL), while earlier reg->parent was NULL, so we need
|
||||
* REG_LIVE_WRITTEN to screen off read marker propagation when it is
|
||||
* done later on reads or by mark_dynptr_read as well to unnecessary
|
||||
* mark registers in verifier state.
|
||||
*/
|
||||
state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
|
||||
state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
|
||||
{
|
||||
struct bpf_func_state *state = func(env, reg);
|
||||
int spi, i;
|
||||
static void __mark_reg_unknown(const struct bpf_verifier_env *env,
|
||||
struct bpf_reg_state *reg);
|
||||
|
||||
static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
|
||||
struct bpf_func_state *state, int spi)
|
||||
{
|
||||
struct bpf_func_state *fstate;
|
||||
struct bpf_reg_state *dreg;
|
||||
int i, dynptr_id;
|
||||
|
||||
/* We always ensure that STACK_DYNPTR is never set partially,
|
||||
* hence just checking for slot_type[0] is enough. This is
|
||||
* different for STACK_SPILL, where it may be only set for
|
||||
* 1 byte, so code has to use is_spilled_reg.
|
||||
*/
|
||||
if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
|
||||
return 0;
|
||||
|
||||
/* Reposition spi to first slot */
|
||||
if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
|
||||
spi = spi + 1;
|
||||
|
||||
if (dynptr_type_refcounted(state->stack[spi].spilled_ptr.dynptr.type)) {
|
||||
verbose(env, "cannot overwrite referenced dynptr\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
mark_stack_slot_scratched(env, spi);
|
||||
mark_stack_slot_scratched(env, spi - 1);
|
||||
|
||||
/* Writing partially to one dynptr stack slot destroys both. */
|
||||
for (i = 0; i < BPF_REG_SIZE; i++) {
|
||||
state->stack[spi].slot_type[i] = STACK_INVALID;
|
||||
state->stack[spi - 1].slot_type[i] = STACK_INVALID;
|
||||
}
|
||||
|
||||
dynptr_id = state->stack[spi].spilled_ptr.id;
|
||||
/* Invalidate any slices associated with this dynptr */
|
||||
bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({
|
||||
/* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */
|
||||
if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM)
|
||||
continue;
|
||||
if (dreg->dynptr_id == dynptr_id) {
|
||||
if (!env->allow_ptr_leaks)
|
||||
__mark_reg_not_init(env, dreg);
|
||||
else
|
||||
__mark_reg_unknown(env, dreg);
|
||||
}
|
||||
}));
|
||||
|
||||
/* Do not release reference state, we are destroying dynptr on stack,
|
||||
* not using some helper to release it. Just reset register.
|
||||
*/
|
||||
__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
|
||||
__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
|
||||
|
||||
/* Same reason as unmark_stack_slots_dynptr above */
|
||||
state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
|
||||
state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
|
||||
int spi)
|
||||
{
|
||||
if (reg->type == CONST_PTR_TO_DYNPTR)
|
||||
return false;
|
||||
|
||||
spi = get_spi(reg->off);
|
||||
if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS))
|
||||
return true;
|
||||
|
||||
for (i = 0; i < BPF_REG_SIZE; i++) {
|
||||
if (state->stack[spi].slot_type[i] == STACK_DYNPTR ||
|
||||
state->stack[spi - 1].slot_type[i] == STACK_DYNPTR)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* For -ERANGE (i.e. spi not falling into allocated stack slots), we
|
||||
* will do check_mem_access to check and update stack bounds later, so
|
||||
* return true for that case.
|
||||
*/
|
||||
if (spi < 0)
|
||||
return spi == -ERANGE;
|
||||
/* We allow overwriting existing unreferenced STACK_DYNPTR slots, see
|
||||
* mark_stack_slots_dynptr which calls destroy_if_dynptr_stack_slot to
|
||||
* ensure dynptr objects at the slots we are touching are completely
|
||||
* destructed before we reinitialize them for a new one. For referenced
|
||||
* ones, destroy_if_dynptr_stack_slot returns an error early instead of
|
||||
* delaying it until the end where the user will get "Unreleased
|
||||
* reference" error.
|
||||
*/
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
|
||||
static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
|
||||
int spi)
|
||||
{
|
||||
struct bpf_func_state *state = func(env, reg);
|
||||
int spi;
|
||||
int i;
|
||||
|
||||
/* This already represents first slot of initialized bpf_dynptr */
|
||||
if (reg->type == CONST_PTR_TO_DYNPTR)
|
||||
return true;
|
||||
|
||||
spi = get_spi(reg->off);
|
||||
if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
|
||||
!state->stack[spi].spilled_ptr.dynptr.first_slot)
|
||||
if (spi < 0)
|
||||
return false;
|
||||
if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
|
||||
return false;
|
||||
|
||||
for (i = 0; i < BPF_REG_SIZE; i++) {
|
||||
@ -868,7 +1008,9 @@ static bool is_dynptr_type_expected(struct bpf_verifier_env *env, struct bpf_reg
|
||||
if (reg->type == CONST_PTR_TO_DYNPTR) {
|
||||
return reg->dynptr.type == dynptr_type;
|
||||
} else {
|
||||
spi = get_spi(reg->off);
|
||||
spi = dynptr_get_spi(env, reg);
|
||||
if (spi < 0)
|
||||
return false;
|
||||
return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
|
||||
}
|
||||
}
|
||||
@ -1449,7 +1591,7 @@ static void mark_reg_known_zero(struct bpf_verifier_env *env,
|
||||
}
|
||||
|
||||
static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type,
|
||||
bool first_slot)
|
||||
bool first_slot, int dynptr_id)
|
||||
{
|
||||
/* reg->type has no meaning for STACK_DYNPTR, but when we set reg for
|
||||
* callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply
|
||||
@ -1457,6 +1599,8 @@ static void __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type ty
|
||||
*/
|
||||
__mark_reg_known_zero(reg);
|
||||
reg->type = CONST_PTR_TO_DYNPTR;
|
||||
/* Give each dynptr a unique id to uniquely associate slices to it. */
|
||||
reg->id = dynptr_id;
|
||||
reg->dynptr.type = type;
|
||||
reg->dynptr.first_slot = first_slot;
|
||||
}
|
||||
@ -2189,6 +2333,12 @@ static int add_kfunc_call(struct bpf_verifier_env *env, u32 func_id, s16 offset)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (bpf_dev_bound_kfunc_id(func_id)) {
|
||||
err = bpf_dev_bound_kfunc_check(&env->log, prog_aux);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
desc = &tab->descs[tab->nr_descs++];
|
||||
desc->func_id = func_id;
|
||||
desc->imm = call_imm;
|
||||
@ -2390,6 +2540,32 @@ static int mark_reg_read(struct bpf_verifier_env *env,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mark_dynptr_read(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
|
||||
{
|
||||
struct bpf_func_state *state = func(env, reg);
|
||||
int spi, ret;
|
||||
|
||||
/* For CONST_PTR_TO_DYNPTR, it must have already been done by
|
||||
* check_reg_arg in check_helper_call and mark_btf_func_reg_size in
|
||||
* check_kfunc_call.
|
||||
*/
|
||||
if (reg->type == CONST_PTR_TO_DYNPTR)
|
||||
return 0;
|
||||
spi = dynptr_get_spi(env, reg);
|
||||
if (spi < 0)
|
||||
return spi;
|
||||
/* Caller ensures dynptr is valid and initialized, which means spi is in
|
||||
* bounds and spi is the first dynptr slot. Simply mark stack slot as
|
||||
* read.
|
||||
*/
|
||||
ret = mark_reg_read(env, &state->stack[spi].spilled_ptr,
|
||||
state->stack[spi].spilled_ptr.parent, REG_LIVE_READ64);
|
||||
if (ret)
|
||||
return ret;
|
||||
return mark_reg_read(env, &state->stack[spi - 1].spilled_ptr,
|
||||
state->stack[spi - 1].spilled_ptr.parent, REG_LIVE_READ64);
|
||||
}
|
||||
|
||||
/* This function is supposed to be used by the following 32-bit optimization
|
||||
* code only. It returns TRUE if the source or destination register operates
|
||||
* on 64-bit, otherwise return FALSE.
|
||||
@ -3311,6 +3487,10 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
|
||||
env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
|
||||
}
|
||||
|
||||
err = destroy_if_dynptr_stack_slot(env, state, spi);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
mark_stack_slot_scratched(env, spi);
|
||||
if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
|
||||
!register_is_null(reg) && env->bpf_capable) {
|
||||
@ -3424,6 +3604,14 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
for (i = min_off; i < max_off; i++) {
|
||||
int spi;
|
||||
|
||||
spi = __get_spi(i);
|
||||
err = destroy_if_dynptr_stack_slot(env, state, spi);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
|
||||
/* Variable offset writes destroy any spilled pointers in range. */
|
||||
for (i = min_off; i < max_off; i++) {
|
||||
@ -4763,6 +4951,25 @@ static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define BTF_TYPE_SAFE_NESTED(__type) __PASTE(__type, __safe_fields)
|
||||
|
||||
BTF_TYPE_SAFE_NESTED(struct task_struct) {
|
||||
const cpumask_t *cpus_ptr;
|
||||
};
|
||||
|
||||
static bool nested_ptr_is_trusted(struct bpf_verifier_env *env,
|
||||
struct bpf_reg_state *reg,
|
||||
int off)
|
||||
{
|
||||
/* If its parent is not trusted, it can't regain its trusted status. */
|
||||
if (!is_trusted_reg(reg))
|
||||
return false;
|
||||
|
||||
BTF_TYPE_EMIT(BTF_TYPE_SAFE_NESTED(struct task_struct));
|
||||
|
||||
return btf_nested_type_is_trusted(&env->log, reg, off);
|
||||
}
|
||||
|
||||
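
In practice the nested-trust annotation above enables something like the following hedged sketch (kfunc prototype declared by hand): because cpus_ptr is listed as a safe nested field of task_struct, walking it from a trusted task pointer keeps PTR_TRUSTED and the result can be passed straight to a KF_TRUSTED_ARGS kfunc without an explicit acquire.

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

/* Assumed prototype; the kfunc is registered in kernel/bpf/cpumask.c. */
bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym;

SEC("tp_btf/task_newtask")
int BPF_PROG(check_cpu0, struct task_struct *task, u64 clone_flags)
{
        /* task is trusted; task->cpus_ptr inherits that trust via the
         * BTF_TYPE_SAFE_NESTED annotation above.
         */
        if (bpf_cpumask_test_cpu(0, task->cpus_ptr))
                bpf_printk("task %d can run on cpu 0", task->pid);

        return 0;
}

char _license[] SEC("license") = "GPL";
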
static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
|
||||
struct bpf_reg_state *regs,
|
||||
int regno, int off, int size,
|
||||
@ -4851,10 +5058,17 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
|
||||
if (type_flag(reg->type) & PTR_UNTRUSTED)
|
||||
flag |= PTR_UNTRUSTED;
|
||||
|
||||
/* By default any pointer obtained from walking a trusted pointer is
|
||||
* no longer trusted except the rcu case below.
|
||||
/* By default any pointer obtained from walking a trusted pointer is no
|
||||
* longer trusted, unless the field being accessed has explicitly been
|
||||
* marked as inheriting its parent's state of trust.
|
||||
*
|
||||
* An RCU-protected pointer can also be deemed trusted if we are in an
|
||||
* RCU read region. This case is handled below.
|
||||
*/
|
||||
flag &= ~PTR_TRUSTED;
|
||||
if (nested_ptr_is_trusted(env, reg, off))
|
||||
flag |= PTR_TRUSTED;
|
||||
else
|
||||
flag &= ~PTR_TRUSTED;
|
||||
|
||||
if (flag & MEM_RCU) {
|
||||
/* Mark value register as MEM_RCU only if it is protected by
|
||||
@ -5451,6 +5665,31 @@ static int check_stack_range_initialized(
|
||||
}
|
||||
|
||||
if (meta && meta->raw_mode) {
|
||||
/* Ensure we won't be overwriting dynptrs when simulating byte
|
||||
* by byte access in check_helper_call using meta.access_size.
|
||||
* This would be a problem if we have a helper in the future
|
||||
* which takes:
|
||||
*
|
||||
* helper(uninit_mem, len, dynptr)
|
||||
*
|
||||
* Now, uninit_mem may overlap with dynptr pointer. Hence, it
|
||||
* may end up writing to dynptr itself when touching memory from
|
||||
* arg 1. This can be relaxed on a case by case basis for known
|
||||
* safe cases, but reject due to the possibility of aliasing by
|
||||
* default.
|
||||
*/
|
||||
for (i = min_off; i < max_off + access_size; i++) {
|
||||
int stack_off = -i - 1;
|
||||
|
||||
spi = __get_spi(i);
|
||||
/* raw_mode may write past allocated_stack */
|
||||
if (state->allocated_stack <= stack_off)
|
||||
continue;
|
||||
if (state->stack[spi].slot_type[stack_off % BPF_REG_SIZE] == STACK_DYNPTR) {
|
||||
verbose(env, "potential write to dynptr at off=%d disallowed\n", i);
|
||||
return -EACCES;
|
||||
}
|
||||
}
|
||||
meta->access_size = access_size;
|
||||
meta->regno = regno;
|
||||
return 0;
|
||||
@ -5938,6 +6177,7 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
|
||||
enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta)
|
||||
{
|
||||
struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno];
|
||||
int spi = 0;
|
||||
|
||||
/* MEM_UNINIT and MEM_RDONLY are exclusive, when applied to an
|
||||
* ARG_PTR_TO_DYNPTR (or ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_*):
|
||||
@ -5948,12 +6188,14 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
|
||||
}
|
||||
/* CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to
|
||||
* check_func_arg_reg_off's logic. We only need to check offset
|
||||
* alignment for PTR_TO_STACK.
|
||||
* and its alignment for PTR_TO_STACK.
|
||||
*/
|
||||
if (reg->type == PTR_TO_STACK && (reg->off % BPF_REG_SIZE)) {
|
||||
verbose(env, "cannot pass in dynptr at an offset=%d\n", reg->off);
|
||||
return -EINVAL;
|
||||
if (reg->type == PTR_TO_STACK) {
|
||||
spi = dynptr_get_spi(env, reg);
|
||||
if (spi < 0 && spi != -ERANGE)
|
||||
return spi;
|
||||
}
|
||||
|
||||
/* MEM_UNINIT - Points to memory that is an appropriate candidate for
|
||||
* constructing a mutable bpf_dynptr object.
|
||||
*
|
||||
@ -5970,7 +6212,7 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
|
||||
* to.
|
||||
*/
|
||||
if (arg_type & MEM_UNINIT) {
|
||||
if (!is_dynptr_reg_valid_uninit(env, reg)) {
|
||||
if (!is_dynptr_reg_valid_uninit(env, reg, spi)) {
|
||||
verbose(env, "Dynptr has to be an uninitialized dynptr\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -5985,13 +6227,15 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
|
||||
|
||||
meta->uninit_dynptr_regno = regno;
|
||||
} else /* MEM_RDONLY and None case from above */ {
|
||||
int err;
|
||||
|
||||
/* For the reg->type == PTR_TO_STACK case, bpf_dynptr is never const */
|
||||
if (reg->type == CONST_PTR_TO_DYNPTR && !(arg_type & MEM_RDONLY)) {
|
||||
verbose(env, "cannot pass pointer to const bpf_dynptr, the helper mutates it\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!is_dynptr_reg_valid_init(env, reg)) {
|
||||
if (!is_dynptr_reg_valid_init(env, reg, spi)) {
|
||||
verbose(env,
|
||||
"Expected an initialized dynptr as arg #%d\n",
|
||||
regno);
|
||||
@ -6018,6 +6262,10 @@ int process_dynptr_func(struct bpf_verifier_env *env, int regno,
|
||||
err_extra, regno);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
err = mark_dynptr_read(env, reg);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -6355,15 +6603,29 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
|
||||
}
|
||||
}
|
||||
|
||||
static u32 dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
|
||||
static int dynptr_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
|
||||
{
|
||||
struct bpf_func_state *state = func(env, reg);
|
||||
int spi;
|
||||
|
||||
if (reg->type == CONST_PTR_TO_DYNPTR)
|
||||
return reg->id;
|
||||
spi = dynptr_get_spi(env, reg);
|
||||
if (spi < 0)
|
||||
return spi;
|
||||
return state->stack[spi].spilled_ptr.id;
|
||||
}
|
||||
|
||||
static int dynptr_ref_obj_id(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
|
||||
{
|
||||
struct bpf_func_state *state = func(env, reg);
|
||||
int spi;
|
||||
|
||||
if (reg->type == CONST_PTR_TO_DYNPTR)
|
||||
return reg->ref_obj_id;
|
||||
|
||||
spi = get_spi(reg->off);
|
||||
spi = dynptr_get_spi(env, reg);
|
||||
if (spi < 0)
|
||||
return spi;
|
||||
return state->stack[spi].spilled_ptr.ref_obj_id;
|
||||
}
|
||||
|
||||
@ -6437,9 +6699,8 @@ skip_type_check:
|
||||
* PTR_TO_STACK.
|
||||
*/
|
||||
if (reg->type == PTR_TO_STACK) {
|
||||
spi = get_spi(reg->off);
|
||||
if (!is_spi_bounds_valid(state, spi, BPF_DYNPTR_NR_SLOTS) ||
|
||||
!state->stack[spi].spilled_ptr.ref_obj_id) {
|
||||
spi = dynptr_get_spi(env, reg);
|
||||
if (spi < 0 || !state->stack[spi].spilled_ptr.ref_obj_id) {
|
||||
verbose(env, "arg %d is an unacquired reference\n", regno);
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -7421,7 +7682,7 @@ static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
|
||||
* callback_fn(const struct bpf_dynptr_t* dynptr, void *callback_ctx);
|
||||
*/
|
||||
__mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
|
||||
mark_dynptr_cb_reg(&callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
|
||||
mark_dynptr_cb_reg(env, &callee->regs[BPF_REG_1], BPF_DYNPTR_TYPE_LOCAL);
|
||||
callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
|
||||
|
||||
/* unused */
|
||||
@ -7927,13 +8188,32 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
|
||||
for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
|
||||
if (arg_type_is_dynptr(fn->arg_type[i])) {
|
||||
struct bpf_reg_state *reg = ®s[BPF_REG_1 + i];
|
||||
int id, ref_obj_id;
|
||||
|
||||
if (meta.dynptr_id) {
|
||||
verbose(env, "verifier internal error: meta.dynptr_id already set\n");
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (meta.ref_obj_id) {
|
||||
verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
meta.ref_obj_id = dynptr_ref_obj_id(env, reg);
|
||||
id = dynptr_id(env, reg);
|
||||
if (id < 0) {
|
||||
verbose(env, "verifier internal error: failed to obtain dynptr id\n");
|
||||
return id;
|
||||
}
|
||||
|
||||
ref_obj_id = dynptr_ref_obj_id(env, reg);
|
||||
if (ref_obj_id < 0) {
|
||||
verbose(env, "verifier internal error: failed to obtain dynptr ref_obj_id\n");
|
||||
return ref_obj_id;
|
||||
}
|
||||
|
||||
meta.dynptr_id = id;
|
||||
meta.ref_obj_id = ref_obj_id;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -8089,6 +8369,9 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (is_dynptr_ref_function(func_id))
|
||||
regs[BPF_REG_0].dynptr_id = meta.dynptr_id;
|
||||
|
||||
if (is_ptr_cast_function(func_id) || is_dynptr_ref_function(func_id)) {
|
||||
/* For release_reference() */
|
||||
regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
|
||||
@ -8545,9 +8828,37 @@ static int process_kf_arg_ptr_to_btf_id(struct bpf_verifier_env *env,
|
||||
reg_ref_id = *reg2btf_ids[base_type(reg->type)];
|
||||
}
|
||||
|
||||
if (is_kfunc_trusted_args(meta) || (is_kfunc_release(meta) && reg->ref_obj_id))
|
||||
/* Enforce strict type matching for calls to kfuncs that are acquiring
|
||||
* or releasing a reference, or are no-cast aliases. We do _not_
|
||||
* enforce strict matching for plain KF_TRUSTED_ARGS kfuncs by default,
|
||||
* as we want to enable BPF programs to pass types that are bitwise
|
||||
* equivalent without forcing them to explicitly cast with something
|
||||
* like bpf_cast_to_kern_ctx().
|
||||
*
|
||||
* For example, say we had a type like the following:
|
||||
*
|
||||
* struct bpf_cpumask {
|
||||
* cpumask_t cpumask;
|
||||
* refcount_t usage;
|
||||
* };
|
||||
*
|
||||
* Note that as specified in <linux/cpumask.h>, cpumask_t is typedef'ed
|
||||
* to a struct cpumask, so it would be safe to pass a struct
|
||||
* bpf_cpumask * to a kfunc expecting a struct cpumask *.
|
||||
*
|
||||
* The philosophy here is similar to how we allow scalars of different
|
||||
* types to be passed to kfuncs as long as the size is the same. The
|
||||
* only difference here is that we're simply allowing
|
||||
* btf_struct_ids_match() to walk the struct at the 0th offset, and
|
||||
* resolve types.
|
||||
*/
|
||||
if (is_kfunc_acquire(meta) ||
|
||||
(is_kfunc_release(meta) && reg->ref_obj_id) ||
|
||||
btf_type_ids_nocast_alias(&env->log, reg_btf, reg_ref_id, meta->btf, ref_id))
|
||||
strict_type_match = true;
|
||||
|
||||
WARN_ON_ONCE(is_kfunc_trusted_args(meta) && reg->off);
|
||||
|
||||
reg_ref_t = btf_type_skip_modifiers(reg_btf, reg_ref_id, ®_ref_id);
|
||||
reg_ref_tname = btf_name_by_offset(reg_btf, reg_ref_t->name_off);
|
||||
if (!btf_struct_ids_match(&env->log, reg_btf, reg_ref_id, reg->off, meta->btf, ref_id, strict_type_match)) {
|
||||
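
Concretely, the relaxed matching described above is what lets a program hand a struct bpf_cpumask * to a kfunc expecting a struct cpumask *, since the wrapped cpumask_t sits at offset 0. A hedged sketch with hand-declared prototypes:

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

struct bpf_cpumask *bpf_cpumask_create(void) __ksym;
void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym;
bool bpf_cpumask_empty(const struct cpumask *cpumask) __ksym;

SEC("tp_btf/task_newtask")
int pass_bpf_cpumask_as_cpumask(void *ctx)
{
        struct bpf_cpumask *mask = bpf_cpumask_create();

        if (!mask)
                return 0;

        /* btf_struct_ids_match() walks struct bpf_cpumask at offset 0 and
         * finds a struct cpumask, so this cast is accepted; a ___init
         * no-cast alias would instead force a strict type match.
         */
        if (bpf_cpumask_empty((const struct cpumask *)mask))
                bpf_printk("freshly created mask is empty");

        bpf_cpumask_release(mask);
        return 0;
}

char _license[] SEC("license") = "GPL";
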
@ -8891,6 +9202,12 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (is_kfunc_trusted_args(meta) &&
|
||||
(register_is_null(reg) || type_may_be_null(reg->type))) {
|
||||
verbose(env, "Possibly NULL pointer passed to trusted arg%d\n", i);
|
||||
return -EACCES;
|
||||
}
|
||||
|
||||
if (reg->ref_obj_id) {
|
||||
if (is_kfunc_release(meta) && meta->ref_obj_id) {
|
||||
verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
|
||||
@ -13223,10 +13540,9 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
|
||||
return false;
|
||||
if (i % BPF_REG_SIZE != BPF_REG_SIZE - 1)
|
||||
continue;
|
||||
if (!is_spilled_reg(&old->stack[spi]))
|
||||
continue;
|
||||
if (!regsafe(env, &old->stack[spi].spilled_ptr,
|
||||
&cur->stack[spi].spilled_ptr, idmap))
|
||||
/* Both old and cur are having same slot_type */
|
||||
switch (old->stack[spi].slot_type[BPF_REG_SIZE - 1]) {
|
||||
case STACK_SPILL:
|
||||
/* when explored and current stack slot are both storing
|
||||
* spilled registers, check that stored pointers types
|
||||
* are the same as well.
|
||||
@ -13237,7 +13553,30 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
|
||||
* such verifier states are not equivalent.
|
||||
* return false to continue verification of this path
|
||||
*/
|
||||
if (!regsafe(env, &old->stack[spi].spilled_ptr,
|
||||
&cur->stack[spi].spilled_ptr, idmap))
|
||||
return false;
|
||||
break;
|
||||
case STACK_DYNPTR:
|
||||
{
|
||||
const struct bpf_reg_state *old_reg, *cur_reg;
|
||||
|
||||
old_reg = &old->stack[spi].spilled_ptr;
|
||||
cur_reg = &cur->stack[spi].spilled_ptr;
|
||||
if (old_reg->dynptr.type != cur_reg->dynptr.type ||
|
||||
old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot ||
|
||||
!check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
|
||||
return false;
|
||||
break;
|
||||
}
|
||||
case STACK_MISC:
|
||||
case STACK_ZERO:
|
||||
case STACK_INVALID:
|
||||
continue;
|
||||
/* Ensure that new unhandled slot types return false by default */
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -13834,7 +14173,7 @@ static int do_check(struct bpf_verifier_env *env)
|
||||
env->prev_log_len = env->log.len_used;
|
||||
}
|
||||
|
||||
if (bpf_prog_is_dev_bound(env->prog->aux)) {
|
||||
if (bpf_prog_is_offloaded(env->prog->aux)) {
|
||||
err = bpf_prog_offload_verify_insn(env, env->insn_idx,
|
||||
env->prev_insn_idx);
|
||||
if (err)
|
||||
@ -14314,7 +14653,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
|
||||
}
|
||||
}
|
||||
|
||||
if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
|
||||
if ((bpf_prog_is_offloaded(prog->aux) || bpf_map_is_offloaded(map)) &&
|
||||
!bpf_offload_prog_map_match(prog, map)) {
|
||||
verbose(env, "offload device mismatch between prog and map\n");
|
||||
return -EINVAL;
|
||||
@ -14795,7 +15134,7 @@ static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
|
||||
unsigned int orig_prog_len = env->prog->len;
|
||||
int err;
|
||||
|
||||
if (bpf_prog_is_dev_bound(env->prog->aux))
|
||||
if (bpf_prog_is_offloaded(env->prog->aux))
|
||||
bpf_prog_offload_remove_insns(env, off, cnt);
|
||||
|
||||
err = bpf_remove_insns(env->prog, off, cnt);
|
||||
@ -14876,7 +15215,7 @@ static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
|
||||
else
|
||||
continue;
|
||||
|
||||
if (bpf_prog_is_dev_bound(env->prog->aux))
|
||||
if (bpf_prog_is_offloaded(env->prog->aux))
|
||||
bpf_prog_offload_replace_insn(env, i, &ja);
|
||||
|
||||
memcpy(insn, &ja, sizeof(ja));
|
||||
@ -15063,7 +15402,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
|
||||
}
|
||||
}
|
||||
|
||||
if (bpf_prog_is_dev_bound(env->prog->aux))
|
||||
if (bpf_prog_is_offloaded(env->prog->aux))
|
||||
return 0;
|
||||
|
||||
insn = env->prog->insnsi + delta;
|
||||
@ -15463,7 +15802,7 @@ static int fixup_call_args(struct bpf_verifier_env *env)
|
||||
int err = 0;
|
||||
|
||||
if (env->prog->jit_requested &&
|
||||
!bpf_prog_is_dev_bound(env->prog->aux)) {
|
||||
!bpf_prog_is_offloaded(env->prog->aux)) {
|
||||
err = jit_subprogs(env);
|
||||
if (err == 0)
|
||||
return 0;
|
||||
@ -15507,12 +15846,25 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
|
||||
struct bpf_insn *insn_buf, int insn_idx, int *cnt)
|
||||
{
|
||||
const struct bpf_kfunc_desc *desc;
|
||||
void *xdp_kfunc;
|
||||
|
||||
if (!insn->imm) {
|
||||
verbose(env, "invalid kernel function call not eliminated in verifier pass\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
*cnt = 0;
|
||||
|
||||
if (bpf_dev_bound_kfunc_id(insn->imm)) {
|
||||
xdp_kfunc = bpf_dev_bound_resolve_kfunc(env->prog, insn->imm);
|
||||
if (xdp_kfunc) {
|
||||
insn->imm = BPF_CALL_IMM(xdp_kfunc);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* fallback to default kfunc when not supported by netdev */
|
||||
}
|
||||
|
||||
/* insn->imm has the btf func_id. Replace it with
|
||||
* an address (relative to __bpf_call_base).
|
||||
*/
|
||||
@ -15523,7 +15875,6 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
*cnt = 0;
|
||||
insn->imm = desc->imm;
|
||||
if (insn->off)
|
||||
return 0;
|
||||
@ -16449,7 +16800,7 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
|
||||
}
|
||||
|
||||
if (st_ops->check_member) {
|
||||
int err = st_ops->check_member(t, member);
|
||||
int err = st_ops->check_member(t, member, prog);
|
||||
|
||||
if (err) {
|
||||
verbose(env, "attach to unsupported member %s of struct %s\n",
|
||||
@ -16530,6 +16881,12 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
|
||||
if (tgt_prog) {
|
||||
struct bpf_prog_aux *aux = tgt_prog->aux;
|
||||
|
||||
if (bpf_prog_is_dev_bound(prog->aux) &&
|
||||
!bpf_prog_dev_bound_match(prog, tgt_prog)) {
|
||||
bpf_log(log, "Target program bound device mismatch");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
for (i = 0; i < aux->func_info_cnt; i++)
|
||||
if (aux->func_info[i].type_id == btf_id) {
|
||||
subprog = i;
|
||||
@ -16751,6 +17108,24 @@ BTF_ID(func, rcu_read_unlock_strict)
|
||||
#endif
|
||||
BTF_SET_END(btf_id_deny)
|
||||
|
||||
static bool can_be_sleepable(struct bpf_prog *prog)
|
||||
{
|
||||
if (prog->type == BPF_PROG_TYPE_TRACING) {
|
||||
switch (prog->expected_attach_type) {
|
||||
case BPF_TRACE_FENTRY:
|
||||
case BPF_TRACE_FEXIT:
|
||||
case BPF_MODIFY_RETURN:
|
||||
case BPF_TRACE_ITER:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return prog->type == BPF_PROG_TYPE_LSM ||
|
||||
prog->type == BPF_PROG_TYPE_KPROBE /* only for uprobes */ ||
|
||||
prog->type == BPF_PROG_TYPE_STRUCT_OPS;
|
||||
}
|
||||
|
||||
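
What the relaxed sleepable check enables in practice is a struct_ops program loaded via the "struct_ops.s" section prefix. A hedged sketch modeled on the accompanying selftests; bpf_dummy_ops and its test_sleepable member are assumptions about the test module types, not part of this hunk.

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

SEC("struct_ops.s/test_sleepable")
int BPF_PROG(test_sleepable, struct bpf_dummy_ops_state *state)
{
        /* The program is loaded with BPF_F_SLEEPABLE, so sleepable helpers
         * and kfuncs are usable here.
         */
        return 0;
}

SEC(".struct_ops")
struct bpf_dummy_ops dummy_ops = {
        .test_sleepable = (void *)test_sleepable,
};

char _license[] SEC("license") = "GPL";
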
static int check_attach_btf_id(struct bpf_verifier_env *env)
|
||||
{
|
||||
struct bpf_prog *prog = env->prog;
|
||||
@ -16769,9 +17144,8 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
|
||||
prog->type != BPF_PROG_TYPE_LSM && prog->type != BPF_PROG_TYPE_KPROBE) {
|
||||
verbose(env, "Only fentry/fexit/fmod_ret, lsm, and kprobe/uprobe programs can be sleepable\n");
|
||||
if (prog->aux->sleepable && !can_be_sleepable(prog)) {
|
||||
verbose(env, "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@ -16950,7 +17324,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
|
||||
if (ret < 0)
|
||||
goto skip_full_check;
|
||||
|
||||
if (bpf_prog_is_dev_bound(env->prog->aux)) {
|
||||
if (bpf_prog_is_offloaded(env->prog->aux)) {
|
||||
ret = bpf_prog_offload_verifier_prep(env->prog);
|
||||
if (ret)
|
||||
goto skip_full_check;
|
||||
@ -16963,7 +17337,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
|
||||
ret = do_check_subprogs(env);
|
||||
ret = ret ?: do_check_main(env);
|
||||
|
||||
if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
|
||||
if (ret == 0 && bpf_prog_is_offloaded(env->prog->aux))
|
||||
ret = bpf_prog_offload_finalize(env);
|
||||
|
||||
skip_full_check:
|
||||
@ -16998,7 +17372,7 @@ skip_full_check:
|
||||
/* do 32-bit optimization after insn patching has done so those patched
|
||||
* insns could be handled correctly.
|
||||
*/
|
||||
if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
|
||||
if (ret == 0 && !bpf_prog_is_offloaded(env->prog->aux)) {
|
||||
ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
|
||||
env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
|
||||
: false;
|
||||
|
@ -118,7 +118,6 @@ static struct klp_object *klp_find_object(struct klp_patch *patch,
|
||||
}
|
||||
|
||||
struct klp_find_arg {
|
||||
const char *objname;
|
||||
const char *name;
|
||||
unsigned long addr;
|
||||
unsigned long count;
|
||||
@ -148,15 +147,9 @@ static int klp_find_callback(void *data, const char *name,
|
||||
{
|
||||
struct klp_find_arg *args = data;
|
||||
|
||||
if ((mod && !args->objname) || (!mod && args->objname))
|
||||
return 0;
|
||||
|
||||
if (strcmp(args->name, name))
|
||||
return 0;
|
||||
|
||||
if (args->objname && strcmp(args->objname, mod->name))
|
||||
return 0;
|
||||
|
||||
return klp_match_callback(data, addr);
|
||||
}
|
||||
|
||||
@ -164,7 +157,6 @@ static int klp_find_object_symbol(const char *objname, const char *name,
|
||||
unsigned long sympos, unsigned long *addr)
|
||||
{
|
||||
struct klp_find_arg args = {
|
||||
.objname = objname,
|
||||
.name = name,
|
||||
.addr = 0,
|
||||
.count = 0,
|
||||
@ -172,7 +164,7 @@ static int klp_find_object_symbol(const char *objname, const char *name,
|
||||
};
|
||||
|
||||
if (objname)
|
||||
module_kallsyms_on_each_symbol(klp_find_callback, &args);
|
||||
module_kallsyms_on_each_symbol(objname, klp_find_callback, &args);
|
||||
else
|
||||
kallsyms_on_each_match_symbol(klp_match_callback, name, &args);
|
||||
|
||||
|
@@ -494,7 +494,8 @@ unsigned long module_kallsyms_lookup_name(const char *name)
return ret;
}

int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
int module_kallsyms_on_each_symbol(const char *modname,
int (*fn)(void *, const char *,
struct module *, unsigned long),
void *data)
{

@@ -509,6 +510,9 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
if (mod->state == MODULE_STATE_UNFORMED)
continue;

if (modname && strcmp(modname, mod->name))
continue;

/* Use rcu_dereference_sched() to remain compliant with the sparse tool */
preempt_disable();
kallsyms = rcu_dereference_sched(mod->kallsyms);

@@ -525,6 +529,13 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
if (ret != 0)
goto out;
}

/*
* The given module is found, the subsequent modules do not
* need to be compared.
*/
if (modname)
break;
}
out:
mutex_unlock(&module_mutex);
|
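module_kallsyms_on_each_symbol() now takes the module name as its first argument, so callers that only care about one module (livepatch above, and the BPF users below) stop walking once that module has been visited; passing NULL keeps the old walk-everything behaviour. A hedged sketch of a caller using the new signature follows; the callback prototype is the one declared in this hunk, everything else is illustrative.

/* Sketch only: count the kallsyms entries of a single module using the
 * new modname-filtered iteration.  Error handling trimmed.
 */
#include <linux/module.h>

static int count_symbol_cb(void *data, const char *name,
			   struct module *mod, unsigned long addr)
{
	(*(unsigned long *)data)++;
	return 0;	/* returning non-zero stops the walk */
}

static unsigned long count_module_symbols(const char *modname)
{
	unsigned long cnt = 0;

	/* NULL modname would iterate every loaded module, as before. */
	module_kallsyms_on_each_symbol(modname, count_symbol_cb, &cnt);
	return cnt;
}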
@ -2685,69 +2685,77 @@ static void symbols_swap_r(void *a, void *b, int size, const void *priv)
|
||||
}
|
||||
}
|
||||
|
||||
struct module_addr_args {
|
||||
unsigned long *addrs;
|
||||
u32 addrs_cnt;
|
||||
struct modules_array {
|
||||
struct module **mods;
|
||||
int mods_cnt;
|
||||
int mods_cap;
|
||||
};
|
||||
|
||||
static int module_callback(void *data, const char *name,
|
||||
struct module *mod, unsigned long addr)
|
||||
static int add_module(struct modules_array *arr, struct module *mod)
|
||||
{
|
||||
struct module_addr_args *args = data;
|
||||
struct module **mods;
|
||||
|
||||
/* We iterate all modules symbols and for each we:
|
||||
* - search for it in provided addresses array
|
||||
* - if found we check if we already have the module pointer stored
|
||||
* (we iterate modules sequentially, so we can check just the last
|
||||
* module pointer)
|
||||
* - take module reference and store it
|
||||
*/
|
||||
if (!bsearch(&addr, args->addrs, args->addrs_cnt, sizeof(addr),
|
||||
bpf_kprobe_multi_addrs_cmp))
|
||||
return 0;
|
||||
|
||||
if (args->mods && args->mods[args->mods_cnt - 1] == mod)
|
||||
return 0;
|
||||
|
||||
if (args->mods_cnt == args->mods_cap) {
|
||||
args->mods_cap = max(16, args->mods_cap * 3 / 2);
|
||||
mods = krealloc_array(args->mods, args->mods_cap, sizeof(*mods), GFP_KERNEL);
|
||||
if (arr->mods_cnt == arr->mods_cap) {
|
||||
arr->mods_cap = max(16, arr->mods_cap * 3 / 2);
|
||||
mods = krealloc_array(arr->mods, arr->mods_cap, sizeof(*mods), GFP_KERNEL);
|
||||
if (!mods)
|
||||
return -ENOMEM;
|
||||
args->mods = mods;
|
||||
arr->mods = mods;
|
||||
}
|
||||
|
||||
if (!try_module_get(mod))
|
||||
return -EINVAL;
|
||||
|
||||
args->mods[args->mods_cnt] = mod;
|
||||
args->mods_cnt++;
|
||||
arr->mods[arr->mods_cnt] = mod;
|
||||
arr->mods_cnt++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool has_module(struct modules_array *arr, struct module *mod)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = arr->mods_cnt - 1; i >= 0; i--) {
|
||||
if (arr->mods[i] == mod)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static int get_modules_for_addrs(struct module ***mods, unsigned long *addrs, u32 addrs_cnt)
|
||||
{
|
||||
struct module_addr_args args = {
|
||||
.addrs = addrs,
|
||||
.addrs_cnt = addrs_cnt,
|
||||
};
|
||||
int err;
|
||||
struct modules_array arr = {};
|
||||
u32 i, err = 0;
|
||||
|
||||
for (i = 0; i < addrs_cnt; i++) {
|
||||
struct module *mod;
|
||||
|
||||
preempt_disable();
|
||||
mod = __module_address(addrs[i]);
|
||||
/* Either no module or it's already stored */
|
||||
if (!mod || has_module(&arr, mod)) {
|
||||
preempt_enable();
|
||||
continue;
|
||||
}
|
||||
if (!try_module_get(mod))
|
||||
err = -EINVAL;
|
||||
preempt_enable();
|
||||
if (err)
|
||||
break;
|
||||
err = add_module(&arr, mod);
|
||||
if (err) {
|
||||
module_put(mod);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* We return either err < 0 in case of error, ... */
|
||||
err = module_kallsyms_on_each_symbol(module_callback, &args);
|
||||
if (err) {
|
||||
kprobe_multi_put_modules(args.mods, args.mods_cnt);
|
||||
kfree(args.mods);
|
||||
kprobe_multi_put_modules(arr.mods, arr.mods_cnt);
|
||||
kfree(arr.mods);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* or number of modules found if everything is ok. */
|
||||
*mods = args.mods;
|
||||
return args.mods_cnt;
|
||||
*mods = arr.mods;
|
||||
return arr.mods_cnt;
|
||||
}
|
||||
|
||||
int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
|
||||
@ -2860,13 +2868,6 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr
|
||||
bpf_kprobe_multi_cookie_cmp,
|
||||
bpf_kprobe_multi_cookie_swap,
|
||||
link);
|
||||
} else {
|
||||
/*
|
||||
* We need to sort addrs array even if there are no cookies
|
||||
* provided, to allow bsearch in get_modules_for_addrs.
|
||||
*/
|
||||
sort(addrs, cnt, sizeof(*addrs),
|
||||
bpf_kprobe_multi_addrs_cmp, NULL);
|
||||
}
|
||||
|
||||
err = get_modules_for_addrs(&link->mods, addrs, cnt);
|
||||
|
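The rework above resolves the owning module per address with __module_address() instead of scanning every module symbol, so creating a kprobe-multi link over many kernel/module functions no longer pays an O(all symbols) cost. For reference, a hedged user-space sketch of creating such a link with libbpf; the glob pattern and function name are illustrative.

/* Sketch only (user space, libbpf): attach one kprobe.multi program to all
 * functions matching a glob; the kernel pins the owning modules for the
 * lifetime of the link as shown above.
 */
#include <bpf/libbpf.h>

static struct bpf_link *attach_tcp_probes(struct bpf_program *prog)
{
	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);

	/* prog is expected to live in a SEC("kprobe.multi/tcp_*") section */
	return bpf_program__attach_kprobe_multi_opts(prog, "tcp_*", &opts);
}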
@ -8324,7 +8324,7 @@ int ftrace_lookup_symbols(const char **sorted_syms, size_t cnt, unsigned long *a
|
||||
found_all = kallsyms_on_each_symbol(kallsyms_callback, &args);
|
||||
if (found_all)
|
||||
return 0;
|
||||
found_all = module_kallsyms_on_each_symbol(kallsyms_callback, &args);
|
||||
found_all = module_kallsyms_on_each_symbol(NULL, kallsyms_callback, &args);
|
||||
return found_all ? 0 : -ESRCH;
|
||||
}
|
||||
|
||||
|
@@ -389,6 +389,15 @@ config PAHOLE_HAS_BTF_TAG
btf_decl_tag) or not. Currently only clang compiler implements
these attributes, so make the config depend on CC_IS_CLANG.

config PAHOLE_HAS_LANG_EXCLUDE
def_bool PAHOLE_VERSION >= 124
help
Support for the --lang_exclude flag which makes pahole exclude
compilation units from the supplied language. Used in Kbuild to
omit Rust CUs which are not supported in version 1.24 of pahole,
otherwise it would emit malformed kernel and module binaries when
using DEBUG_INFO_BTF_MODULES.

config DEBUG_INFO_BTF_MODULES
def_bool y
depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF
|
@ -154,6 +154,23 @@ static bool bpf_dummy_ops_is_valid_access(int off, int size,
|
||||
return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
|
||||
}
|
||||
|
||||
static int bpf_dummy_ops_check_member(const struct btf_type *t,
|
||||
const struct btf_member *member,
|
||||
const struct bpf_prog *prog)
|
||||
{
|
||||
u32 moff = __btf_member_bit_offset(t, member) / 8;
|
||||
|
||||
switch (moff) {
|
||||
case offsetof(struct bpf_dummy_ops, test_sleepable):
|
||||
break;
|
||||
default:
|
||||
if (prog->aux->sleepable)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log,
|
||||
const struct bpf_reg_state *reg,
|
||||
int off, int size, enum bpf_access_type atype,
|
||||
@ -208,6 +225,7 @@ static void bpf_dummy_unreg(void *kdata)
|
||||
struct bpf_struct_ops bpf_bpf_dummy_ops = {
|
||||
.verifier_ops = &bpf_dummy_verifier_ops,
|
||||
.init = bpf_dummy_init,
|
||||
.check_member = bpf_dummy_ops_check_member,
|
||||
.init_member = bpf_dummy_init_member,
|
||||
.reg = bpf_dummy_reg,
|
||||
.unreg = bpf_dummy_unreg,
|
||||
|
@ -1300,6 +1300,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
|
||||
if (kattr->test.flags & ~BPF_F_TEST_XDP_LIVE_FRAMES)
|
||||
return -EINVAL;
|
||||
|
||||
if (bpf_prog_is_dev_bound(prog->aux))
|
||||
return -EINVAL;
|
||||
|
||||
if (do_live) {
|
||||
if (!batch_size)
|
||||
batch_size = NAPI_POLL_WEIGHT;
|
||||
|
@ -9223,8 +9223,12 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
|
||||
NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time");
|
||||
return -EEXIST;
|
||||
}
|
||||
if (!offload && bpf_prog_is_dev_bound(new_prog->aux)) {
|
||||
NL_SET_ERR_MSG(extack, "Using device-bound program without HW_MODE flag is not supported");
|
||||
if (!offload && bpf_prog_is_offloaded(new_prog->aux)) {
|
||||
NL_SET_ERR_MSG(extack, "Using offloaded program without HW_MODE flag is not supported");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (bpf_prog_is_dev_bound(new_prog->aux) && !bpf_offload_dev_match(new_prog, dev)) {
|
||||
NL_SET_ERR_MSG(extack, "Program bound to different device");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) {
|
||||
@ -10829,6 +10833,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
|
||||
dev_shutdown(dev);
|
||||
|
||||
dev_xdp_uninstall(dev);
|
||||
bpf_dev_bound_netdev_unregister(dev);
|
||||
|
||||
netdev_offload_xstats_disable_all(dev);
|
||||
|
||||
|
@ -3381,13 +3381,17 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
|
||||
#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
|
||||
BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
|
||||
|
||||
#define BPF_F_ADJ_ROOM_DECAP_L3_MASK (BPF_F_ADJ_ROOM_DECAP_L3_IPV4 | \
|
||||
BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
|
||||
|
||||
#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \
|
||||
BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
|
||||
BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
|
||||
BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
|
||||
BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
|
||||
BPF_F_ADJ_ROOM_ENCAP_L2( \
|
||||
BPF_ADJ_ROOM_ENCAP_L2_MASK))
|
||||
BPF_ADJ_ROOM_ENCAP_L2_MASK) | \
|
||||
BPF_F_ADJ_ROOM_DECAP_L3_MASK)
|
||||
|
||||
static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
|
||||
u64 flags)
|
||||
@ -3501,6 +3505,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
|
||||
int ret;
|
||||
|
||||
if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
|
||||
BPF_F_ADJ_ROOM_DECAP_L3_MASK |
|
||||
BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
|
||||
return -EINVAL;
|
||||
|
||||
@ -3519,6 +3524,14 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
|
||||
if (unlikely(ret < 0))
|
||||
return ret;
|
||||
|
||||
/* Match skb->protocol to new outer l3 protocol */
|
||||
if (skb->protocol == htons(ETH_P_IP) &&
|
||||
flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
|
||||
skb->protocol = htons(ETH_P_IPV6);
|
||||
else if (skb->protocol == htons(ETH_P_IPV6) &&
|
||||
flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
|
||||
skb->protocol = htons(ETH_P_IP);
|
||||
|
||||
if (skb_is_gso(skb)) {
|
||||
struct skb_shared_info *shinfo = skb_shinfo(skb);
|
||||
|
||||
@@ -3608,6 +3621,22 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
return -ENOTSUPP;
}

if (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) {
if (!shrink)
return -EINVAL;

switch (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) {
case BPF_F_ADJ_ROOM_DECAP_L3_IPV4:
len_min = sizeof(struct iphdr);
break;
case BPF_F_ADJ_ROOM_DECAP_L3_IPV6:
len_min = sizeof(struct ipv6hdr);
break;
default:
return -EINVAL;
}
}

len_cur = skb->len - skb_network_offset(skb);
if ((shrink && (len_diff_abs >= len_cur ||
len_cur - len_diff_abs < len_min)) ||
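The new BPF_F_ADJ_ROOM_DECAP_L3_* flags let a program that strips an outer IP header of a different version tell the helper which version the now-outermost header has, so skb->protocol and the minimum-length check stay consistent. A hedged tc-BPF sketch for the ip6ip case (outer IPv6 carrying inner IPv4) follows; it assumes a vmlinux.h generated from a kernel with this series, and the TC_ACT_* values are open-coded for brevity.

/* Sketch only: decapsulate an outer IPv6 header that carries IPv4 inside.
 * Parsing/validation of the headers is omitted.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

#define TC_ACT_OK	0
#define TC_ACT_SHOT	2

char _license[] SEC("license") = "GPL";

SEC("tc")
int decap_ip6ip(struct __sk_buff *skb)
{
	/* Shrink by the outer IPv6 header; the DECAP flag says the new
	 * outermost header is IPv4, so the helper fixes up skb->protocol.
	 */
	if (bpf_skb_adjust_room(skb, -(int)sizeof(struct ipv6hdr),
				BPF_ADJ_ROOM_MAC,
				BPF_F_ADJ_ROOM_FIXED_GSO |
				BPF_F_ADJ_ROOM_DECAP_L3_IPV4) < 0)
		return TC_ACT_SHOT;
	return TC_ACT_OK;
}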
@ -5179,7 +5208,7 @@ static int sol_tcp_sockopt(struct sock *sk, int optname,
|
||||
char *optval, int *optlen,
|
||||
bool getopt)
|
||||
{
|
||||
if (sk->sk_prot->setsockopt != tcp_setsockopt)
|
||||
if (sk->sk_protocol != IPPROTO_TCP)
|
||||
return -EINVAL;
|
||||
|
||||
switch (optname) {
|
||||
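Keying the check on sk->sk_protocol rather than on the current setsockopt handler means TCP-level bpf_setsockopt()/bpf_getsockopt() keep working after something like kTLS has swapped the socket's proto ops. A hedged sketch of a sockops program relying on that; SOL_TCP/TCP_NODELAY values are open-coded because uapi macros are not part of vmlinux.h, and the callback choice is illustrative.

/* Sketch only: set TCP_NODELAY from a sockops program; with this change the
 * call also succeeds on TCP sockets that already have kTLS enabled.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

#define SOL_TCP		6
#define TCP_NODELAY	1

char _license[] SEC("license") = "GPL";

SEC("sockops")
int set_nodelay(struct bpf_sock_ops *skops)
{
	int one = 1;

	if (skops->op == BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB)
		bpf_setsockopt(skops, SOL_TCP, TCP_NODELAY, &one, sizeof(one));
	return 1;
}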
@ -6851,9 +6880,6 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
|
||||
FIELD)); \
|
||||
} while (0)
|
||||
|
||||
if (insn > insn_buf)
|
||||
return insn - insn_buf;
|
||||
|
||||
switch (si->off) {
|
||||
case offsetof(struct bpf_tcp_sock, rtt_min):
|
||||
BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
|
||||
@ -8738,7 +8764,7 @@ static bool xdp_is_valid_access(int off, int size,
|
||||
}
|
||||
|
||||
if (type == BPF_WRITE) {
|
||||
if (bpf_prog_is_dev_bound(prog->aux)) {
|
||||
if (bpf_prog_is_offloaded(prog->aux)) {
|
||||
switch (off) {
|
||||
case offsetof(struct xdp_md, rx_queue_index):
|
||||
return __is_valid_xdp_access(off, size);
|
||||
@ -10151,9 +10177,6 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
|
||||
SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
|
||||
} while (0)
|
||||
|
||||
if (insn > insn_buf)
|
||||
return insn - insn_buf;
|
||||
|
||||
switch (si->off) {
|
||||
case offsetof(struct bpf_sock_ops, op):
|
||||
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
|
||||
|
@ -4,6 +4,7 @@
|
||||
* Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
|
||||
*/
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/btf_ids.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/mm.h>
|
||||
@@ -709,3 +710,66 @@ struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf)

return nxdpf;
}

__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
"Global functions as their definitions will be in vmlinux BTF");

/**
* bpf_xdp_metadata_rx_timestamp - Read XDP frame RX timestamp.
* @ctx: XDP context pointer.
* @timestamp: Return value pointer.
*
* Returns 0 on success or ``-errno`` on error.
*/
int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
{
return -EOPNOTSUPP;
}

/**
* bpf_xdp_metadata_rx_hash - Read XDP frame RX hash.
* @ctx: XDP context pointer.
* @hash: Return value pointer.
*
* Returns 0 on success or ``-errno`` on error.
*/
int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash)
{
return -EOPNOTSUPP;
}

__diag_pop();
|
||||
BTF_SET8_START(xdp_metadata_kfunc_ids)
|
||||
#define XDP_METADATA_KFUNC(_, name) BTF_ID_FLAGS(func, name, 0)
|
||||
XDP_METADATA_KFUNC_xxx
|
||||
#undef XDP_METADATA_KFUNC
|
||||
BTF_SET8_END(xdp_metadata_kfunc_ids)
|
||||
|
||||
static const struct btf_kfunc_id_set xdp_metadata_kfunc_set = {
|
||||
.owner = THIS_MODULE,
|
||||
.set = &xdp_metadata_kfunc_ids,
|
||||
};
|
||||
|
||||
BTF_ID_LIST(xdp_metadata_kfunc_ids_unsorted)
|
||||
#define XDP_METADATA_KFUNC(name, str) BTF_ID(func, str)
|
||||
XDP_METADATA_KFUNC_xxx
|
||||
#undef XDP_METADATA_KFUNC
|
||||
|
||||
u32 bpf_xdp_metadata_kfunc_id(int id)
|
||||
{
|
||||
/* xdp_metadata_kfunc_ids is sorted and can't be used */
|
||||
return xdp_metadata_kfunc_ids_unsorted[id];
|
||||
}
|
||||
|
||||
bool bpf_dev_bound_kfunc_id(u32 btf_id)
|
||||
{
|
||||
return btf_id_set8_contains(&xdp_metadata_kfunc_ids, btf_id);
|
||||
}
|
||||
|
||||
static int __init xdp_metadata_init(void)
|
||||
{
|
||||
return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &xdp_metadata_kfunc_set);
|
||||
}
|
||||
late_initcall(xdp_metadata_init);
|
||||
|
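The stubs above are the defaults; drivers that implement XDP hints override them, and an XDP program calls them as kernel functions resolved through BTF. Below is a hedged sketch of the consumer side, modelled on the xdp_hw_metadata selftest added in this series; the program has to be loaded device-bound (see BPF_F_XDP_DEV_BOUND_ONLY further down) for the driver-specific versions to be used.

/* Sketch only: read RX timestamp/hash via the new metadata kfuncs.  On a
 * driver without support the default stubs above return -EOPNOTSUPP.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
					 __u64 *timestamp) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx,
				    __u32 *hash) __ksym;

char _license[] SEC("license") = "GPL";

SEC("xdp")
int rx_hints(struct xdp_md *ctx)
{
	__u64 ts = 0;
	__u32 hash = 0;

	bpf_xdp_metadata_rx_timestamp(ctx, &ts);
	bpf_xdp_metadata_rx_hash(ctx, &hash);
	/* e.g. copy ts/hash into the metadata area for AF_XDP consumers */
	return XDP_PASS;
}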
@ -248,7 +248,8 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
|
||||
}
|
||||
|
||||
static int bpf_tcp_ca_check_member(const struct btf_type *t,
|
||||
const struct btf_member *member)
|
||||
const struct btf_member *member,
|
||||
const struct bpf_prog *prog)
|
||||
{
|
||||
if (is_unsupported(__btf_member_bit_offset(t, member) / 8))
|
||||
return -ENOTSUPP;
|
||||
|
@ -131,7 +131,7 @@ always-y += tracex4_kern.o
|
||||
always-y += tracex5_kern.o
|
||||
always-y += tracex6_kern.o
|
||||
always-y += tracex7_kern.o
|
||||
always-y += sock_flags_kern.o
|
||||
always-y += sock_flags.bpf.o
|
||||
always-y += test_probe_write_user.bpf.o
|
||||
always-y += trace_output.bpf.o
|
||||
always-y += tcbpf1_kern.o
|
||||
@ -140,19 +140,19 @@ always-y += lathist_kern.o
|
||||
always-y += offwaketime_kern.o
|
||||
always-y += spintest_kern.o
|
||||
always-y += map_perf_test.bpf.o
|
||||
always-y += test_overhead_tp_kern.o
|
||||
always-y += test_overhead_raw_tp_kern.o
|
||||
always-y += test_overhead_kprobe_kern.o
|
||||
always-y += test_overhead_tp.bpf.o
|
||||
always-y += test_overhead_raw_tp.bpf.o
|
||||
always-y += test_overhead_kprobe.bpf.o
|
||||
always-y += parse_varlen.o parse_simple.o parse_ldabs.o
|
||||
always-y += test_cgrp2_tc_kern.o
|
||||
always-y += test_cgrp2_tc.bpf.o
|
||||
always-y += xdp1_kern.o
|
||||
always-y += xdp2_kern.o
|
||||
always-y += test_current_task_under_cgroup.bpf.o
|
||||
always-y += trace_event_kern.o
|
||||
always-y += sampleip_kern.o
|
||||
always-y += lwt_len_hist_kern.o
|
||||
always-y += lwt_len_hist.bpf.o
|
||||
always-y += xdp_tx_iptunnel_kern.o
|
||||
always-y += test_map_in_map_kern.o
|
||||
always-y += test_map_in_map.bpf.o
|
||||
always-y += tcp_synrto_kern.o
|
||||
always-y += tcp_rwnd_kern.o
|
||||
always-y += tcp_bufs_kern.o
|
||||
|
@ -10,29 +10,16 @@
|
||||
* General Public License for more details.
|
||||
*/
|
||||
|
||||
#include <uapi/linux/bpf.h>
|
||||
#include <uapi/linux/if_ether.h>
|
||||
#include <uapi/linux/ip.h>
|
||||
#include <uapi/linux/in.h>
|
||||
#include "vmlinux.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
struct bpf_elf_map {
|
||||
__u32 type;
|
||||
__u32 size_key;
|
||||
__u32 size_value;
|
||||
__u32 max_elem;
|
||||
__u32 flags;
|
||||
__u32 id;
|
||||
__u32 pinning;
|
||||
};
|
||||
|
||||
struct bpf_elf_map SEC("maps") lwt_len_hist_map = {
|
||||
.type = BPF_MAP_TYPE_PERCPU_HASH,
|
||||
.size_key = sizeof(__u64),
|
||||
.size_value = sizeof(__u64),
|
||||
.pinning = 2,
|
||||
.max_elem = 1024,
|
||||
};
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
|
||||
__type(key, u64);
|
||||
__type(value, u64);
|
||||
__uint(pinning, LIBBPF_PIN_BY_NAME);
|
||||
__uint(max_entries, 1024);
|
||||
} lwt_len_hist_map SEC(".maps");
|
||||
|
||||
static unsigned int log2(unsigned int v)
|
||||
{
|
@ -4,7 +4,7 @@
|
||||
NS1=lwt_ns1
|
||||
VETH0=tst_lwt1a
|
||||
VETH1=tst_lwt1b
|
||||
|
||||
BPF_PROG=lwt_len_hist.bpf.o
|
||||
TRACE_ROOT=/sys/kernel/debug/tracing
|
||||
|
||||
function cleanup {
|
||||
@ -30,7 +30,7 @@ ip netns exec $NS1 netserver
|
||||
|
||||
echo 1 > ${TRACE_ROOT}/tracing_on
|
||||
cp /dev/null ${TRACE_ROOT}/trace
|
||||
ip route add 192.168.253.2/32 encap bpf out obj lwt_len_hist_kern.o section len_hist dev $VETH0
|
||||
ip route add 192.168.253.2/32 encap bpf out obj $BPF_PROG section len_hist dev $VETH0
|
||||
netperf -H 192.168.253.2 -t TCP_STREAM
|
||||
cat ${TRACE_ROOT}/trace | grep -v '^#'
|
||||
./lwt_len_hist
|
||||
|
samples/bpf/net_shared.h (new file, 32 lines)
@@ -0,0 +1,32 @@
// SPDX-License-Identifier: GPL-2.0
#ifndef _NET_SHARED_H
#define _NET_SHARED_H

#define AF_INET 2
#define AF_INET6 10

#define ETH_ALEN 6
#define ETH_P_802_3_MIN 0x0600
#define ETH_P_8021Q 0x8100
#define ETH_P_8021AD 0x88A8
#define ETH_P_IP 0x0800
#define ETH_P_IPV6 0x86DD
#define ETH_P_ARP 0x0806
#define IPPROTO_ICMPV6 58

#define TC_ACT_OK 0
#define TC_ACT_SHOT 2

#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define bpf_ntohs(x) __builtin_bswap16(x)
#define bpf_htons(x) __builtin_bswap16(x)
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define bpf_ntohs(x) (x)
#define bpf_htons(x) (x)
#else
# error "Endianness detection needs to be set up for your compiler?!"
#endif

#endif
@ -1,11 +1,9 @@
|
||||
#include <uapi/linux/bpf.h>
|
||||
#include <linux/socket.h>
|
||||
#include <linux/net.h>
|
||||
#include <uapi/linux/in.h>
|
||||
#include <uapi/linux/in6.h>
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include "vmlinux.h"
|
||||
#include "net_shared.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
SEC("cgroup/sock1")
|
||||
SEC("cgroup/sock")
|
||||
int bpf_prog1(struct bpf_sock *sk)
|
||||
{
|
||||
char fmt[] = "socket: family %d type %d protocol %d\n";
|
||||
@ -17,29 +15,29 @@ int bpf_prog1(struct bpf_sock *sk)
|
||||
bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol);
|
||||
bpf_trace_printk(fmt2, sizeof(fmt2), uid, gid);
|
||||
|
||||
/* block PF_INET6, SOCK_RAW, IPPROTO_ICMPV6 sockets
|
||||
/* block AF_INET6, SOCK_DGRAM, IPPROTO_ICMPV6 sockets
|
||||
* ie., make ping6 fail
|
||||
*/
|
||||
if (sk->family == PF_INET6 &&
|
||||
sk->type == SOCK_RAW &&
|
||||
if (sk->family == AF_INET6 &&
|
||||
sk->type == SOCK_DGRAM &&
|
||||
sk->protocol == IPPROTO_ICMPV6)
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
SEC("cgroup/sock2")
|
||||
SEC("cgroup/sock")
|
||||
int bpf_prog2(struct bpf_sock *sk)
|
||||
{
|
||||
char fmt[] = "socket: family %d type %d protocol %d\n";
|
||||
|
||||
bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol);
|
||||
|
||||
/* block PF_INET, SOCK_RAW, IPPROTO_ICMP sockets
|
||||
/* block AF_INET, SOCK_DGRAM, IPPROTO_ICMP sockets
|
||||
* ie., make ping fail
|
||||
*/
|
||||
if (sk->family == PF_INET &&
|
||||
sk->type == SOCK_RAW &&
|
||||
if (sk->family == AF_INET &&
|
||||
sk->type == SOCK_DGRAM &&
|
||||
sk->protocol == IPPROTO_ICMP)
|
||||
return 0;
|
||||
|
@ -8,6 +8,7 @@ REDIRECT_USER='./tc_l2_redirect'
|
||||
REDIRECT_BPF='./tc_l2_redirect_kern.o'
|
||||
|
||||
RP_FILTER=$(< /proc/sys/net/ipv4/conf/all/rp_filter)
|
||||
IPV6_DISABLED=$(< /proc/sys/net/ipv6/conf/all/disable_ipv6)
|
||||
IPV6_FORWARDING=$(< /proc/sys/net/ipv6/conf/all/forwarding)
|
||||
|
||||
function config_common {
|
||||
@ -64,6 +65,7 @@ function config_common {
|
||||
|
||||
sysctl -q -w net.ipv4.conf.all.rp_filter=0
|
||||
sysctl -q -w net.ipv6.conf.all.forwarding=1
|
||||
sysctl -q -w net.ipv6.conf.all.disable_ipv6=0
|
||||
}
|
||||
|
||||
function cleanup {
|
||||
@ -77,6 +79,7 @@ function cleanup {
|
||||
$IP link del ip6t >& /dev/null
|
||||
sysctl -q -w net.ipv4.conf.all.rp_filter=$RP_FILTER
|
||||
sysctl -q -w net.ipv6.conf.all.forwarding=$IPV6_FORWARDING
|
||||
sysctl -q -w net.ipv6.conf.all.disable_ipv6=$IPV6_DISABLED
|
||||
rm -f /sys/fs/bpf/tc/globals/tun_iface
|
||||
[[ -z $DEBUG ]] || set -x
|
||||
set -e
|
||||
|
@ -3,6 +3,8 @@
|
||||
|
||||
# Test various socket options that can be set by attaching programs to cgroups.
|
||||
|
||||
MY_DIR=$(dirname $0)
|
||||
TEST=$MY_DIR/test_cgrp2_sock
|
||||
CGRP_MNT="/tmp/cgroupv2-test_cgrp2_sock"
|
||||
|
||||
################################################################################
|
||||
@ -19,7 +21,7 @@ print_result()
|
||||
|
||||
check_sock()
|
||||
{
|
||||
out=$(test_cgrp2_sock)
|
||||
out=$($TEST)
|
||||
echo $out | grep -q "$1"
|
||||
if [ $? -ne 0 ]; then
|
||||
print_result 1 "IPv4: $2"
|
||||
@ -33,7 +35,7 @@ check_sock()
|
||||
|
||||
check_sock6()
|
||||
{
|
||||
out=$(test_cgrp2_sock -6)
|
||||
out=$($TEST -6)
|
||||
echo $out | grep -q "$1"
|
||||
if [ $? -ne 0 ]; then
|
||||
print_result 1 "IPv6: $2"
|
||||
@ -61,7 +63,7 @@ cleanup_and_exit()
|
||||
|
||||
[ -n "$msg" ] && echo "ERROR: $msg"
|
||||
|
||||
test_cgrp2_sock -d ${CGRP_MNT}/sockopts
|
||||
$TEST -d ${CGRP_MNT}/sockopts
|
||||
ip li del cgrp2_sock
|
||||
umount ${CGRP_MNT}
|
||||
|
||||
@ -98,7 +100,7 @@ check_sock6 "dev , mark 0, priority 0" "No programs attached"
|
||||
|
||||
# verify device is set
|
||||
#
|
||||
test_cgrp2_sock -b cgrp2_sock ${CGRP_MNT}/sockopts
|
||||
$TEST -b cgrp2_sock ${CGRP_MNT}/sockopts
|
||||
if [ $? -ne 0 ]; then
|
||||
cleanup_and_exit 1 "Failed to install program to set device"
|
||||
fi
|
||||
@ -107,7 +109,7 @@ check_sock6 "dev cgrp2_sock, mark 0, priority 0" "Device set"
|
||||
|
||||
# verify mark is set
|
||||
#
|
||||
test_cgrp2_sock -m 666 ${CGRP_MNT}/sockopts
|
||||
$TEST -m 666 ${CGRP_MNT}/sockopts
|
||||
if [ $? -ne 0 ]; then
|
||||
cleanup_and_exit 1 "Failed to install program to set mark"
|
||||
fi
|
||||
@ -116,7 +118,7 @@ check_sock6 "dev , mark 666, priority 0" "Mark set"
|
||||
|
||||
# verify priority is set
|
||||
#
|
||||
test_cgrp2_sock -p 123 ${CGRP_MNT}/sockopts
|
||||
$TEST -p 123 ${CGRP_MNT}/sockopts
|
||||
if [ $? -ne 0 ]; then
|
||||
cleanup_and_exit 1 "Failed to install program to set priority"
|
||||
fi
|
||||
@ -125,7 +127,7 @@ check_sock6 "dev , mark 0, priority 123" "Priority set"
|
||||
|
||||
# all 3 at once
|
||||
#
|
||||
test_cgrp2_sock -b cgrp2_sock -m 666 -p 123 ${CGRP_MNT}/sockopts
|
||||
$TEST -b cgrp2_sock -m 666 -p 123 ${CGRP_MNT}/sockopts
|
||||
if [ $? -ne 0 ]; then
|
||||
cleanup_and_exit 1 "Failed to install program to set device, mark and priority"
|
||||
fi
|
||||
|
@ -2,18 +2,23 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
BPFFS=/sys/fs/bpf
|
||||
MY_DIR=$(dirname $0)
|
||||
TEST=$MY_DIR/test_cgrp2_sock2
|
||||
LINK_PIN=$BPFFS/test_cgrp2_sock2
|
||||
BPF_PROG=$MY_DIR/sock_flags.bpf.o
|
||||
|
||||
function config_device {
|
||||
ip netns add at_ns0
|
||||
ip link add veth0 type veth peer name veth0b
|
||||
ip link set veth0b up
|
||||
ip link set veth0 netns at_ns0
|
||||
ip netns exec at_ns0 sysctl -q net.ipv6.conf.veth0.disable_ipv6=0
|
||||
ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
|
||||
ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad
|
||||
ip netns exec at_ns0 ip link set dev veth0 up
|
||||
sysctl -q net.ipv6.conf.veth0b.disable_ipv6=0
|
||||
ip addr add 172.16.1.101/24 dev veth0b
|
||||
ip addr add 2401:db00::2/64 dev veth0b nodad
|
||||
ip link set veth0b up
|
||||
}
|
||||
|
||||
function config_cgroup {
|
||||
@ -34,7 +39,7 @@ function config_bpffs {
|
||||
}
|
||||
|
||||
function attach_bpf {
|
||||
./test_cgrp2_sock2 /tmp/cgroupv2/foo sock_flags_kern.o $1
|
||||
$TEST /tmp/cgroupv2/foo $BPF_PROG $1
|
||||
[ $? -ne 0 ] && exit 1
|
||||
}
|
||||
|
||||
|
@ -5,11 +5,8 @@
|
||||
* License as published by the Free Software Foundation.
|
||||
*/
|
||||
#define KBUILD_MODNAME "foo"
|
||||
#include <uapi/linux/if_ether.h>
|
||||
#include <uapi/linux/in6.h>
|
||||
#include <uapi/linux/ipv6.h>
|
||||
#include <uapi/linux/pkt_cls.h>
|
||||
#include <uapi/linux/bpf.h>
|
||||
#include "vmlinux.h"
|
||||
#include "net_shared.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
/* copy of 'struct ethhdr' without __packed */
|
||||
@ -19,24 +16,13 @@ struct eth_hdr {
|
||||
unsigned short h_proto;
|
||||
};
|
||||
|
||||
#define PIN_GLOBAL_NS 2
|
||||
struct bpf_elf_map {
|
||||
__u32 type;
|
||||
__u32 size_key;
|
||||
__u32 size_value;
|
||||
__u32 max_elem;
|
||||
__u32 flags;
|
||||
__u32 id;
|
||||
__u32 pinning;
|
||||
};
|
||||
|
||||
struct bpf_elf_map SEC("maps") test_cgrp2_array_pin = {
|
||||
.type = BPF_MAP_TYPE_CGROUP_ARRAY,
|
||||
.size_key = sizeof(uint32_t),
|
||||
.size_value = sizeof(uint32_t),
|
||||
.pinning = PIN_GLOBAL_NS,
|
||||
.max_elem = 1,
|
||||
};
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
|
||||
__type(key, u32);
|
||||
__type(value, u32);
|
||||
__uint(pinning, LIBBPF_PIN_BY_NAME);
|
||||
__uint(max_entries, 1);
|
||||
} test_cgrp2_array_pin SEC(".maps");
|
||||
|
||||
SEC("filter")
|
||||
int handle_egress(struct __sk_buff *skb)
|
||||
@ -53,7 +39,7 @@ int handle_egress(struct __sk_buff *skb)
|
||||
if (data + sizeof(*eth) + sizeof(*ip6h) > data_end)
|
||||
return TC_ACT_OK;
|
||||
|
||||
if (eth->h_proto != htons(ETH_P_IPV6) ||
|
||||
if (eth->h_proto != bpf_htons(ETH_P_IPV6) ||
|
||||
ip6h->nexthdr != IPPROTO_ICMPV6) {
|
||||
bpf_trace_printk(dont_care_msg, sizeof(dont_care_msg),
|
||||
eth->h_proto, ip6h->nexthdr);
|
@ -4,7 +4,7 @@
|
||||
MY_DIR=$(dirname $0)
|
||||
# Details on the bpf prog
|
||||
BPF_CGRP2_ARRAY_NAME='test_cgrp2_array_pin'
|
||||
BPF_PROG="$MY_DIR/test_cgrp2_tc_kern.o"
|
||||
BPF_PROG="$MY_DIR/test_cgrp2_tc.bpf.o"
|
||||
BPF_SECTION='filter'
|
||||
|
||||
[ -z "$TC" ] && TC='tc'
|
||||
@ -73,11 +73,13 @@ setup_net() {
|
||||
start)
|
||||
$IP link add $HOST_IFC type veth peer name $NS_IFC || return $?
|
||||
$IP link set dev $HOST_IFC up || return $?
|
||||
sysctl -q net.ipv6.conf.$HOST_IFC.disable_ipv6=0
|
||||
sysctl -q net.ipv6.conf.$HOST_IFC.accept_dad=0
|
||||
|
||||
$IP netns add ns || return $?
|
||||
$IP link set dev $NS_IFC netns ns || return $?
|
||||
$IP netns add $NS || return $?
|
||||
$IP link set dev $NS_IFC netns $NS || return $?
|
||||
$IP -n $NS link set dev $NS_IFC up || return $?
|
||||
$IP netns exec $NS sysctl -q net.ipv6.conf.$NS_IFC.disable_ipv6=0
|
||||
$IP netns exec $NS sysctl -q net.ipv6.conf.$NS_IFC.accept_dad=0
|
||||
$TC qdisc add dev $HOST_IFC clsact || return $?
|
||||
$TC filter add dev $HOST_IFC egress bpf da obj $BPF_PROG sec $BPF_SECTION || return $?
|
||||
|
@ -10,16 +10,8 @@
|
||||
* General Public License for more details.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/ip.h>
|
||||
#include <linux/in.h>
|
||||
#include <linux/in6.h>
|
||||
#include <linux/tcp.h>
|
||||
#include <linux/udp.h>
|
||||
#include <linux/icmpv6.h>
|
||||
#include <linux/if_ether.h>
|
||||
#include "vmlinux.h"
|
||||
#include "net_shared.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <string.h>
|
||||
|
||||
@ -44,9 +36,9 @@ SEC("test_ctx")
|
||||
int do_test_ctx(struct __sk_buff *skb)
|
||||
{
|
||||
skb->cb[0] = CB_MAGIC;
|
||||
printk("len %d hash %d protocol %d\n", skb->len, skb->hash,
|
||||
printk("len %d hash %d protocol %d", skb->len, skb->hash,
|
||||
skb->protocol);
|
||||
printk("cb %d ingress_ifindex %d ifindex %d\n", skb->cb[0],
|
||||
printk("cb %d ingress_ifindex %d ifindex %d", skb->cb[0],
|
||||
skb->ingress_ifindex, skb->ifindex);
|
||||
|
||||
return BPF_OK;
|
||||
@ -56,9 +48,9 @@ int do_test_ctx(struct __sk_buff *skb)
|
||||
SEC("test_cb")
|
||||
int do_test_cb(struct __sk_buff *skb)
|
||||
{
|
||||
printk("cb0: %x cb1: %x cb2: %x\n", skb->cb[0], skb->cb[1],
|
||||
printk("cb0: %x cb1: %x cb2: %x", skb->cb[0], skb->cb[1],
|
||||
skb->cb[2]);
|
||||
printk("cb3: %x cb4: %x\n", skb->cb[3], skb->cb[4]);
|
||||
printk("cb3: %x cb4: %x", skb->cb[3], skb->cb[4]);
|
||||
|
||||
return BPF_OK;
|
||||
}
|
||||
@ -72,11 +64,11 @@ int do_test_data(struct __sk_buff *skb)
|
||||
struct iphdr *iph = data;
|
||||
|
||||
if (data + sizeof(*iph) > data_end) {
|
||||
printk("packet truncated\n");
|
||||
printk("packet truncated");
|
||||
return BPF_DROP;
|
||||
}
|
||||
|
||||
printk("src: %x dst: %x\n", iph->saddr, iph->daddr);
|
||||
printk("src: %x dst: %x", iph->saddr, iph->daddr);
|
||||
|
||||
return BPF_OK;
|
||||
}
|
||||
@ -97,7 +89,7 @@ static inline int rewrite(struct __sk_buff *skb, uint32_t old_ip,
|
||||
|
||||
ret = bpf_skb_load_bytes(skb, IP_PROTO_OFF, &proto, 1);
|
||||
if (ret < 0) {
|
||||
printk("bpf_l4_csum_replace failed: %d\n", ret);
|
||||
printk("bpf_l4_csum_replace failed: %d", ret);
|
||||
return BPF_DROP;
|
||||
}
|
||||
|
||||
@ -120,14 +112,14 @@ static inline int rewrite(struct __sk_buff *skb, uint32_t old_ip,
|
||||
ret = bpf_l4_csum_replace(skb, off, old_ip, new_ip,
|
||||
flags | sizeof(new_ip));
|
||||
if (ret < 0) {
|
||||
printk("bpf_l4_csum_replace failed: %d\n");
|
||||
printk("bpf_l4_csum_replace failed: %d");
|
||||
return BPF_DROP;
|
||||
}
|
||||
}
|
||||
|
||||
ret = bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip));
|
||||
if (ret < 0) {
|
||||
printk("bpf_l3_csum_replace failed: %d\n", ret);
|
||||
printk("bpf_l3_csum_replace failed: %d", ret);
|
||||
return BPF_DROP;
|
||||
}
|
||||
|
||||
@ -137,7 +129,7 @@ static inline int rewrite(struct __sk_buff *skb, uint32_t old_ip,
|
||||
ret = bpf_skb_store_bytes(skb, IP_SRC_OFF, &new_ip, sizeof(new_ip), 0);
|
||||
|
||||
if (ret < 0) {
|
||||
printk("bpf_skb_store_bytes() failed: %d\n", ret);
|
||||
printk("bpf_skb_store_bytes() failed: %d", ret);
|
||||
return BPF_DROP;
|
||||
}
|
||||
|
||||
@ -153,12 +145,12 @@ int do_test_rewrite(struct __sk_buff *skb)
|
||||
|
||||
ret = bpf_skb_load_bytes(skb, IP_DST_OFF, &old_ip, 4);
|
||||
if (ret < 0) {
|
||||
printk("bpf_skb_load_bytes failed: %d\n", ret);
|
||||
printk("bpf_skb_load_bytes failed: %d", ret);
|
||||
return BPF_DROP;
|
||||
}
|
||||
|
||||
if (old_ip == 0x2fea8c0) {
|
||||
printk("out: rewriting from %x to %x\n", old_ip, new_ip);
|
||||
printk("out: rewriting from %x to %x", old_ip, new_ip);
|
||||
return rewrite(skb, old_ip, new_ip, 1);
|
||||
}
|
||||
|
||||
@ -173,16 +165,16 @@ static inline int __do_push_ll_and_redirect(struct __sk_buff *skb)
|
||||
|
||||
ret = bpf_skb_change_head(skb, 14, 0);
|
||||
if (ret < 0) {
|
||||
printk("skb_change_head() failed: %d\n", ret);
|
||||
printk("skb_change_head() failed: %d", ret);
|
||||
}
|
||||
|
||||
ehdr.h_proto = __constant_htons(ETH_P_IP);
|
||||
ehdr.h_proto = bpf_htons(ETH_P_IP);
|
||||
memcpy(&ehdr.h_source, &smac, 6);
|
||||
memcpy(&ehdr.h_dest, &dmac, 6);
|
||||
|
||||
ret = bpf_skb_store_bytes(skb, 0, &ehdr, sizeof(ehdr), 0);
|
||||
if (ret < 0) {
|
||||
printk("skb_store_bytes() failed: %d\n", ret);
|
||||
printk("skb_store_bytes() failed: %d", ret);
|
||||
return BPF_DROP;
|
||||
}
|
||||
|
||||
@ -202,7 +194,7 @@ int do_push_ll_and_redirect(struct __sk_buff *skb)
|
||||
|
||||
ret = __do_push_ll_and_redirect(skb);
|
||||
if (ret >= 0)
|
||||
printk("redirected to %d\n", ifindex);
|
||||
printk("redirected to %d", ifindex);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -229,7 +221,7 @@ SEC("fill_garbage")
|
||||
int do_fill_garbage(struct __sk_buff *skb)
|
||||
{
|
||||
__fill_garbage(skb);
|
||||
printk("Set initial 96 bytes of header to FF\n");
|
||||
printk("Set initial 96 bytes of header to FF");
|
||||
return BPF_OK;
|
||||
}
|
||||
|
||||
@ -238,7 +230,7 @@ int do_fill_garbage_and_redirect(struct __sk_buff *skb)
|
||||
{
|
||||
int ifindex = DST_IFINDEX;
|
||||
__fill_garbage(skb);
|
||||
printk("redirected to %d\n", ifindex);
|
||||
printk("redirected to %d", ifindex);
|
||||
return bpf_redirect(ifindex, 0);
|
||||
}
|
||||
|
||||
@ -246,7 +238,7 @@ int do_fill_garbage_and_redirect(struct __sk_buff *skb)
|
||||
SEC("drop_all")
|
||||
int do_drop_all(struct __sk_buff *skb)
|
||||
{
|
||||
printk("dropping with: %d\n", BPF_DROP);
|
||||
printk("dropping with: %d", BPF_DROP);
|
||||
return BPF_DROP;
|
||||
}
|
||||
|
||||
|
@ -19,7 +19,10 @@ IPVETH3="192.168.111.2"
|
||||
|
||||
IP_LOCAL="192.168.99.1"
|
||||
|
||||
PROG_SRC="test_lwt_bpf.c"
|
||||
BPF_PROG="test_lwt_bpf.o"
|
||||
TRACE_ROOT=/sys/kernel/debug/tracing
|
||||
CONTEXT_INFO=$(cat ${TRACE_ROOT}/trace_options | grep context)
|
||||
|
||||
function lookup_mac()
|
||||
{
|
||||
@ -36,7 +39,7 @@ function lookup_mac()
|
||||
|
||||
function cleanup {
|
||||
set +ex
|
||||
rm test_lwt_bpf.o 2> /dev/null
|
||||
rm $BPF_PROG 2> /dev/null
|
||||
ip link del $VETH0 2> /dev/null
|
||||
ip link del $VETH1 2> /dev/null
|
||||
ip link del $VETH2 2> /dev/null
|
||||
@ -76,7 +79,7 @@ function install_test {
|
||||
cleanup_routes
|
||||
cp /dev/null ${TRACE_ROOT}/trace
|
||||
|
||||
OPTS="encap bpf headroom 14 $1 obj test_lwt_bpf.o section $2 $VERBOSE"
|
||||
OPTS="encap bpf headroom 14 $1 obj $BPF_PROG section $2 $VERBOSE"
|
||||
|
||||
if [ "$1" == "in" ]; then
|
||||
ip route add table local local ${IP_LOCAL}/32 $OPTS dev lo
|
||||
@ -96,7 +99,7 @@ function remove_prog {
|
||||
function filter_trace {
|
||||
# Add newline to allow starting EXPECT= variables on newline
|
||||
NL=$'\n'
|
||||
echo "${NL}$*" | sed -e 's/^.*: : //g'
|
||||
echo "${NL}$*" | sed -e 's/bpf_trace_printk: //g'
|
||||
}
|
||||
|
||||
function expect_fail {
|
||||
@ -160,11 +163,11 @@ function test_ctx_out {
|
||||
failure "test_ctx out: packets are dropped"
|
||||
}
|
||||
match_trace "$(get_trace)" "
|
||||
len 84 hash 0 protocol 0
|
||||
len 84 hash 0 protocol 8
|
||||
cb 1234 ingress_ifindex 0 ifindex 0
|
||||
len 84 hash 0 protocol 0
|
||||
len 84 hash 0 protocol 8
|
||||
cb 1234 ingress_ifindex 0 ifindex 0
|
||||
len 84 hash 0 protocol 0
|
||||
len 84 hash 0 protocol 8
|
||||
cb 1234 ingress_ifindex 0 ifindex 0" || exit 1
|
||||
remove_prog out
|
||||
}
|
||||
@ -367,6 +370,7 @@ setup_one_veth $NS1 $VETH0 $VETH1 $IPVETH0 $IPVETH1 $IPVETH1b
|
||||
setup_one_veth $NS2 $VETH2 $VETH3 $IPVETH2 $IPVETH3
|
||||
ip netns exec $NS1 netserver
|
||||
echo 1 > ${TRACE_ROOT}/tracing_on
|
||||
echo nocontext-info > ${TRACE_ROOT}/trace_options
|
||||
|
||||
DST_MAC=$(lookup_mac $VETH1 $NS1)
|
||||
SRC_MAC=$(lookup_mac $VETH0)
|
||||
@ -374,7 +378,7 @@ DST_IFINDEX=$(cat /sys/class/net/$VETH0/ifindex)
|
||||
|
||||
CLANG_OPTS="-O2 -target bpf -I ../include/"
|
||||
CLANG_OPTS+=" -DSRC_MAC=$SRC_MAC -DDST_MAC=$DST_MAC -DDST_IFINDEX=$DST_IFINDEX"
|
||||
clang $CLANG_OPTS -c test_lwt_bpf.c -o test_lwt_bpf.o
|
||||
clang $CLANG_OPTS -c $PROG_SRC -o $BPF_PROG
|
||||
|
||||
test_ctx_xmit
|
||||
test_ctx_out
|
||||
@ -397,4 +401,5 @@ test_netperf_redirect
|
||||
|
||||
cleanup
|
||||
echo 0 > ${TRACE_ROOT}/tracing_on
|
||||
echo $CONTEXT_INFO > ${TRACE_ROOT}/trace_options
|
||||
exit 0
|
||||
|
@ -6,16 +6,17 @@
|
||||
* License as published by the Free Software Foundation.
|
||||
*/
|
||||
#define KBUILD_MODNAME "foo"
|
||||
#include <linux/ptrace.h>
|
||||
#include "vmlinux.h"
|
||||
#include <linux/version.h>
|
||||
#include <uapi/linux/bpf.h>
|
||||
#include <uapi/linux/in6.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include <bpf/bpf_core_read.h>
|
||||
|
||||
#define MAX_NR_PORTS 65536
|
||||
|
||||
#define EINVAL 22
|
||||
#define ENOENT 2
|
||||
|
||||
/* map #0 */
|
||||
struct inner_a {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
@ -120,7 +120,7 @@ int main(int argc, char **argv)
|
||||
struct bpf_object *obj;
|
||||
char filename[256];
|
||||
|
||||
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
|
||||
snprintf(filename, sizeof(filename), "%s.bpf.o", argv[0]);
|
||||
obj = bpf_object__open_file(filename, NULL);
|
||||
if (libbpf_get_error(obj)) {
|
||||
fprintf(stderr, "ERROR: opening BPF object file failed\n");
|
||||
|
@ -4,10 +4,8 @@
|
||||
* modify it under the terms of version 2 of the GNU General Public
|
||||
* License as published by the Free Software Foundation.
|
||||
*/
|
||||
#include "vmlinux.h"
|
||||
#include <linux/version.h>
|
||||
#include <linux/ptrace.h>
|
||||
#include <linux/sched.h>
|
||||
#include <uapi/linux/bpf.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
|
||||
@ -39,7 +37,7 @@ int prog(struct pt_regs *ctx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("kprobe/urandom_read")
|
||||
SEC("kprobe/fib_table_lookup")
|
||||
int prog2(struct pt_regs *ctx)
|
||||
{
|
||||
return 0;
|
@ -1,6 +1,6 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2018 Facebook */
|
||||
#include <uapi/linux/bpf.h>
|
||||
#include "vmlinux.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
SEC("raw_tracepoint/task_rename")
|
||||
@ -9,7 +9,7 @@ int prog(struct bpf_raw_tracepoint_args *ctx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("raw_tracepoint/urandom_read")
|
||||
SEC("raw_tracepoint/fib_table_lookup")
|
||||
int prog2(struct bpf_raw_tracepoint_args *ctx)
|
||||
{
|
||||
return 0;
|
@ -4,8 +4,7 @@
|
||||
* modify it under the terms of version 2 of the GNU General Public
|
||||
* License as published by the Free Software Foundation.
|
||||
*/
|
||||
#include <linux/sched.h>
|
||||
#include <uapi/linux/bpf.h>
|
||||
#include "vmlinux.h"
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
/* from /sys/kernel/debug/tracing/events/task/task_rename/format */
|
||||
@ -22,15 +21,27 @@ int prog(struct task_rename *ctx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* from /sys/kernel/debug/tracing/events/random/urandom_read/format */
|
||||
struct urandom_read {
|
||||
/* from /sys/kernel/debug/tracing/events/fib/fib_table_lookup/format */
|
||||
struct fib_table_lookup {
|
||||
__u64 pad;
|
||||
int got_bits;
|
||||
int pool_left;
|
||||
int input_left;
|
||||
__u32 tb_id;
|
||||
int err;
|
||||
int oif;
|
||||
int iif;
|
||||
__u8 proto;
|
||||
__u8 tos;
|
||||
__u8 scope;
|
||||
__u8 flags;
|
||||
__u8 src[4];
|
||||
__u8 dst[4];
|
||||
__u8 gw4[4];
|
||||
__u8 gw6[16];
|
||||
__u16 sport;
|
||||
__u16 dport;
|
||||
char name[16];
|
||||
};
|
||||
SEC("tracepoint/random/urandom_read")
|
||||
int prog2(struct urandom_read *ctx)
|
||||
SEC("tracepoint/fib/fib_table_lookup")
|
||||
int prog2(struct fib_table_lookup *ctx)
|
||||
{
|
||||
return 0;
|
||||
}
|
@ -11,6 +11,8 @@
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/socket.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <stdlib.h>
|
||||
#include <signal.h>
|
||||
#include <linux/bpf.h>
|
||||
@ -20,6 +22,8 @@
|
||||
#include <bpf/libbpf.h>
|
||||
|
||||
#define MAX_CNT 1000000
|
||||
#define DUMMY_IP "127.0.0.1"
|
||||
#define DUMMY_PORT 80
|
||||
|
||||
static struct bpf_link *links[2];
|
||||
static struct bpf_object *obj;
|
||||
@ -35,8 +39,8 @@ static __u64 time_get_ns(void)
|
||||
|
||||
static void test_task_rename(int cpu)
|
||||
{
|
||||
__u64 start_time;
|
||||
char buf[] = "test\n";
|
||||
__u64 start_time;
|
||||
int i, fd;
|
||||
|
||||
fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
|
||||
@ -57,26 +61,32 @@ static void test_task_rename(int cpu)
|
||||
close(fd);
|
||||
}
|
||||
|
||||
static void test_urandom_read(int cpu)
|
||||
static void test_fib_table_lookup(int cpu)
|
||||
{
|
||||
struct sockaddr_in addr;
|
||||
char buf[] = "test\n";
|
||||
__u64 start_time;
|
||||
char buf[4];
|
||||
int i, fd;
|
||||
|
||||
fd = open("/dev/urandom", O_RDONLY);
|
||||
fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
|
||||
if (fd < 0) {
|
||||
printf("couldn't open /dev/urandom\n");
|
||||
printf("couldn't open socket\n");
|
||||
exit(1);
|
||||
}
|
||||
memset((char *)&addr, 0, sizeof(addr));
|
||||
addr.sin_addr.s_addr = inet_addr(DUMMY_IP);
|
||||
addr.sin_port = htons(DUMMY_PORT);
|
||||
addr.sin_family = AF_INET;
|
||||
start_time = time_get_ns();
|
||||
for (i = 0; i < MAX_CNT; i++) {
|
||||
if (read(fd, buf, sizeof(buf)) < 0) {
|
||||
printf("failed to read from /dev/urandom: %s\n", strerror(errno));
|
||||
if (sendto(fd, buf, strlen(buf), 0,
|
||||
(struct sockaddr *)&addr, sizeof(addr)) < 0) {
|
||||
printf("failed to start ping: %s\n", strerror(errno));
|
||||
close(fd);
|
||||
return;
|
||||
}
|
||||
}
|
||||
printf("urandom_read:%d: %lld events per sec\n",
|
||||
printf("fib_table_lookup:%d: %lld events per sec\n",
|
||||
cpu, MAX_CNT * 1000000000ll / (time_get_ns() - start_time));
|
||||
close(fd);
|
||||
}
|
||||
@ -92,7 +102,7 @@ static void loop(int cpu, int flags)
|
||||
if (flags & 1)
|
||||
test_task_rename(cpu);
|
||||
if (flags & 2)
|
||||
test_urandom_read(cpu);
|
||||
test_fib_table_lookup(cpu);
|
||||
}
|
||||
|
||||
static void run_perf_test(int tasks, int flags)
|
||||
@ -179,7 +189,7 @@ int main(int argc, char **argv)
|
||||
|
||||
if (test_flags & 0xC) {
|
||||
snprintf(filename, sizeof(filename),
|
||||
"%s_kprobe_kern.o", argv[0]);
|
||||
"%s_kprobe.bpf.o", argv[0]);
|
||||
|
||||
printf("w/KPROBE\n");
|
||||
err = load_progs(filename);
|
||||
@ -191,7 +201,7 @@ int main(int argc, char **argv)
|
||||
|
||||
if (test_flags & 0x30) {
|
||||
snprintf(filename, sizeof(filename),
|
||||
"%s_tp_kern.o", argv[0]);
|
||||
"%s_tp.bpf.o", argv[0]);
|
||||
printf("w/TRACEPOINT\n");
|
||||
err = load_progs(filename);
|
||||
if (!err)
|
||||
@ -202,7 +212,7 @@ int main(int argc, char **argv)
|
||||
|
||||
if (test_flags & 0xC0) {
|
||||
snprintf(filename, sizeof(filename),
|
||||
"%s_raw_tp_kern.o", argv[0]);
|
||||
"%s_raw_tp.bpf.o", argv[0]);
|
||||
printf("w/RAW_TRACEPOINT\n");
|
||||
err = load_progs(filename);
|
||||
if (!err)
|
||||
|
@ -7,17 +7,9 @@
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
#include "net_shared.h"
|
||||
#include "xdp_sample_shared.h"
|
||||
|
||||
#define ETH_ALEN 6
|
||||
#define ETH_P_802_3_MIN 0x0600
|
||||
#define ETH_P_8021Q 0x8100
|
||||
#define ETH_P_8021AD 0x88A8
|
||||
#define ETH_P_IP 0x0800
|
||||
#define ETH_P_IPV6 0x86DD
|
||||
#define ETH_P_ARP 0x0806
|
||||
#define IPPROTO_ICMPV6 58
|
||||
|
||||
#define EINVAL 22
|
||||
#define ENETDOWN 100
|
||||
#define EMSGSIZE 90
|
||||
@ -55,18 +47,6 @@ static __always_inline void swap_src_dst_mac(void *data)
|
||||
p[5] = dst[2];
|
||||
}
|
||||
|
||||
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && \
|
||||
__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
#define bpf_ntohs(x) __builtin_bswap16(x)
|
||||
#define bpf_htons(x) __builtin_bswap16(x)
|
||||
#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
|
||||
__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
#define bpf_ntohs(x) (x)
|
||||
#define bpf_htons(x) (x)
|
||||
#else
|
||||
# error "Endianness detection needs to be set up for your compiler?!"
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Note: including linux/compiler.h or linux/kernel.h for the macros below
|
||||
* conflicts with vmlinux.h include in BPF files, so we define them here.
|
||||
|
@ -271,7 +271,7 @@ class HeaderParser(object):
|
||||
if capture:
|
||||
fn_defines_str += self.line
|
||||
helper_name = capture.expand(r'bpf_\1')
|
||||
self.helper_enum_vals[helper_name] = int(capture[2])
|
||||
self.helper_enum_vals[helper_name] = int(capture.group(2))
|
||||
self.helper_enum_pos[helper_name] = i
|
||||
i += 1
|
||||
else:
|
||||
|
@ -19,5 +19,9 @@ fi
|
||||
if [ "${pahole_ver}" -ge "122" ]; then
|
||||
extra_paholeopt="${extra_paholeopt} -j"
|
||||
fi
|
||||
if [ "${pahole_ver}" -ge "124" ]; then
|
||||
# see PAHOLE_HAS_LANG_EXCLUDE
|
||||
extra_paholeopt="${extra_paholeopt} --lang_exclude=rust"
|
||||
fi
|
||||
|
||||
echo ${extra_paholeopt}
|
||||
|
@ -53,7 +53,7 @@ $(LIBBPF_INTERNAL_HDRS): $(LIBBPF_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_HDRS_
|
||||
$(LIBBPF_BOOTSTRAP): $(wildcard $(BPF_DIR)/*.[ch] $(BPF_DIR)/Makefile) | $(LIBBPF_BOOTSTRAP_OUTPUT)
|
||||
$(Q)$(MAKE) -C $(BPF_DIR) OUTPUT=$(LIBBPF_BOOTSTRAP_OUTPUT) \
|
||||
DESTDIR=$(LIBBPF_BOOTSTRAP_DESTDIR:/=) prefix= \
|
||||
ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) AR=$(HOSTAR) $@ install_headers
|
||||
ARCH= CROSS_COMPILE= CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)" $@ install_headers
|
||||
|
||||
$(LIBBPF_BOOTSTRAP_INTERNAL_HDRS): $(LIBBPF_BOOTSTRAP_HDRS_DIR)/%.h: $(BPF_DIR)/%.h | $(LIBBPF_BOOTSTRAP_HDRS_DIR)
|
||||
$(call QUIET_INSTALL, $@)
|
||||
@ -215,7 +215,8 @@ $(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF_BOOTSTRAP)
|
||||
-I$(or $(OUTPUT),.) \
|
||||
-I$(srctree)/tools/include/uapi/ \
|
||||
-I$(LIBBPF_BOOTSTRAP_INCLUDE) \
|
||||
-g -O2 -Wall -target bpf -c $< -o $@
|
||||
-g -O2 -Wall -fno-stack-protector \
|
||||
-target bpf -c $< -o $@
|
||||
$(Q)$(LLVM_STRIP) -g $@
|
||||
|
||||
$(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP)
|
||||
|
@ -486,16 +486,16 @@ static void probe_kernel_image_config(const char *define_prefix)
|
||||
}
|
||||
}
|
||||
|
||||
end_parse:
|
||||
if (file)
|
||||
gzclose(file);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(options); i++) {
|
||||
if (define_prefix && !options[i].macro_dump)
|
||||
continue;
|
||||
print_kernel_option(options[i].name, values[i], define_prefix);
|
||||
free(values[i]);
|
||||
}
|
||||
|
||||
end_parse:
|
||||
if (file)
|
||||
gzclose(file);
|
||||
}
|
||||
|
||||
static bool probe_bpf_syscall(const char *define_prefix)
|
||||
|
@ -18,14 +18,11 @@ else
|
||||
endif
|
||||
|
||||
# always use the host compiler
|
||||
AR = $(HOSTAR)
|
||||
CC = $(HOSTCC)
|
||||
LD = $(HOSTLD)
|
||||
ARCH = $(HOSTARCH)
|
||||
HOST_OVERRIDES := AR="$(HOSTAR)" CC="$(HOSTCC)" LD="$(HOSTLD)" ARCH="$(HOSTARCH)" \
|
||||
EXTRA_CFLAGS="$(HOSTCFLAGS) $(KBUILD_HOSTCFLAGS)"
|
||||
|
||||
RM ?= rm
|
||||
CROSS_COMPILE =
|
||||
CFLAGS := $(KBUILD_HOSTCFLAGS)
|
||||
LDFLAGS := $(KBUILD_HOSTLDFLAGS)
|
||||
|
||||
OUTPUT ?= $(srctree)/tools/bpf/resolve_btfids/
|
||||
|
||||
@ -35,25 +32,33 @@ SUBCMD_SRC := $(srctree)/tools/lib/subcmd/
|
||||
BPFOBJ := $(OUTPUT)/libbpf/libbpf.a
|
||||
LIBBPF_OUT := $(abspath $(dir $(BPFOBJ)))/
|
||||
SUBCMDOBJ := $(OUTPUT)/libsubcmd/libsubcmd.a
|
||||
SUBCMD_OUT := $(abspath $(dir $(SUBCMDOBJ)))/
|
||||
|
||||
LIBBPF_DESTDIR := $(LIBBPF_OUT)
|
||||
LIBBPF_INCLUDE := $(LIBBPF_DESTDIR)include
|
||||
|
||||
SUBCMD_DESTDIR := $(SUBCMD_OUT)
|
||||
SUBCMD_INCLUDE := $(SUBCMD_DESTDIR)include
|
||||
|
||||
BINARY := $(OUTPUT)/resolve_btfids
|
||||
BINARY_IN := $(BINARY)-in.o
|
||||
|
||||
all: $(BINARY)
|
||||
|
||||
prepare: $(BPFOBJ) $(SUBCMDOBJ)
|
||||
|
||||
$(OUTPUT) $(OUTPUT)/libsubcmd $(LIBBPF_OUT):
|
||||
$(call msg,MKDIR,,$@)
|
||||
$(Q)mkdir -p $(@)
|
||||
|
||||
$(SUBCMDOBJ): fixdep FORCE | $(OUTPUT)/libsubcmd
|
||||
$(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(abspath $(dir $@))/ $(abspath $@)
|
||||
$(Q)$(MAKE) -C $(SUBCMD_SRC) OUTPUT=$(SUBCMD_OUT) \
|
||||
DESTDIR=$(SUBCMD_DESTDIR) $(HOST_OVERRIDES) prefix= subdir= \
|
||||
$(abspath $@) install_headers
|
||||
|
||||
$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(LIBBPF_OUT)
|
||||
$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(LIBBPF_OUT) \
|
||||
DESTDIR=$(LIBBPF_DESTDIR) prefix= EXTRA_CFLAGS="$(CFLAGS)" \
|
||||
DESTDIR=$(LIBBPF_DESTDIR) $(HOST_OVERRIDES) prefix= subdir= \
|
||||
$(abspath $@) install_headers
|
||||
|
||||
LIBELF_FLAGS := $(shell $(HOSTPKG_CONFIG) libelf --cflags 2>/dev/null)
|
||||
@ -63,7 +68,7 @@ CFLAGS += -g \
|
||||
-I$(srctree)/tools/include \
|
||||
-I$(srctree)/tools/include/uapi \
|
||||
-I$(LIBBPF_INCLUDE) \
|
||||
-I$(SUBCMD_SRC) \
|
||||
-I$(SUBCMD_INCLUDE) \
|
||||
$(LIBELF_FLAGS)
|
||||
|
||||
LIBS = $(LIBELF_LIBS) -lz
|
||||
@ -71,19 +76,20 @@ LIBS = $(LIBELF_LIBS) -lz
|
||||
export srctree OUTPUT CFLAGS Q
|
||||
include $(srctree)/tools/build/Makefile.include
|
||||
|
||||
$(BINARY_IN): $(BPFOBJ) fixdep FORCE | $(OUTPUT)
|
||||
$(Q)$(MAKE) $(build)=resolve_btfids
|
||||
$(BINARY_IN): fixdep FORCE prepare | $(OUTPUT)
|
||||
$(Q)$(MAKE) $(build)=resolve_btfids $(HOST_OVERRIDES)
|
||||
|
||||
$(BINARY): $(BPFOBJ) $(SUBCMDOBJ) $(BINARY_IN)
|
||||
$(call msg,LINK,$@)
|
||||
$(Q)$(CC) $(BINARY_IN) $(LDFLAGS) -o $@ $(BPFOBJ) $(SUBCMDOBJ) $(LIBS)
|
||||
$(Q)$(HOSTCC) $(BINARY_IN) $(KBUILD_HOSTLDFLAGS) -o $@ $(BPFOBJ) $(SUBCMDOBJ) $(LIBS)
|
||||
|
||||
clean_objects := $(wildcard $(OUTPUT)/*.o \
|
||||
$(OUTPUT)/.*.o.cmd \
|
||||
$(OUTPUT)/.*.o.d \
|
||||
$(LIBBPF_OUT) \
|
||||
$(LIBBPF_DESTDIR) \
|
||||
$(OUTPUT)/libsubcmd \
|
||||
$(SUBCMD_OUT) \
|
||||
$(SUBCMD_DESTDIR) \
|
||||
$(OUTPUT)/resolve_btfids)
|
||||
|
||||
ifneq ($(clean_objects),)
|
||||
@ -100,4 +106,4 @@ tags:
|
||||
|
||||
FORCE:
|
||||
|
||||
.PHONY: all FORCE clean tags
|
||||
.PHONY: all FORCE clean tags prepare
|
||||
|
@ -75,7 +75,7 @@
|
||||
#include <linux/err.h>
|
||||
#include <bpf/btf.h>
|
||||
#include <bpf/libbpf.h>
|
||||
#include <parse-options.h>
|
||||
#include <subcmd/parse-options.h>
|
||||
|
||||
#define BTF_IDS_SECTION ".BTF_ids"
|
||||
#define BTF_ID "__BTF_ID__"
|
||||
|
@@ -1156,6 +1156,11 @@ enum bpf_link_type {
*/
#define BPF_F_XDP_HAS_FRAGS (1U << 5)

/* If BPF_F_XDP_DEV_BOUND_ONLY is used in BPF_PROG_LOAD command, the loaded
* program becomes device-bound but can access XDP metadata.
*/
#define BPF_F_XDP_DEV_BOUND_ONLY (1U << 6)

/* link_create.kprobe_multi.flags used in LINK_CREATE command for
* BPF_TRACE_KPROBE_MULTI attach type to create return probe.
*/

@@ -2647,6 +2652,11 @@ union bpf_attr {
* Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
* L2 type as Ethernet.
*
* * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**,
* **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**:
* Indicate the new IP header version after decapsulating the outer
* IP header. Used when the inner and outer IP versions are different.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be

@@ -5807,6 +5817,8 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
BPF_F_ADJ_ROOM_DECAP_L3_IPV4 = (1ULL << 7),
BPF_F_ADJ_ROOM_DECAP_L3_IPV6 = (1ULL << 8),
};

enum {
|
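BPF_F_XDP_DEV_BOUND_ONLY is the load-time flag that makes a program device-bound without requiring full offload, which is what allows it to call the per-device XDP metadata kfuncs shown earlier. A hedged user-space sketch using existing libbpf setters follows; it assumes uapi headers new enough to carry this flag, and the object/interface names are illustrative.

/* Sketch only (user space): load an XDP object device-bound so its programs
 * may use the bpf_xdp_metadata_* kfuncs of that device.  Error checks trimmed.
 */
#include <bpf/libbpf.h>
#include <linux/bpf.h>
#include <net/if.h>

static struct bpf_object *load_dev_bound(const char *path, const char *ifname)
{
	struct bpf_object *obj = bpf_object__open(path);
	struct bpf_program *prog = bpf_object__next_program(obj, NULL);

	bpf_program__set_ifindex(prog, if_nametoindex(ifname));
	bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
	if (bpf_object__load(obj))
		return NULL;
	return obj;
}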
@ -78,6 +78,10 @@
|
||||
|
||||
#if defined(bpf_target_x86)
|
||||
|
||||
/*
|
||||
* https://en.wikipedia.org/wiki/X86_calling_conventions#System_V_AMD64_ABI
|
||||
*/
|
||||
|
||||
#if defined(__KERNEL__) || defined(__VMLINUX_H__)
|
||||
|
||||
#define __PT_PARM1_REG di
|
||||
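These per-architecture syscall register definitions are what libbpf's syscall-argument macros expand to, so the same BPF source can read syscall arguments portably. A hedged sketch of a ksyscall probe using that support; it assumes the bpf_tracing.h from this series and a CO-RE-enabled build, with the probed syscall and argument list purely illustrative.

/* Sketch only: trace openat() arguments via BPF_KSYSCALL, which picks the
 * right __PT_PARMn_SYSCALL_REG for the target architecture.
 */
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

SEC("ksyscall/openat")
int BPF_KSYSCALL(probe_openat, int dfd, const char *pathname, int flags)
{
	char path[64] = {};

	bpf_core_read_user_str(path, sizeof(path), pathname);
	bpf_printk("openat dfd=%d flags=%d path=%s", dfd, flags, path);
	return 0;
}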
@ -85,25 +89,40 @@
|
||||
#define __PT_PARM3_REG dx
|
||||
#define __PT_PARM4_REG cx
|
||||
#define __PT_PARM5_REG r8
|
||||
#define __PT_PARM6_REG r9
|
||||
/*
|
||||
* Syscall uses r10 for PARM4. See arch/x86/entry/entry_64.S:entry_SYSCALL_64
|
||||
* comments in Linux sources. And refer to syscall(2) manpage.
|
||||
*/
|
||||
#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
|
||||
#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
|
||||
#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
|
||||
#define __PT_PARM4_SYSCALL_REG r10
|
||||
#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
|
||||
#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
|
||||
|
||||
#define __PT_RET_REG sp
|
||||
#define __PT_FP_REG bp
|
||||
#define __PT_RC_REG ax
|
||||
#define __PT_SP_REG sp
|
||||
#define __PT_IP_REG ip
|
||||
/* syscall uses r10 for PARM4 */
|
||||
#define PT_REGS_PARM4_SYSCALL(x) ((x)->r10)
|
||||
#define PT_REGS_PARM4_CORE_SYSCALL(x) BPF_CORE_READ(x, r10)
|
||||
|
||||
#else
|
||||
|
||||
#ifdef __i386__
|
||||
|
||||
/* i386 kernel is built with -mregparm=3 */
|
||||
#define __PT_PARM1_REG eax
|
||||
#define __PT_PARM2_REG edx
|
||||
#define __PT_PARM3_REG ecx
|
||||
/* i386 kernel is built with -mregparm=3 */
|
||||
#define __PT_PARM4_REG __unsupported__
|
||||
#define __PT_PARM5_REG __unsupported__
|
||||
/* i386 syscall ABI is very different, refer to syscall(2) manpage */
|
||||
#define __PT_PARM1_SYSCALL_REG ebx
|
||||
#define __PT_PARM2_SYSCALL_REG ecx
|
||||
#define __PT_PARM3_SYSCALL_REG edx
|
||||
#define __PT_PARM4_SYSCALL_REG esi
|
||||
#define __PT_PARM5_SYSCALL_REG edi
|
||||
#define __PT_PARM6_SYSCALL_REG ebp
|
||||
|
||||
#define __PT_RET_REG esp
|
||||
#define __PT_FP_REG ebp
|
||||
#define __PT_RC_REG eax
|
||||
@ -117,14 +136,20 @@
|
||||
#define __PT_PARM3_REG rdx
|
||||
#define __PT_PARM4_REG rcx
|
||||
#define __PT_PARM5_REG r8
|
||||
#define __PT_PARM6_REG r9
|
||||
|
||||
#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
|
||||
#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
|
||||
#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
|
||||
#define __PT_PARM4_SYSCALL_REG r10
|
||||
#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
|
||||
#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
|
||||
|
||||
#define __PT_RET_REG rsp
|
||||
#define __PT_FP_REG rbp
|
||||
#define __PT_RC_REG rax
|
||||
#define __PT_SP_REG rsp
|
||||
#define __PT_IP_REG rip
|
||||
/* syscall uses r10 for PARM4 */
|
||||
#define PT_REGS_PARM4_SYSCALL(x) ((x)->r10)
|
||||
#define PT_REGS_PARM4_CORE_SYSCALL(x) BPF_CORE_READ(x, r10)
|
||||
|
||||
#endif /* __i386__ */
|
||||
|
||||
@ -132,6 +157,10 @@
|
||||
|
||||
#elif defined(bpf_target_s390)
|
||||
|
||||
/*
|
||||
* https://github.com/IBM/s390x-abi/releases/download/v1.6/lzsabi_s390x.pdf
|
||||
*/
|
||||
|
||||
struct pt_regs___s390 {
|
||||
unsigned long orig_gpr2;
|
||||
};
|
||||
@ -143,21 +172,41 @@ struct pt_regs___s390 {
|
||||
#define __PT_PARM3_REG gprs[4]
|
||||
#define __PT_PARM4_REG gprs[5]
|
||||
#define __PT_PARM5_REG gprs[6]
|
||||
|
||||
#define __PT_PARM1_SYSCALL_REG orig_gpr2
|
||||
#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
|
||||
#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
|
||||
#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
|
||||
#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
|
||||
#define __PT_PARM6_SYSCALL_REG gprs[7]
|
||||
#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x)
|
||||
#define PT_REGS_PARM1_CORE_SYSCALL(x) \
|
||||
BPF_CORE_READ((const struct pt_regs___s390 *)(x), __PT_PARM1_SYSCALL_REG)
|
||||
|
||||
#define __PT_RET_REG gprs[14]
|
||||
#define __PT_FP_REG gprs[11] /* Works only with CONFIG_FRAME_POINTER */
|
||||
#define __PT_RC_REG gprs[2]
|
||||
#define __PT_SP_REG gprs[15]
|
||||
#define __PT_IP_REG psw.addr
|
||||
#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x)
|
||||
#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___s390 *)(x), orig_gpr2)
|
||||
|
||||
#elif defined(bpf_target_arm)
|
||||
|
||||
/*
|
||||
* https://github.com/ARM-software/abi-aa/blob/main/aapcs32/aapcs32.rst#machine-registers
|
||||
*/
|
||||
|
||||
#define __PT_PARM1_REG uregs[0]
|
||||
#define __PT_PARM2_REG uregs[1]
|
||||
#define __PT_PARM3_REG uregs[2]
|
||||
#define __PT_PARM4_REG uregs[3]
|
||||
#define __PT_PARM5_REG uregs[4]
|
||||
|
||||
#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
|
||||
#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
|
||||
#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
|
||||
#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
|
||||
#define __PT_PARM6_SYSCALL_REG uregs[5]
|
||||
#define __PT_PARM7_SYSCALL_REG uregs[6]
|
||||
|
||||
#define __PT_RET_REG uregs[14]
|
||||
#define __PT_FP_REG uregs[11] /* Works only with CONFIG_FRAME_POINTER */
|
||||
#define __PT_RC_REG uregs[0]
|
||||
@ -166,6 +215,10 @@ struct pt_regs___s390 {
|
||||
|
||||
#elif defined(bpf_target_arm64)
|
||||
|
||||
/*
|
||||
* https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst#machine-registers
|
||||
*/
|
||||
|
||||
struct pt_regs___arm64 {
|
||||
unsigned long orig_x0;
|
||||
};
|
||||
@ -177,21 +230,49 @@ struct pt_regs___arm64 {
|
||||
#define __PT_PARM3_REG regs[2]
|
||||
#define __PT_PARM4_REG regs[3]
|
||||
#define __PT_PARM5_REG regs[4]
|
||||
#define __PT_PARM6_REG regs[5]
|
||||
#define __PT_PARM7_REG regs[6]
|
||||
#define __PT_PARM8_REG regs[7]
|
||||
|
||||
#define __PT_PARM1_SYSCALL_REG orig_x0
|
||||
#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
|
||||
#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
|
||||
#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
|
||||
#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
|
||||
#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
|
||||
#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x)
|
||||
#define PT_REGS_PARM1_CORE_SYSCALL(x) \
|
||||
BPF_CORE_READ((const struct pt_regs___arm64 *)(x), __PT_PARM1_SYSCALL_REG)
|
||||
|
||||
#define __PT_RET_REG regs[30]
|
||||
#define __PT_FP_REG regs[29] /* Works only with CONFIG_FRAME_POINTER */
|
||||
#define __PT_RC_REG regs[0]
|
||||
#define __PT_SP_REG sp
|
||||
#define __PT_IP_REG pc
|
||||
#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1_CORE_SYSCALL(x)
|
||||
#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ((const struct pt_regs___arm64 *)(x), orig_x0)
|
||||
|
||||
#elif defined(bpf_target_mips)
|
||||
|
||||
/*
|
||||
* N64 ABI is assumed right now.
|
||||
* https://en.wikipedia.org/wiki/MIPS_architecture#Calling_conventions
|
||||
*/
|
||||
|
||||
#define __PT_PARM1_REG regs[4]
|
||||
#define __PT_PARM2_REG regs[5]
|
||||
#define __PT_PARM3_REG regs[6]
|
||||
#define __PT_PARM4_REG regs[7]
|
||||
#define __PT_PARM5_REG regs[8]
|
||||
#define __PT_PARM6_REG regs[9]
|
||||
#define __PT_PARM7_REG regs[10]
|
||||
#define __PT_PARM8_REG regs[11]
|
||||
|
||||
#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
|
||||
#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
|
||||
#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
|
||||
#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
|
||||
#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG /* only N32/N64 */
|
||||
#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG /* only N32/N64 */
|
||||
|
||||
#define __PT_RET_REG regs[31]
|
||||
#define __PT_FP_REG regs[30] /* Works only with CONFIG_FRAME_POINTER */
|
||||
#define __PT_RC_REG regs[2]
|
||||
@ -200,26 +281,58 @@ struct pt_regs___arm64 {
|
||||
|
||||
#elif defined(bpf_target_powerpc)
|
||||
|
||||
/*
|
||||
* http://refspecs.linux-foundation.org/elf/elfspec_ppc.pdf (page 3-14,
|
||||
* section "Function Calling Sequence")
|
||||
*/
|
||||
|
||||
#define __PT_PARM1_REG gpr[3]
|
||||
#define __PT_PARM2_REG gpr[4]
|
||||
#define __PT_PARM3_REG gpr[5]
|
||||
#define __PT_PARM4_REG gpr[6]
|
||||
#define __PT_PARM5_REG gpr[7]
|
||||
#define __PT_PARM6_REG gpr[8]
|
||||
#define __PT_PARM7_REG gpr[9]
|
||||
#define __PT_PARM8_REG gpr[10]
|
||||
|
||||
/* powerpc does not select ARCH_HAS_SYSCALL_WRAPPER. */
|
||||
#define PT_REGS_SYSCALL_REGS(ctx) ctx
|
||||
#define __PT_PARM1_SYSCALL_REG orig_gpr3
|
||||
#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
|
||||
#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
|
||||
#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
|
||||
#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
|
||||
#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
|
||||
#if !defined(__arch64__)
|
||||
#define __PT_PARM7_SYSCALL_REG __PT_PARM7_REG /* only powerpc (not powerpc64) */
|
||||
#endif
|
||||
|
||||
#define __PT_RET_REG regs[31]
|
||||
#define __PT_FP_REG __unsupported__
|
||||
#define __PT_RC_REG gpr[3]
|
||||
#define __PT_SP_REG sp
|
||||
#define __PT_IP_REG nip
|
||||
/* powerpc does not select ARCH_HAS_SYSCALL_WRAPPER. */
|
||||
#define PT_REGS_SYSCALL_REGS(ctx) ctx
|
||||
|
||||
#elif defined(bpf_target_sparc)
|
||||
|
||||
/*
|
||||
* https://en.wikipedia.org/wiki/Calling_convention#SPARC
|
||||
*/
|
||||
|
||||
#define __PT_PARM1_REG u_regs[UREG_I0]
|
||||
#define __PT_PARM2_REG u_regs[UREG_I1]
|
||||
#define __PT_PARM3_REG u_regs[UREG_I2]
|
||||
#define __PT_PARM4_REG u_regs[UREG_I3]
|
||||
#define __PT_PARM5_REG u_regs[UREG_I4]
|
||||
#define __PT_PARM6_REG u_regs[UREG_I5]
|
||||
|
||||
#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
|
||||
#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
|
||||
#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
|
||||
#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
|
||||
#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
|
||||
#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
|
||||
|
||||
#define __PT_RET_REG u_regs[UREG_I7]
|
||||
#define __PT_FP_REG __unsupported__
|
||||
#define __PT_RC_REG u_regs[UREG_I0]
|
||||
@ -233,22 +346,42 @@ struct pt_regs___arm64 {
|
||||
|
||||
#elif defined(bpf_target_riscv)
|
||||
|
||||
/*
|
||||
* https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc#risc-v-calling-conventions
|
||||
*/
|
||||
|
||||
#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x))
|
||||
#define __PT_PARM1_REG a0
|
||||
#define __PT_PARM2_REG a1
|
||||
#define __PT_PARM3_REG a2
|
||||
#define __PT_PARM4_REG a3
|
||||
#define __PT_PARM5_REG a4
|
||||
#define __PT_PARM6_REG a5
|
||||
#define __PT_PARM7_REG a6
|
||||
#define __PT_PARM8_REG a7
|
||||
|
||||
/* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. */
|
||||
#define PT_REGS_SYSCALL_REGS(ctx) ctx
|
||||
#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
|
||||
#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
|
||||
#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
|
||||
#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
|
||||
#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
|
||||
#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
|
||||
|
||||
#define __PT_RET_REG ra
|
||||
#define __PT_FP_REG s0
|
||||
#define __PT_RC_REG a0
|
||||
#define __PT_SP_REG sp
|
||||
#define __PT_IP_REG pc
|
||||
/* riscv does not select ARCH_HAS_SYSCALL_WRAPPER. */
|
||||
#define PT_REGS_SYSCALL_REGS(ctx) ctx
|
||||
|
||||
#elif defined(bpf_target_arc)
|
||||
|
||||
/*
|
||||
* Section "Function Calling Sequence" (page 24):
|
||||
* https://raw.githubusercontent.com/wiki/foss-for-synopsys-dwc-arc-processors/toolchain/files/ARCv2_ABI.pdf
|
||||
*/
|
||||
|
||||
/* arc provides struct user_pt_regs instead of struct pt_regs to userspace */
|
||||
#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x))
|
||||
#define __PT_PARM1_REG scratch.r0
|
||||
@ -256,30 +389,55 @@ struct pt_regs___arm64 {
|
||||
#define __PT_PARM3_REG scratch.r2
|
||||
#define __PT_PARM4_REG scratch.r3
|
||||
#define __PT_PARM5_REG scratch.r4
|
||||
#define __PT_PARM6_REG scratch.r5
|
||||
#define __PT_PARM7_REG scratch.r6
|
||||
#define __PT_PARM8_REG scratch.r7
|
||||
|
||||
/* arc does not select ARCH_HAS_SYSCALL_WRAPPER. */
|
||||
#define PT_REGS_SYSCALL_REGS(ctx) ctx
|
||||
#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
|
||||
#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
|
||||
#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
|
||||
#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
|
||||
#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
|
||||
#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
|
||||
|
||||
#define __PT_RET_REG scratch.blink
|
||||
#define __PT_FP_REG __unsupported__
|
||||
#define __PT_FP_REG scratch.fp
|
||||
#define __PT_RC_REG scratch.r0
|
||||
#define __PT_SP_REG scratch.sp
|
||||
#define __PT_IP_REG scratch.ret
|
||||
/* arc does not select ARCH_HAS_SYSCALL_WRAPPER. */
|
||||
#define PT_REGS_SYSCALL_REGS(ctx) ctx
|
||||
|
||||
#elif defined(bpf_target_loongarch)
|
||||
|
||||
/* https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html */
|
||||
/*
|
||||
* https://docs.kernel.org/loongarch/introduction.html
|
||||
* https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html
|
||||
*/
|
||||
|
||||
#define __PT_PARM1_REG regs[4]
|
||||
#define __PT_PARM2_REG regs[5]
|
||||
#define __PT_PARM3_REG regs[6]
|
||||
#define __PT_PARM4_REG regs[7]
|
||||
#define __PT_PARM5_REG regs[8]
|
||||
#define __PT_PARM6_REG regs[9]
|
||||
#define __PT_PARM7_REG regs[10]
|
||||
#define __PT_PARM8_REG regs[11]
|
||||
|
||||
/* loongarch does not select ARCH_HAS_SYSCALL_WRAPPER. */
|
||||
#define PT_REGS_SYSCALL_REGS(ctx) ctx
|
||||
#define __PT_PARM1_SYSCALL_REG __PT_PARM1_REG
|
||||
#define __PT_PARM2_SYSCALL_REG __PT_PARM2_REG
|
||||
#define __PT_PARM3_SYSCALL_REG __PT_PARM3_REG
|
||||
#define __PT_PARM4_SYSCALL_REG __PT_PARM4_REG
|
||||
#define __PT_PARM5_SYSCALL_REG __PT_PARM5_REG
|
||||
#define __PT_PARM6_SYSCALL_REG __PT_PARM6_REG
|
||||
|
||||
#define __PT_RET_REG regs[1]
|
||||
#define __PT_FP_REG regs[22]
|
||||
#define __PT_RC_REG regs[4]
|
||||
#define __PT_SP_REG regs[3]
|
||||
#define __PT_IP_REG csr_era
|
||||
/* loongarch does not select ARCH_HAS_SYSCALL_WRAPPER. */
|
||||
#define PT_REGS_SYSCALL_REGS(ctx) ctx
|
||||
|
||||
#endif
|
||||
|
||||
@ -287,16 +445,49 @@ struct pt_regs___arm64 {
|
||||
|
||||
struct pt_regs;
|
||||
|
||||
/* allow some architecutres to override `struct pt_regs` */
|
||||
/* allow some architectures to override `struct pt_regs` */
|
||||
#ifndef __PT_REGS_CAST
|
||||
#define __PT_REGS_CAST(x) (x)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Different architectures support different number of arguments passed
|
||||
* through registers. i386 supports just 3, some arches support up to 8.
|
||||
*/
|
||||
#ifndef __PT_PARM4_REG
|
||||
#define __PT_PARM4_REG __unsupported__
|
||||
#endif
|
||||
#ifndef __PT_PARM5_REG
|
||||
#define __PT_PARM5_REG __unsupported__
|
||||
#endif
|
||||
#ifndef __PT_PARM6_REG
|
||||
#define __PT_PARM6_REG __unsupported__
|
||||
#endif
|
||||
#ifndef __PT_PARM7_REG
|
||||
#define __PT_PARM7_REG __unsupported__
|
||||
#endif
|
||||
#ifndef __PT_PARM8_REG
|
||||
#define __PT_PARM8_REG __unsupported__
|
||||
#endif
|
||||
/*
|
||||
* Similarly, syscall-specific conventions might differ between function call
|
||||
 * conventions within each architecture. All supported architectures pass
|
||||
* either 6 or 7 syscall arguments in registers.
|
||||
*
|
||||
* See syscall(2) manpage for succinct table with information on each arch.
|
||||
*/
|
||||
#ifndef __PT_PARM7_SYSCALL_REG
|
||||
#define __PT_PARM7_SYSCALL_REG __unsupported__
|
||||
#endif
|
||||
|
||||
#define PT_REGS_PARM1(x) (__PT_REGS_CAST(x)->__PT_PARM1_REG)
|
||||
#define PT_REGS_PARM2(x) (__PT_REGS_CAST(x)->__PT_PARM2_REG)
|
||||
#define PT_REGS_PARM3(x) (__PT_REGS_CAST(x)->__PT_PARM3_REG)
|
||||
#define PT_REGS_PARM4(x) (__PT_REGS_CAST(x)->__PT_PARM4_REG)
|
||||
#define PT_REGS_PARM5(x) (__PT_REGS_CAST(x)->__PT_PARM5_REG)
|
||||
#define PT_REGS_PARM6(x) (__PT_REGS_CAST(x)->__PT_PARM6_REG)
|
||||
#define PT_REGS_PARM7(x) (__PT_REGS_CAST(x)->__PT_PARM7_REG)
|
||||
#define PT_REGS_PARM8(x) (__PT_REGS_CAST(x)->__PT_PARM8_REG)
|
||||
#define PT_REGS_RET(x) (__PT_REGS_CAST(x)->__PT_RET_REG)
|
||||
#define PT_REGS_FP(x) (__PT_REGS_CAST(x)->__PT_FP_REG)
|
||||
#define PT_REGS_RC(x) (__PT_REGS_CAST(x)->__PT_RC_REG)
|
||||
@ -308,6 +499,9 @@ struct pt_regs;
|
||||
#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM3_REG)
|
||||
#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM4_REG)
|
||||
#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM5_REG)
|
||||
#define PT_REGS_PARM6_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM6_REG)
|
||||
#define PT_REGS_PARM7_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM7_REG)
|
||||
#define PT_REGS_PARM8_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM8_REG)
|
||||
#define PT_REGS_RET_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RET_REG)
|
||||
#define PT_REGS_FP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_FP_REG)
|
||||
#define PT_REGS_RC_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RC_REG)
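For orientation (not part of this patch): these accessors are what a raw kprobe handler uses to pull arguments out of struct pt_regs, with the _CORE variants reading through CO-RE relocations. A minimal sketch, with do_unlinkat() chosen arbitrarily as the traced function:

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>

char LICENSE[] SEC("license") = "GPL";

SEC("kprobe/do_unlinkat")
int handle_unlinkat(struct pt_regs *ctx)
{
	/* do_unlinkat(int dfd, struct filename *name): arg #2 is the filename */
	struct filename *name = (struct filename *)PT_REGS_PARM2(ctx);

	bpf_printk("unlinkat dfd=%ld name=%s",
		   (long)PT_REGS_PARM1(ctx), BPF_CORE_READ(name, name));
	return 0;
}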
|
||||
@ -334,24 +528,33 @@ struct pt_regs;
|
||||
#endif
|
||||
|
||||
#ifndef PT_REGS_PARM1_SYSCALL
|
||||
#define PT_REGS_PARM1_SYSCALL(x) PT_REGS_PARM1(x)
|
||||
#define PT_REGS_PARM1_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM1_SYSCALL_REG)
|
||||
#define PT_REGS_PARM1_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM1_SYSCALL_REG)
|
||||
#endif
|
||||
#ifndef PT_REGS_PARM2_SYSCALL
|
||||
#define PT_REGS_PARM2_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM2_SYSCALL_REG)
|
||||
#define PT_REGS_PARM2_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM2_SYSCALL_REG)
|
||||
#endif
|
||||
#ifndef PT_REGS_PARM3_SYSCALL
|
||||
#define PT_REGS_PARM3_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM3_SYSCALL_REG)
|
||||
#define PT_REGS_PARM3_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM3_SYSCALL_REG)
|
||||
#endif
|
||||
#define PT_REGS_PARM2_SYSCALL(x) PT_REGS_PARM2(x)
|
||||
#define PT_REGS_PARM3_SYSCALL(x) PT_REGS_PARM3(x)
|
||||
#ifndef PT_REGS_PARM4_SYSCALL
|
||||
#define PT_REGS_PARM4_SYSCALL(x) PT_REGS_PARM4(x)
|
||||
#define PT_REGS_PARM4_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM4_SYSCALL_REG)
|
||||
#define PT_REGS_PARM4_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM4_SYSCALL_REG)
|
||||
#endif
|
||||
#define PT_REGS_PARM5_SYSCALL(x) PT_REGS_PARM5(x)
|
||||
|
||||
#ifndef PT_REGS_PARM1_CORE_SYSCALL
|
||||
#define PT_REGS_PARM1_CORE_SYSCALL(x) PT_REGS_PARM1_CORE(x)
|
||||
#ifndef PT_REGS_PARM5_SYSCALL
|
||||
#define PT_REGS_PARM5_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM5_SYSCALL_REG)
|
||||
#define PT_REGS_PARM5_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM5_SYSCALL_REG)
|
||||
#endif
|
||||
#define PT_REGS_PARM2_CORE_SYSCALL(x) PT_REGS_PARM2_CORE(x)
|
||||
#define PT_REGS_PARM3_CORE_SYSCALL(x) PT_REGS_PARM3_CORE(x)
|
||||
#ifndef PT_REGS_PARM4_CORE_SYSCALL
|
||||
#define PT_REGS_PARM4_CORE_SYSCALL(x) PT_REGS_PARM4_CORE(x)
|
||||
#ifndef PT_REGS_PARM6_SYSCALL
|
||||
#define PT_REGS_PARM6_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM6_SYSCALL_REG)
|
||||
#define PT_REGS_PARM6_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM6_SYSCALL_REG)
|
||||
#endif
|
||||
#ifndef PT_REGS_PARM7_SYSCALL
|
||||
#define PT_REGS_PARM7_SYSCALL(x) (__PT_REGS_CAST(x)->__PT_PARM7_SYSCALL_REG)
|
||||
#define PT_REGS_PARM7_CORE_SYSCALL(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM7_SYSCALL_REG)
|
||||
#endif
|
||||
#define PT_REGS_PARM5_CORE_SYSCALL(x) PT_REGS_PARM5_CORE(x)
|
||||
|
||||
#else /* defined(bpf_target_defined) */
|
||||
|
||||
@ -360,6 +563,9 @@ struct pt_regs;
|
||||
#define PT_REGS_PARM3(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_PARM4(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_PARM5(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_PARM6(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_PARM7(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_PARM8(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_RET(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_FP(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_RC(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
@ -371,6 +577,9 @@ struct pt_regs;
|
||||
#define PT_REGS_PARM3_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_PARM4_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_PARM5_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_PARM6_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_PARM7_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_PARM8_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_RET_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_FP_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_RC_CORE(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
@ -385,12 +594,16 @@ struct pt_regs;
|
||||
#define PT_REGS_PARM3_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_PARM4_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_PARM5_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_PARM6_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_PARM7_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
|
||||
#define PT_REGS_PARM1_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_PARM2_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_PARM3_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_PARM4_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_PARM5_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_PARM6_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
#define PT_REGS_PARM7_CORE_SYSCALL(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })
|
||||
|
||||
#endif /* defined(bpf_target_defined) */
|
||||
|
||||
@ -576,6 +789,9 @@ struct pt_regs;
|
||||
#define ___bpf_kprobe_args3(x, args...) ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx)
|
||||
#define ___bpf_kprobe_args4(x, args...) ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx)
|
||||
#define ___bpf_kprobe_args5(x, args...) ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx)
|
||||
#define ___bpf_kprobe_args6(x, args...) ___bpf_kprobe_args5(args), (void *)PT_REGS_PARM6(ctx)
|
||||
#define ___bpf_kprobe_args7(x, args...) ___bpf_kprobe_args6(args), (void *)PT_REGS_PARM7(ctx)
|
||||
#define ___bpf_kprobe_args8(x, args...) ___bpf_kprobe_args7(args), (void *)PT_REGS_PARM8(ctx)
|
||||
#define ___bpf_kprobe_args(args...) ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args)
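These variadic expanders are what let BPF_KPROBE() present typed arguments instead of the raw ctx. A minimal sketch (same includes and license boilerplate as the earlier kprobe sketch; the probed function is again just an example):

SEC("kprobe/do_unlinkat")
int BPF_KPROBE(handle_unlinkat_typed, int dfd, struct filename *name)
{
	/* dfd and name are cast from PT_REGS_PARM1/PARM2 by the macro */
	bpf_printk("unlinkat dfd=%d name=%p", dfd, name);
	return 0;
}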
|
||||
|
||||
/*
|
||||
@ -632,6 +848,8 @@ static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
|
||||
#define ___bpf_syscall_args3(x, args...) ___bpf_syscall_args2(args), (void *)PT_REGS_PARM3_SYSCALL(regs)
|
||||
#define ___bpf_syscall_args4(x, args...) ___bpf_syscall_args3(args), (void *)PT_REGS_PARM4_SYSCALL(regs)
|
||||
#define ___bpf_syscall_args5(x, args...) ___bpf_syscall_args4(args), (void *)PT_REGS_PARM5_SYSCALL(regs)
|
||||
#define ___bpf_syscall_args6(x, args...) ___bpf_syscall_args5(args), (void *)PT_REGS_PARM6_SYSCALL(regs)
|
||||
#define ___bpf_syscall_args7(x, args...) ___bpf_syscall_args6(args), (void *)PT_REGS_PARM7_SYSCALL(regs)
|
||||
#define ___bpf_syscall_args(args...) ___bpf_apply(___bpf_syscall_args, ___bpf_narg(args))(args)
|
||||
|
||||
/* If kernel doesn't have CONFIG_ARCH_HAS_SYSCALL_WRAPPER, we have to BPF_CORE_READ from pt_regs */
|
||||
@ -641,6 +859,8 @@ static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
|
||||
#define ___bpf_syswrap_args3(x, args...) ___bpf_syswrap_args2(args), (void *)PT_REGS_PARM3_CORE_SYSCALL(regs)
|
||||
#define ___bpf_syswrap_args4(x, args...) ___bpf_syswrap_args3(args), (void *)PT_REGS_PARM4_CORE_SYSCALL(regs)
|
||||
#define ___bpf_syswrap_args5(x, args...) ___bpf_syswrap_args4(args), (void *)PT_REGS_PARM5_CORE_SYSCALL(regs)
|
||||
#define ___bpf_syswrap_args6(x, args...) ___bpf_syswrap_args5(args), (void *)PT_REGS_PARM6_CORE_SYSCALL(regs)
|
||||
#define ___bpf_syswrap_args7(x, args...) ___bpf_syswrap_args6(args), (void *)PT_REGS_PARM7_CORE_SYSCALL(regs)
|
||||
#define ___bpf_syswrap_args(args...) ___bpf_apply(___bpf_syswrap_args, ___bpf_narg(args))(args)
|
||||
|
||||
/*
|
||||
@ -690,4 +910,11 @@ ____##name(struct pt_regs *ctx, ##args)
|
||||
|
||||
#define BPF_KPROBE_SYSCALL BPF_KSYSCALL
|
||||
|
||||
/* BPF_UPROBE and BPF_URETPROBE are identical to BPF_KPROBE and BPF_KRETPROBE,
|
||||
* but are named way less confusingly for SEC("uprobe") and SEC("uretprobe")
|
||||
* use cases.
|
||||
*/
|
||||
#define BPF_UPROBE(name, args...) BPF_KPROBE(name, ##args)
|
||||
#define BPF_URETPROBE(name, args...) BPF_KRETPROBE(name, ##args)
|
||||
|
||||
#endif
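A sketch of the new BPF_UPROBE/BPF_URETPROBE aliases together with SEC("uprobe") auto-attach (not from this patch; the libc path is an assumption and differs per distro):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

SEC("uprobe//usr/lib/x86_64-linux-gnu/libc.so.6:malloc")
int BPF_UPROBE(handle_malloc, unsigned long size)
{
	bpf_printk("malloc(%lu)", size);
	return 0;
}

SEC("uretprobe//usr/lib/x86_64-linux-gnu/libc.so.6:malloc")
int BPF_URETPROBE(handle_malloc_ret, void *ptr)
{
	bpf_printk("malloc() = %p", ptr);
	return 0;
}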
|
||||
|
@ -7355,7 +7355,7 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj)
|
||||
if (!bpf_map__is_internal(m))
|
||||
continue;
|
||||
if (!kernel_supports(obj, FEAT_ARRAY_MMAP))
|
||||
m->def.map_flags ^= BPF_F_MMAPABLE;
|
||||
m->def.map_flags &= ~BPF_F_MMAPABLE;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -8605,6 +8605,7 @@ static const struct bpf_sec_def section_defs[] = {
|
||||
SEC_DEF("cgroup/setsockopt", CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT, SEC_ATTACHABLE),
|
||||
SEC_DEF("cgroup/dev", CGROUP_DEVICE, BPF_CGROUP_DEVICE, SEC_ATTACHABLE_OPT),
|
||||
SEC_DEF("struct_ops+", STRUCT_OPS, 0, SEC_NONE),
|
||||
SEC_DEF("struct_ops.s+", STRUCT_OPS, 0, SEC_SLEEPABLE),
|
||||
SEC_DEF("sk_lookup", SK_LOOKUP, BPF_SK_LOOKUP, SEC_ATTACHABLE),
|
||||
};
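The new "struct_ops.s" section lets a struct_ops program be loaded as sleepable; the dummy_st_ops selftest further below exercises it. A minimal sketch of the BPF side (type and member names come from the kernel's bpf_dummy_ops test infrastructure and should be treated as assumptions):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

SEC("struct_ops.s/test_sleepable")
int BPF_PROG(test_sleepable, struct bpf_dummy_ops_state *state)
{
	/* sleepable context: blocking helpers/kfuncs are permitted here */
	return 0;
}

SEC(".struct_ops")
struct bpf_dummy_ops dummy_1 = {
	.test_sleepable = (void *)test_sleepable,
};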
|
||||
|
||||
@ -9994,9 +9995,16 @@ static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,
|
||||
const char *kfunc_name, size_t offset)
|
||||
{
|
||||
static int index = 0;
|
||||
int i;
|
||||
|
||||
snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset,
|
||||
__sync_fetch_and_add(&index, 1));
|
||||
|
||||
/* sanitize binary_path in the probe name */
|
||||
for (i = 0; buf[i]; i++) {
|
||||
if (!isalnum(buf[i]))
|
||||
buf[i] = '_';
|
||||
}
|
||||
}
|
||||
|
||||
static int add_kprobe_event_legacy(const char *probe_name, bool retprobe,
|
||||
|
@ -233,11 +233,30 @@ LIBBPF_API int bpf_object__load(struct bpf_object *obj);
|
||||
*/
|
||||
LIBBPF_API void bpf_object__close(struct bpf_object *obj);
|
||||
|
||||
/* pin_maps and unpin_maps can both be called with a NULL path, in which case
|
||||
* they will use the pin_path attribute of each map (and ignore all maps that
|
||||
* don't have a pin_path set).
|
||||
/**
|
||||
* @brief **bpf_object__pin_maps()** pins each map contained within
|
||||
* the BPF object at the passed directory.
|
||||
* @param obj Pointer to a valid BPF object
|
||||
* @param path A directory where maps should be pinned.
|
||||
* @return 0, on success; negative error code, otherwise
|
||||
*
|
||||
* If `path` is NULL `bpf_map__pin` (which is being used on each map)
|
||||
* will use the pin_path attribute of each map. In this case, maps that
|
||||
* don't have a pin_path set will be ignored.
|
||||
*/
|
||||
LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path);
|
||||
|
||||
/**
|
||||
* @brief **bpf_object__unpin_maps()** unpins each map contained within
|
||||
* the BPF object found in the passed directory.
|
||||
* @param obj Pointer to a valid BPF object
|
||||
* @param path A directory where pinned maps should be searched for.
|
||||
* @return 0, on success; negative error code, otherwise
|
||||
*
|
||||
* If `path` is NULL `bpf_map__unpin` (which is being used on each map)
|
||||
* will use the pin_path attribute of each map. In this case, maps that
|
||||
* don't have a pin_path set will be ignored.
|
||||
*/
|
||||
LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj,
|
||||
const char *path);
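A minimal userspace sketch of the object-level pinning flow described above (the object file name and bpffs directory are assumptions):

#include <bpf/libbpf.h>

static int pin_all_maps(void)
{
	struct bpf_object *obj;
	int err;

	obj = bpf_object__open_file("prog.bpf.o", NULL);
	if (!obj)
		return -1;

	err = bpf_object__load(obj);
	if (!err)
		/* pins every map under the given bpffs directory */
		err = bpf_object__pin_maps(obj, "/sys/fs/bpf/myprog");

	bpf_object__close(obj);
	return err;
}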
|
||||
LIBBPF_API int bpf_object__pin_programs(struct bpf_object *obj,
|
||||
@ -848,10 +867,57 @@ LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize
|
||||
* @return true, if the map is an internal map; false, otherwise
|
||||
*/
|
||||
LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
|
||||
|
||||
/**
|
||||
* @brief **bpf_map__set_pin_path()** sets the path attribute that tells where the
|
||||
* BPF map should be pinned. This does not actually create the 'pin'.
|
||||
* @param map The bpf_map
|
||||
* @param path The path
|
||||
* @return 0, on success; negative error, otherwise
|
||||
*/
|
||||
LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
|
||||
|
||||
/**
|
||||
* @brief **bpf_map__pin_path()** gets the path attribute that tells where the
|
||||
* BPF map should be pinned.
|
||||
* @param map The bpf_map
|
||||
* @return The path string; which can be NULL
|
||||
*/
|
||||
LIBBPF_API const char *bpf_map__pin_path(const struct bpf_map *map);
|
||||
|
||||
/**
|
||||
* @brief **bpf_map__is_pinned()** tells the caller whether or not the
|
||||
* passed map has been pinned via a 'pin' file.
|
||||
* @param map The bpf_map
|
||||
* @return true, if the map is pinned; false, otherwise
|
||||
*/
|
||||
LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map);
|
||||
|
||||
/**
|
||||
* @brief **bpf_map__pin()** creates a file that serves as a 'pin'
|
||||
* for the BPF map. This increments the reference count on the
|
||||
* BPF map which will keep the BPF map loaded even after the
|
||||
* userspace process which loaded it has exited.
|
||||
* @param map The bpf_map to pin
|
||||
* @param path A file path for the 'pin'
|
||||
* @return 0, on success; negative error, otherwise
|
||||
*
|
||||
* If `path` is NULL the maps `pin_path` attribute will be used. If this is
|
||||
* also NULL, an error will be returned and the map will not be pinned.
|
||||
*/
|
||||
LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
|
||||
|
||||
/**
|
||||
* @brief **bpf_map__unpin()** removes the file that serves as a
|
||||
* 'pin' for the BPF map.
|
||||
* @param map The bpf_map to unpin
|
||||
* @param path A file path for the 'pin'
|
||||
* @return 0, on success; negative error, otherwise
|
||||
*
|
||||
* The `path` parameter can be NULL, in which case the `pin_path`
|
||||
* map attribute is unpinned. If both the `path` parameter and
|
||||
* `pin_path` map attribute are set, they must be equal.
|
||||
*/
|
||||
LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
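And, continuing from the sketch above, the per-map flavor using pin_path (map name and path are assumptions):

struct bpf_map *map = bpf_object__find_map_by_name(obj, "counters");

if (map && !bpf_map__is_pinned(map)) {
	bpf_map__set_pin_path(map, "/sys/fs/bpf/counters");
	/* NULL path makes bpf_map__pin() fall back to the pin_path set above */
	bpf_map__pin(map, NULL);
}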
|
||||
|
||||
LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd);
|
||||
@ -1064,7 +1130,8 @@ struct user_ring_buffer_opts {
|
||||
|
||||
#define user_ring_buffer_opts__last_field sz
|
||||
|
||||
/* @brief **user_ring_buffer__new()** creates a new instance of a user ring
|
||||
/**
|
||||
* @brief **user_ring_buffer__new()** creates a new instance of a user ring
|
||||
* buffer.
|
||||
*
|
||||
* @param map_fd A file descriptor to a BPF_MAP_TYPE_USER_RINGBUF map.
|
||||
@ -1075,7 +1142,8 @@ struct user_ring_buffer_opts {
|
||||
LIBBPF_API struct user_ring_buffer *
|
||||
user_ring_buffer__new(int map_fd, const struct user_ring_buffer_opts *opts);
|
||||
|
||||
/* @brief **user_ring_buffer__reserve()** reserves a pointer to a sample in the
|
||||
/**
|
||||
* @brief **user_ring_buffer__reserve()** reserves a pointer to a sample in the
|
||||
* user ring buffer.
|
||||
* @param rb A pointer to a user ring buffer.
|
||||
* @param size The size of the sample, in bytes.
|
||||
@ -1095,7 +1163,8 @@ user_ring_buffer__new(int map_fd, const struct user_ring_buffer_opts *opts);
|
||||
*/
|
||||
LIBBPF_API void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size);
|
||||
|
||||
/* @brief **user_ring_buffer__reserve_blocking()** reserves a record in the
|
||||
/**
|
||||
* @brief **user_ring_buffer__reserve_blocking()** reserves a record in the
|
||||
* ring buffer, possibly blocking for up to @timeout_ms until a sample becomes
|
||||
* available.
|
||||
* @param rb The user ring buffer.
|
||||
@ -1139,7 +1208,8 @@ LIBBPF_API void *user_ring_buffer__reserve_blocking(struct user_ring_buffer *rb,
|
||||
__u32 size,
|
||||
int timeout_ms);
|
||||
|
||||
/* @brief **user_ring_buffer__submit()** submits a previously reserved sample
|
||||
/**
|
||||
* @brief **user_ring_buffer__submit()** submits a previously reserved sample
|
||||
* into the ring buffer.
|
||||
* @param rb The user ring buffer.
|
||||
* @param sample A reserved sample.
|
||||
@ -1149,7 +1219,8 @@ LIBBPF_API void *user_ring_buffer__reserve_blocking(struct user_ring_buffer *rb,
|
||||
*/
|
||||
LIBBPF_API void user_ring_buffer__submit(struct user_ring_buffer *rb, void *sample);
|
||||
|
||||
/* @brief **user_ring_buffer__discard()** discards a previously reserved sample.
|
||||
/**
|
||||
* @brief **user_ring_buffer__discard()** discards a previously reserved sample.
|
||||
* @param rb The user ring buffer.
|
||||
* @param sample A reserved sample.
|
||||
*
|
||||
@ -1158,7 +1229,8 @@ LIBBPF_API void user_ring_buffer__submit(struct user_ring_buffer *rb, void *samp
|
||||
*/
|
||||
LIBBPF_API void user_ring_buffer__discard(struct user_ring_buffer *rb, void *sample);
|
||||
|
||||
/* @brief **user_ring_buffer__free()** frees a ring buffer that was previously
|
||||
/**
|
||||
* @brief **user_ring_buffer__free()** frees a ring buffer that was previously
|
||||
* created with **user_ring_buffer__new()**.
|
||||
* @param rb The user ring buffer being freed.
|
||||
*/
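A producer-side sketch of the user ring buffer API documented above (obtaining map_fd for the BPF_MAP_TYPE_USER_RINGBUF map is out of scope; the 1s timeout is arbitrary):

#include <errno.h>
#include <string.h>
#include <bpf/libbpf.h>

static int produce_sample(int map_fd, const void *data, __u32 size)
{
	struct user_ring_buffer *rb;
	void *sample;

	rb = user_ring_buffer__new(map_fd, NULL);
	if (!rb)
		return -errno;

	sample = user_ring_buffer__reserve_blocking(rb, size, 1000 /* ms */);
	if (!sample) {
		user_ring_buffer__free(rb);
		return -errno;
	}

	memcpy(sample, data, size);
	/* makes the sample visible to the kernel-side bpf_user_ringbuf_drain() */
	user_ring_buffer__submit(rb, sample);
	user_ring_buffer__free(rb);
	return 0;
}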
|
||||
|
@ -20,8 +20,8 @@
|
||||
/* make sure libbpf doesn't use kernel-only integer typedefs */
|
||||
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
|
||||
|
||||
/* prevent accidental re-addition of reallocarray() */
|
||||
#pragma GCC poison reallocarray
|
||||
/* prevent accidental re-addition of reallocarray()/strlcpy() */
|
||||
#pragma GCC poison reallocarray strlcpy
|
||||
|
||||
#include "libbpf.h"
|
||||
#include "btf.h"
|
||||
|
tools/testing/selftests/bpf/.gitignore
@ -47,3 +47,4 @@ test_cpp
|
||||
xskxceiver
|
||||
xdp_redirect_multi
|
||||
xdp_synproxy
|
||||
xdp_hw_metadata
|
||||
|
@ -13,6 +13,7 @@ cgroup_hierarchical_stats # JIT does not support calling kernel f
|
||||
cgrp_kfunc # JIT does not support calling kernel function
|
||||
cgrp_local_storage # prog_attach unexpected error: -524 (trampoline)
|
||||
core_read_macros # unknown func bpf_probe_read#4 (overlapping)
|
||||
cpumask # JIT does not support calling kernel function
|
||||
d_path # failed to auto-attach program 'prog_stat': -524 (trampoline)
|
||||
decap_sanity # JIT does not support calling kernel function (kfunc)
|
||||
deny_namespace # failed to attach: ERROR: strerror_r(-524)=22 (trampoline)
|
||||
@ -44,6 +45,7 @@ map_kptr # failed to open_and_load program: -524
|
||||
modify_return # modify_return attach failed: -524 (trampoline)
|
||||
module_attach # skel_attach skeleton attach failed: -524 (trampoline)
|
||||
mptcp
|
||||
nested_trust # JIT does not support calling kernel function
|
||||
netcnt # failed to load BPF skeleton 'netcnt_prog': -7 (?)
|
||||
probe_user # check_kprobe_res wrong kprobe res from probe read (?)
|
||||
rcu_read_lock # failed to find kernel BTF type ID of '__x64_sys_getpgid': -3 (?)
|
||||
@ -86,5 +88,6 @@ xdp_adjust_tail # case-128 err 0 errno 28 retval 1 size
|
||||
xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline)
|
||||
xdp_bpf2bpf # failed to auto-attach program 'trace_on_entry': -524 (trampoline)
|
||||
xdp_do_redirect # prog_run_max_size unexpected error: -22 (errno 22)
|
||||
xdp_metadata # JIT does not support calling kernel function (kfunc)
|
||||
xdp_synproxy # JIT does not support calling kernel function (kfunc)
|
||||
xfrm_info # JIT does not support calling kernel function (kfunc)
|
||||
|
@ -83,7 +83,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
|
||||
TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
|
||||
flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
|
||||
test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
|
||||
xskxceiver xdp_redirect_multi xdp_synproxy veristat
|
||||
xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata
|
||||
|
||||
TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read $(OUTPUT)/sign-file
|
||||
TEST_GEN_FILES += liburandom_read.so
|
||||
@ -181,14 +181,15 @@ endif
|
||||
# do not fail. Static builds leave urandom_read relying on system-wide shared libraries.
|
||||
$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c
|
||||
$(call msg,LIB,,$@)
|
||||
$(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) \
|
||||
$(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) \
|
||||
$^ $(filter-out -static,$(LDLIBS)) \
|
||||
-fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \
|
||||
-fPIC -shared -o $@
|
||||
|
||||
$(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so
|
||||
$(call msg,BINARY,,$@)
|
||||
$(Q)$(CLANG) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \
|
||||
liburandom_read.so $(LDLIBS) \
|
||||
liburandom_read.so $(filter-out -static,$(LDLIBS)) \
|
||||
-fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \
|
||||
-Wl,-rpath=. -o $@
|
||||
|
||||
@ -240,7 +241,6 @@ $(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS)
|
||||
$(OUTPUT)/test_maps: $(TESTING_HELPERS)
|
||||
$(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS)
|
||||
$(OUTPUT)/xsk.o: $(BPFOBJ)
|
||||
$(OUTPUT)/xskxceiver: $(OUTPUT)/xsk.o
|
||||
|
||||
BPFTOOL ?= $(DEFAULT_BPFTOOL)
|
||||
$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
|
||||
@ -383,6 +383,8 @@ linked_maps.skel.h-deps := linked_maps1.bpf.o linked_maps2.bpf.o
|
||||
test_subskeleton.skel.h-deps := test_subskeleton_lib2.bpf.o test_subskeleton_lib.bpf.o test_subskeleton.bpf.o
|
||||
test_subskeleton_lib.skel.h-deps := test_subskeleton_lib2.bpf.o test_subskeleton_lib.bpf.o
|
||||
test_usdt.skel.h-deps := test_usdt.bpf.o test_usdt_multispec.bpf.o
|
||||
xsk_xdp_progs.skel.h-deps := xsk_xdp_progs.bpf.o
|
||||
xdp_hw_metadata.skel.h-deps := xdp_hw_metadata.bpf.o
|
||||
|
||||
LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
|
||||
|
||||
@ -527,7 +529,7 @@ TRUNNER_BPF_PROGS_DIR := progs
|
||||
TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
|
||||
network_helpers.c testing_helpers.c \
|
||||
btf_helpers.c flow_dissector_load.h \
|
||||
cap_helpers.c test_loader.c
|
||||
cap_helpers.c test_loader.c xsk.c
|
||||
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \
|
||||
$(OUTPUT)/liburandom_read.so \
|
||||
$(OUTPUT)/xdp_synproxy \
|
||||
@ -576,6 +578,14 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
|
||||
$(call msg,BINARY,,$@)
|
||||
$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
|
||||
|
||||
$(OUTPUT)/xskxceiver: xskxceiver.c $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT)
|
||||
$(call msg,BINARY,,$@)
|
||||
$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
|
||||
|
||||
$(OUTPUT)/xdp_hw_metadata: xdp_hw_metadata.c $(OUTPUT)/network_helpers.o $(OUTPUT)/xsk.o $(OUTPUT)/xdp_hw_metadata.skel.h | $(OUTPUT)
|
||||
$(call msg,BINARY,,$@)
|
||||
$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
|
||||
|
||||
# Make sure we are able to include and link libbpf against c++.
|
||||
$(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
|
||||
$(call msg,CXX,,$@)
|
||||
|
@ -93,11 +93,11 @@ static struct {
|
||||
const char *prog_name;
|
||||
const char *expected_err_msg;
|
||||
} failure_tests[] = {
|
||||
{"cgrp_kfunc_acquire_untrusted", "R1 must be referenced or trusted"},
|
||||
{"cgrp_kfunc_acquire_untrusted", "Possibly NULL pointer passed to trusted arg0"},
|
||||
{"cgrp_kfunc_acquire_fp", "arg#0 pointer type STRUCT cgroup must point"},
|
||||
{"cgrp_kfunc_acquire_unsafe_kretprobe", "reg type unsupported for arg#0 function"},
|
||||
{"cgrp_kfunc_acquire_trusted_walked", "R1 must be referenced or trusted"},
|
||||
{"cgrp_kfunc_acquire_null", "arg#0 pointer type STRUCT cgroup must point"},
|
||||
{"cgrp_kfunc_acquire_null", "Possibly NULL pointer passed to trusted arg0"},
|
||||
{"cgrp_kfunc_acquire_unreleased", "Unreleased reference"},
|
||||
{"cgrp_kfunc_get_non_kptr_param", "arg#0 expected pointer to map value"},
|
||||
{"cgrp_kfunc_get_non_kptr_acquired", "arg#0 expected pointer to map value"},
|
||||
|
tools/testing/selftests/bpf/prog_tests/cpumask.c (new file)
@ -0,0 +1,74 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
|
||||
|
||||
#include <test_progs.h>
|
||||
#include "cpumask_failure.skel.h"
|
||||
#include "cpumask_success.skel.h"
|
||||
|
||||
static const char * const cpumask_success_testcases[] = {
|
||||
"test_alloc_free_cpumask",
|
||||
"test_set_clear_cpu",
|
||||
"test_setall_clear_cpu",
|
||||
"test_first_firstzero_cpu",
|
||||
"test_test_and_set_clear",
|
||||
"test_and_or_xor",
|
||||
"test_intersects_subset",
|
||||
"test_copy_any_anyand",
|
||||
"test_insert_leave",
|
||||
"test_insert_remove_release",
|
||||
"test_insert_kptr_get_release",
|
||||
};
|
||||
|
||||
static void verify_success(const char *prog_name)
|
||||
{
|
||||
struct cpumask_success *skel;
|
||||
struct bpf_program *prog;
|
||||
struct bpf_link *link = NULL;
|
||||
pid_t child_pid;
|
||||
int status;
|
||||
|
||||
skel = cpumask_success__open();
|
||||
if (!ASSERT_OK_PTR(skel, "cpumask_success__open"))
|
||||
return;
|
||||
|
||||
skel->bss->pid = getpid();
|
||||
skel->bss->nr_cpus = libbpf_num_possible_cpus();
|
||||
|
||||
cpumask_success__load(skel);
|
||||
if (!ASSERT_OK_PTR(skel, "cpumask_success__load"))
|
||||
goto cleanup;
|
||||
|
||||
prog = bpf_object__find_program_by_name(skel->obj, prog_name);
|
||||
if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
|
||||
goto cleanup;
|
||||
|
||||
link = bpf_program__attach(prog);
|
||||
if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
|
||||
goto cleanup;
|
||||
|
||||
child_pid = fork();
|
||||
if (!ASSERT_GT(child_pid, -1, "child_pid"))
|
||||
goto cleanup;
|
||||
if (child_pid == 0)
|
||||
_exit(0);
|
||||
waitpid(child_pid, &status, 0);
|
||||
ASSERT_OK(skel->bss->err, "post_wait_err");
|
||||
|
||||
cleanup:
|
||||
bpf_link__destroy(link);
|
||||
cpumask_success__destroy(skel);
|
||||
}
|
||||
|
||||
void test_cpumask(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(cpumask_success_testcases); i++) {
|
||||
if (!test__start_subtest(cpumask_success_testcases[i]))
|
||||
continue;
|
||||
|
||||
verify_success(cpumask_success_testcases[i]);
|
||||
}
|
||||
|
||||
RUN_TESTS(cpumask_failure);
|
||||
}
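For reference, the BPF side these subtests exercise looks roughly like the following (kfunc names and signatures are taken from the cpumask suite added in this series and should be treated as assumptions; the tracepoint is arbitrary):

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

struct bpf_cpumask *bpf_cpumask_create(void) __ksym;
void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym;
void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym;

SEC("tp_btf/task_newtask")
int BPF_PROG(cpumask_sketch, struct task_struct *task, u64 clone_flags)
{
	struct bpf_cpumask *mask;

	mask = bpf_cpumask_create();
	if (!mask)
		return 0;

	bpf_cpumask_set_cpu(0, mask);
	if (bpf_cpumask_test_cpu(0, (const struct cpumask *)mask))
		bpf_printk("cpu0 set as expected");

	/* allocated cpumasks are referenced kptrs and must be released */
	bpf_cpumask_release(mask);
	return 0;
}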
|
@ -1,7 +1,8 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
|
||||
#include <test_progs.h>
|
||||
#include "dummy_st_ops.skel.h"
|
||||
#include "dummy_st_ops_success.skel.h"
|
||||
#include "dummy_st_ops_fail.skel.h"
|
||||
#include "trace_dummy_st_ops.skel.h"
|
||||
|
||||
/* Need to keep consistent with definition in include/linux/bpf.h */
|
||||
@ -11,17 +12,17 @@ struct bpf_dummy_ops_state {
|
||||
|
||||
static void test_dummy_st_ops_attach(void)
|
||||
{
|
||||
struct dummy_st_ops *skel;
|
||||
struct dummy_st_ops_success *skel;
|
||||
struct bpf_link *link;
|
||||
|
||||
skel = dummy_st_ops__open_and_load();
|
||||
skel = dummy_st_ops_success__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
|
||||
return;
|
||||
|
||||
link = bpf_map__attach_struct_ops(skel->maps.dummy_1);
|
||||
ASSERT_EQ(libbpf_get_error(link), -EOPNOTSUPP, "dummy_st_ops_attach");
|
||||
|
||||
dummy_st_ops__destroy(skel);
|
||||
dummy_st_ops_success__destroy(skel);
|
||||
}
|
||||
|
||||
static void test_dummy_init_ret_value(void)
|
||||
@ -31,10 +32,10 @@ static void test_dummy_init_ret_value(void)
|
||||
.ctx_in = args,
|
||||
.ctx_size_in = sizeof(args),
|
||||
);
|
||||
struct dummy_st_ops *skel;
|
||||
struct dummy_st_ops_success *skel;
|
||||
int fd, err;
|
||||
|
||||
skel = dummy_st_ops__open_and_load();
|
||||
skel = dummy_st_ops_success__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
|
||||
return;
|
||||
|
||||
@ -43,7 +44,7 @@ static void test_dummy_init_ret_value(void)
|
||||
ASSERT_OK(err, "test_run");
|
||||
ASSERT_EQ(attr.retval, 0xf2f3f4f5, "test_ret");
|
||||
|
||||
dummy_st_ops__destroy(skel);
|
||||
dummy_st_ops_success__destroy(skel);
|
||||
}
|
||||
|
||||
static void test_dummy_init_ptr_arg(void)
|
||||
@ -58,10 +59,10 @@ static void test_dummy_init_ptr_arg(void)
|
||||
.ctx_size_in = sizeof(args),
|
||||
);
|
||||
struct trace_dummy_st_ops *trace_skel;
|
||||
struct dummy_st_ops *skel;
|
||||
struct dummy_st_ops_success *skel;
|
||||
int fd, err;
|
||||
|
||||
skel = dummy_st_ops__open_and_load();
|
||||
skel = dummy_st_ops_success__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
|
||||
return;
|
||||
|
||||
@ -91,7 +92,7 @@ static void test_dummy_init_ptr_arg(void)
|
||||
ASSERT_EQ(trace_skel->bss->val, exp_retval, "fentry_val");
|
||||
|
||||
done:
|
||||
dummy_st_ops__destroy(skel);
|
||||
dummy_st_ops_success__destroy(skel);
|
||||
trace_dummy_st_ops__destroy(trace_skel);
|
||||
}
|
||||
|
||||
@ -102,12 +103,12 @@ static void test_dummy_multiple_args(void)
|
||||
.ctx_in = args,
|
||||
.ctx_size_in = sizeof(args),
|
||||
);
|
||||
struct dummy_st_ops *skel;
|
||||
struct dummy_st_ops_success *skel;
|
||||
int fd, err;
|
||||
size_t i;
|
||||
char name[8];
|
||||
|
||||
skel = dummy_st_ops__open_and_load();
|
||||
skel = dummy_st_ops_success__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
|
||||
return;
|
||||
|
||||
@ -119,7 +120,28 @@ static void test_dummy_multiple_args(void)
|
||||
ASSERT_EQ(skel->bss->test_2_args[i], args[i], name);
|
||||
}
|
||||
|
||||
dummy_st_ops__destroy(skel);
|
||||
dummy_st_ops_success__destroy(skel);
|
||||
}
|
||||
|
||||
static void test_dummy_sleepable(void)
|
||||
{
|
||||
__u64 args[1] = {0};
|
||||
LIBBPF_OPTS(bpf_test_run_opts, attr,
|
||||
.ctx_in = args,
|
||||
.ctx_size_in = sizeof(args),
|
||||
);
|
||||
struct dummy_st_ops_success *skel;
|
||||
int fd, err;
|
||||
|
||||
skel = dummy_st_ops_success__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
|
||||
return;
|
||||
|
||||
fd = bpf_program__fd(skel->progs.test_sleepable);
|
||||
err = bpf_prog_test_run_opts(fd, &attr);
|
||||
ASSERT_OK(err, "test_run");
|
||||
|
||||
dummy_st_ops_success__destroy(skel);
|
||||
}
|
||||
|
||||
void test_dummy_st_ops(void)
|
||||
@ -132,4 +154,8 @@ void test_dummy_st_ops(void)
|
||||
test_dummy_init_ptr_arg();
|
||||
if (test__start_subtest("dummy_multiple_args"))
|
||||
test_dummy_multiple_args();
|
||||
if (test__start_subtest("dummy_sleepable"))
|
||||
test_dummy_sleepable();
|
||||
|
||||
RUN_TESTS(dummy_st_ops_fail);
|
||||
}
|
||||
|
@ -18,7 +18,7 @@ static struct {
|
||||
const char *expected_verifier_err_msg;
|
||||
int expected_runtime_err;
|
||||
} kfunc_dynptr_tests[] = {
|
||||
{"not_valid_dynptr", "Expected an initialized dynptr as arg #1", 0},
|
||||
{"not_valid_dynptr", "cannot pass in dynptr at an offset=-8", 0},
|
||||
{"not_ptr_to_stack", "arg#0 expected pointer to stack or dynptr_ptr", 0},
|
||||
{"dynptr_data_null", NULL, -EBADMSG},
|
||||
};
|
||||
|
@ -322,7 +322,7 @@ static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused)
|
||||
return strcmp((const char *) key1, (const char *) key2) == 0;
|
||||
}
|
||||
|
||||
static int get_syms(char ***symsp, size_t *cntp)
|
||||
static int get_syms(char ***symsp, size_t *cntp, bool kernel)
|
||||
{
|
||||
size_t cap = 0, cnt = 0, i;
|
||||
char *name = NULL, **syms = NULL;
|
||||
@ -349,8 +349,9 @@ static int get_syms(char ***symsp, size_t *cntp)
|
||||
}
|
||||
|
||||
while (fgets(buf, sizeof(buf), f)) {
|
||||
/* skip modules */
|
||||
if (strchr(buf, '['))
|
||||
if (kernel && strchr(buf, '['))
|
||||
continue;
|
||||
if (!kernel && !strchr(buf, '['))
|
||||
continue;
|
||||
|
||||
free(name);
|
||||
@ -404,7 +405,7 @@ error:
|
||||
return err;
|
||||
}
|
||||
|
||||
void serial_test_kprobe_multi_bench_attach(void)
|
||||
static void test_kprobe_multi_bench_attach(bool kernel)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
|
||||
struct kprobe_multi_empty *skel = NULL;
|
||||
@ -415,7 +416,7 @@ void serial_test_kprobe_multi_bench_attach(void)
|
||||
char **syms = NULL;
|
||||
size_t cnt = 0, i;
|
||||
|
||||
if (!ASSERT_OK(get_syms(&syms, &cnt), "get_syms"))
|
||||
if (!ASSERT_OK(get_syms(&syms, &cnt, kernel), "get_syms"))
|
||||
return;
|
||||
|
||||
skel = kprobe_multi_empty__open_and_load();
|
||||
@ -453,6 +454,14 @@ cleanup:
|
||||
}
|
||||
}
|
||||
|
||||
void serial_test_kprobe_multi_bench_attach(void)
|
||||
{
|
||||
if (test__start_subtest("kernel"))
|
||||
test_kprobe_multi_bench_attach(true);
|
||||
if (test__start_subtest("modules"))
|
||||
test_kprobe_multi_bench_attach(false);
|
||||
}
|
||||
|
||||
void test_kprobe_multi_test(void)
|
||||
{
|
||||
if (!ASSERT_OK(load_kallsyms(), "load_kallsyms"))
|
||||
|
tools/testing/selftests/bpf/prog_tests/nested_trust.c (new file)
@ -0,0 +1,12 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
|
||||
|
||||
#include <test_progs.h>
|
||||
#include "nested_trust_failure.skel.h"
|
||||
#include "nested_trust_success.skel.h"
|
||||
|
||||
void test_nested_trust(void)
|
||||
{
|
||||
RUN_TESTS(nested_trust_success);
|
||||
RUN_TESTS(nested_trust_failure);
|
||||
}
|
@ -4,6 +4,7 @@
|
||||
#define _GNU_SOURCE
|
||||
#include <sched.h>
|
||||
#include <linux/socket.h>
|
||||
#include <linux/tls.h>
|
||||
#include <net/if.h>
|
||||
|
||||
#include "test_progs.h"
|
||||
@ -83,6 +84,76 @@ static void test_udp(int family)
|
||||
ASSERT_EQ(bss->nr_binddev, 1, "nr_bind");
|
||||
}
|
||||
|
||||
static void test_ktls(int family)
|
||||
{
|
||||
struct tls12_crypto_info_aes_gcm_128 aes128;
|
||||
struct setget_sockopt__bss *bss = skel->bss;
|
||||
int cfd = -1, sfd = -1, fd = -1, ret;
|
||||
char buf;
|
||||
|
||||
memset(bss, 0, sizeof(*bss));
|
||||
|
||||
sfd = start_server(family, SOCK_STREAM,
|
||||
family == AF_INET6 ? addr6_str : addr4_str, 0, 0);
|
||||
if (!ASSERT_GE(sfd, 0, "start_server"))
|
||||
return;
|
||||
fd = connect_to_fd(sfd, 0);
|
||||
if (!ASSERT_GE(fd, 0, "connect_to_fd"))
|
||||
goto err_out;
|
||||
|
||||
cfd = accept(sfd, NULL, 0);
|
||||
if (!ASSERT_GE(cfd, 0, "accept"))
|
||||
goto err_out;
|
||||
|
||||
close(sfd);
|
||||
sfd = -1;
|
||||
|
||||
/* Setup KTLS */
|
||||
ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
|
||||
if (!ASSERT_OK(ret, "setsockopt"))
|
||||
goto err_out;
|
||||
ret = setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
|
||||
if (!ASSERT_OK(ret, "setsockopt"))
|
||||
goto err_out;
|
||||
|
||||
memset(&aes128, 0, sizeof(aes128));
|
||||
aes128.info.version = TLS_1_2_VERSION;
|
||||
aes128.info.cipher_type = TLS_CIPHER_AES_GCM_128;
|
||||
|
||||
ret = setsockopt(fd, SOL_TLS, TLS_TX, &aes128, sizeof(aes128));
|
||||
if (!ASSERT_OK(ret, "setsockopt"))
|
||||
goto err_out;
|
||||
|
||||
ret = setsockopt(cfd, SOL_TLS, TLS_RX, &aes128, sizeof(aes128));
|
||||
if (!ASSERT_OK(ret, "setsockopt"))
|
||||
goto err_out;
|
||||
|
||||
/* KTLS is enabled */
|
||||
|
||||
close(fd);
|
||||
/* At this point, the cfd socket is at the CLOSE_WAIT state
|
||||
* and still run TLS protocol. The test for
|
||||
* BPF_TCP_CLOSE_WAIT should be run at this point.
|
||||
*/
|
||||
ret = read(cfd, &buf, sizeof(buf));
|
||||
ASSERT_EQ(ret, 0, "read");
|
||||
close(cfd);
|
||||
|
||||
ASSERT_EQ(bss->nr_listen, 1, "nr_listen");
|
||||
ASSERT_EQ(bss->nr_connect, 1, "nr_connect");
|
||||
ASSERT_EQ(bss->nr_active, 1, "nr_active");
|
||||
ASSERT_EQ(bss->nr_passive, 1, "nr_passive");
|
||||
ASSERT_EQ(bss->nr_socket_post_create, 2, "nr_socket_post_create");
|
||||
ASSERT_EQ(bss->nr_binddev, 2, "nr_bind");
|
||||
ASSERT_EQ(bss->nr_fin_wait1, 1, "nr_fin_wait1");
|
||||
return;
|
||||
|
||||
err_out:
|
||||
close(fd);
|
||||
close(cfd);
|
||||
close(sfd);
|
||||
}
|
||||
|
||||
void test_setget_sockopt(void)
|
||||
{
|
||||
cg_fd = test__join_cgroup(CG_NAME);
|
||||
@ -118,6 +189,8 @@ void test_setget_sockopt(void)
|
||||
test_tcp(AF_INET);
|
||||
test_udp(AF_INET6);
|
||||
test_udp(AF_INET);
|
||||
test_ktls(AF_INET6);
|
||||
test_ktls(AF_INET);
|
||||
|
||||
done:
|
||||
setget_sockopt__destroy(skel);
|
||||
|
@ -9,9 +9,6 @@
|
||||
#include "task_kfunc_failure.skel.h"
|
||||
#include "task_kfunc_success.skel.h"
|
||||
|
||||
static size_t log_buf_sz = 1 << 20; /* 1 MB */
|
||||
static char obj_log_buf[1048576];
|
||||
|
||||
static struct task_kfunc_success *open_load_task_kfunc_skel(void)
|
||||
{
|
||||
struct task_kfunc_success *skel;
|
||||
@ -83,67 +80,6 @@ static const char * const success_tests[] = {
|
||||
"test_task_from_pid_invalid",
|
||||
};
|
||||
|
||||
static struct {
|
||||
const char *prog_name;
|
||||
const char *expected_err_msg;
|
||||
} failure_tests[] = {
|
||||
{"task_kfunc_acquire_untrusted", "R1 must be referenced or trusted"},
|
||||
{"task_kfunc_acquire_fp", "arg#0 pointer type STRUCT task_struct must point"},
|
||||
{"task_kfunc_acquire_unsafe_kretprobe", "reg type unsupported for arg#0 function"},
|
||||
{"task_kfunc_acquire_trusted_walked", "R1 must be referenced or trusted"},
|
||||
{"task_kfunc_acquire_null", "arg#0 pointer type STRUCT task_struct must point"},
|
||||
{"task_kfunc_acquire_unreleased", "Unreleased reference"},
|
||||
{"task_kfunc_get_non_kptr_param", "arg#0 expected pointer to map value"},
|
||||
{"task_kfunc_get_non_kptr_acquired", "arg#0 expected pointer to map value"},
|
||||
{"task_kfunc_get_null", "arg#0 expected pointer to map value"},
|
||||
{"task_kfunc_xchg_unreleased", "Unreleased reference"},
|
||||
{"task_kfunc_get_unreleased", "Unreleased reference"},
|
||||
{"task_kfunc_release_untrusted", "arg#0 is untrusted_ptr_or_null_ expected ptr_ or socket"},
|
||||
{"task_kfunc_release_fp", "arg#0 pointer type STRUCT task_struct must point"},
|
||||
{"task_kfunc_release_null", "arg#0 is ptr_or_null_ expected ptr_ or socket"},
|
||||
{"task_kfunc_release_unacquired", "release kernel function bpf_task_release expects"},
|
||||
{"task_kfunc_from_pid_no_null_check", "arg#0 is ptr_or_null_ expected ptr_ or socket"},
|
||||
{"task_kfunc_from_lsm_task_free", "reg type unsupported for arg#0 function"},
|
||||
};
|
||||
|
||||
static void verify_fail(const char *prog_name, const char *expected_err_msg)
|
||||
{
|
||||
LIBBPF_OPTS(bpf_object_open_opts, opts);
|
||||
struct task_kfunc_failure *skel;
|
||||
int err, i;
|
||||
|
||||
opts.kernel_log_buf = obj_log_buf;
|
||||
opts.kernel_log_size = log_buf_sz;
|
||||
opts.kernel_log_level = 1;
|
||||
|
||||
skel = task_kfunc_failure__open_opts(&opts);
|
||||
if (!ASSERT_OK_PTR(skel, "task_kfunc_failure__open_opts"))
|
||||
goto cleanup;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(failure_tests); i++) {
|
||||
struct bpf_program *prog;
|
||||
const char *curr_name = failure_tests[i].prog_name;
|
||||
|
||||
prog = bpf_object__find_program_by_name(skel->obj, curr_name);
|
||||
if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
|
||||
goto cleanup;
|
||||
|
||||
bpf_program__set_autoload(prog, !strcmp(curr_name, prog_name));
|
||||
}
|
||||
|
||||
err = task_kfunc_failure__load(skel);
|
||||
if (!ASSERT_ERR(err, "unexpected load success"))
|
||||
goto cleanup;
|
||||
|
||||
if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) {
|
||||
fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg);
|
||||
fprintf(stderr, "Verifier output: %s\n", obj_log_buf);
|
||||
}
|
||||
|
||||
cleanup:
|
||||
task_kfunc_failure__destroy(skel);
|
||||
}
|
||||
|
||||
void test_task_kfunc(void)
|
||||
{
|
||||
int i;
|
||||
@ -155,10 +91,5 @@ void test_task_kfunc(void)
|
||||
run_success_test(success_tests[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(failure_tests); i++) {
|
||||
if (!test__start_subtest(failure_tests[i].prog_name))
|
||||
continue;
|
||||
|
||||
verify_fail(failure_tests[i].prog_name, failure_tests[i].expected_err_msg);
|
||||
}
|
||||
RUN_TESTS(task_kfunc_failure);
|
||||
}
|
||||
|
@ -1,5 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright 2022 Sony Group Corporation */
|
||||
#define _GNU_SOURCE
|
||||
#include <fcntl.h>
|
||||
#include <sys/prctl.h>
|
||||
#include <test_progs.h>
|
||||
#include "bpf_syscall_macro.skel.h"
|
||||
@ -13,6 +15,8 @@ void test_bpf_syscall_macro(void)
|
||||
unsigned long exp_arg3 = 13;
|
||||
unsigned long exp_arg4 = 14;
|
||||
unsigned long exp_arg5 = 15;
|
||||
loff_t off_in, off_out;
|
||||
ssize_t r;
|
||||
|
||||
/* check whether it can open program */
|
||||
skel = bpf_syscall_macro__open();
|
||||
@ -33,6 +37,7 @@ void test_bpf_syscall_macro(void)
|
||||
|
||||
/* check whether args of syscall are copied correctly */
|
||||
prctl(exp_arg1, exp_arg2, exp_arg3, exp_arg4, exp_arg5);
|
||||
|
||||
#if defined(__aarch64__) || defined(__s390__)
|
||||
ASSERT_NEQ(skel->bss->arg1, exp_arg1, "syscall_arg1");
|
||||
#else
|
||||
@ -68,6 +73,18 @@ void test_bpf_syscall_macro(void)
|
||||
ASSERT_EQ(skel->bss->arg4_syscall, exp_arg4, "BPF_KPROBE_SYSCALL_arg4");
|
||||
ASSERT_EQ(skel->bss->arg5_syscall, exp_arg5, "BPF_KPROBE_SYSCALL_arg5");
|
||||
|
||||
r = splice(-42, &off_in, 42, &off_out, 0x12340000, SPLICE_F_NONBLOCK);
|
||||
err = -errno;
|
||||
ASSERT_EQ(r, -1, "splice_res");
|
||||
ASSERT_EQ(err, -EBADF, "splice_err");
|
||||
|
||||
ASSERT_EQ(skel->bss->splice_fd_in, -42, "splice_arg1");
|
||||
ASSERT_EQ(skel->bss->splice_off_in, (__u64)&off_in, "splice_arg2");
|
||||
ASSERT_EQ(skel->bss->splice_fd_out, 42, "splice_arg3");
|
||||
ASSERT_EQ(skel->bss->splice_off_out, (__u64)&off_out, "splice_arg4");
|
||||
ASSERT_EQ(skel->bss->splice_len, 0x12340000, "splice_arg5");
|
||||
ASSERT_EQ(skel->bss->splice_flags, SPLICE_F_NONBLOCK, "splice_arg6");
|
||||
|
||||
cleanup:
|
||||
bpf_syscall_macro__destroy(skel);
|
||||
}
|
||||
|
@@ -3,18 +3,21 @@

#include <test_progs.h>
#include "test_uprobe_autoattach.skel.h"
#include "progs/bpf_misc.h"

/* uprobe attach point */
static noinline int autoattach_trigger_func(int arg)
static noinline int autoattach_trigger_func(int arg1, int arg2, int arg3,
					    int arg4, int arg5, int arg6,
					    int arg7, int arg8)
{
	asm volatile ("");
	return arg + 1;
	return arg1 + arg2 + arg3 + arg4 + arg5 + arg6 + arg7 + arg8 + 1;
}

void test_uprobe_autoattach(void)
{
	struct test_uprobe_autoattach *skel;
	int trigger_val = 100, trigger_ret;
	int trigger_ret;
	size_t malloc_sz = 1;
	char *mem;

@@ -28,22 +31,42 @@ void test_uprobe_autoattach(void)
	skel->bss->test_pid = getpid();

	/* trigger & validate uprobe & uretprobe */
	trigger_ret = autoattach_trigger_func(trigger_val);
	trigger_ret = autoattach_trigger_func(1, 2, 3, 4, 5, 6, 7, 8);

	skel->bss->test_pid = getpid();

	/* trigger & validate shared library u[ret]probes attached by name */
	mem = malloc(malloc_sz);

	ASSERT_EQ(skel->bss->uprobe_byname_parm1, trigger_val, "check_uprobe_byname_parm1");
	ASSERT_EQ(skel->bss->uprobe_byname_parm1, 1, "check_uprobe_byname_parm1");
	ASSERT_EQ(skel->bss->uprobe_byname_ran, 1, "check_uprobe_byname_ran");
	ASSERT_EQ(skel->bss->uretprobe_byname_rc, trigger_ret, "check_uretprobe_byname_rc");
	ASSERT_EQ(skel->bss->uretprobe_byname_ret, trigger_ret, "check_uretprobe_byname_ret");
	ASSERT_EQ(skel->bss->uretprobe_byname_ran, 2, "check_uretprobe_byname_ran");
	ASSERT_EQ(skel->bss->uprobe_byname2_parm1, malloc_sz, "check_uprobe_byname2_parm1");
	ASSERT_EQ(skel->bss->uprobe_byname2_ran, 3, "check_uprobe_byname2_ran");
	ASSERT_EQ(skel->bss->uretprobe_byname2_rc, mem, "check_uretprobe_byname2_rc");
	ASSERT_EQ(skel->bss->uretprobe_byname2_ran, 4, "check_uretprobe_byname2_ran");

	ASSERT_EQ(skel->bss->a[0], 1, "arg1");
	ASSERT_EQ(skel->bss->a[1], 2, "arg2");
	ASSERT_EQ(skel->bss->a[2], 3, "arg3");
#if FUNC_REG_ARG_CNT > 3
	ASSERT_EQ(skel->bss->a[3], 4, "arg4");
#endif
#if FUNC_REG_ARG_CNT > 4
	ASSERT_EQ(skel->bss->a[4], 5, "arg5");
#endif
#if FUNC_REG_ARG_CNT > 5
	ASSERT_EQ(skel->bss->a[5], 6, "arg6");
#endif
#if FUNC_REG_ARG_CNT > 6
	ASSERT_EQ(skel->bss->a[6], 7, "arg7");
#endif
#if FUNC_REG_ARG_CNT > 7
	ASSERT_EQ(skel->bss->a[7], 8, "arg8");
#endif

	free(mem);
cleanup:
	test_uprobe_autoattach__destroy(skel);
tools/testing/selftests/bpf/prog_tests/xdp_metadata.c (new file, 410 lines)
@@ -0,0 +1,410 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
#include <test_progs.h>
|
||||
#include <network_helpers.h>
|
||||
#include "xdp_metadata.skel.h"
|
||||
#include "xdp_metadata2.skel.h"
|
||||
#include "xdp_metadata.h"
|
||||
#include "xsk.h"
|
||||
|
||||
#include <bpf/btf.h>
|
||||
#include <linux/errqueue.h>
|
||||
#include <linux/if_link.h>
|
||||
#include <linux/net_tstamp.h>
|
||||
#include <linux/udp.h>
|
||||
#include <sys/mman.h>
|
||||
#include <net/if.h>
|
||||
#include <poll.h>
|
||||
|
||||
#define TX_NAME "veTX"
|
||||
#define RX_NAME "veRX"
|
||||
|
||||
#define UDP_PAYLOAD_BYTES 4
|
||||
|
||||
#define AF_XDP_SOURCE_PORT 1234
|
||||
#define AF_XDP_CONSUMER_PORT 8080
|
||||
|
||||
#define UMEM_NUM 16
|
||||
#define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE
|
||||
#define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM)
|
||||
#define XDP_FLAGS XDP_FLAGS_DRV_MODE
|
||||
#define QUEUE_ID 0
|
||||
|
||||
#define TX_ADDR "10.0.0.1"
|
||||
#define RX_ADDR "10.0.0.2"
|
||||
#define PREFIX_LEN "8"
|
||||
#define FAMILY AF_INET
|
||||
|
||||
#define SYS(cmd) ({ \
|
||||
if (!ASSERT_OK(system(cmd), (cmd))) \
|
||||
goto out; \
|
||||
})
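/* Note: SYS() runs the command via system() and, on a non-zero exit status,
 * fails the assertion and jumps to an "out" label, so every caller (such as
 * test_xdp_metadata() below) must provide that label.
 */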
|
||||
|
||||
struct xsk {
|
||||
void *umem_area;
|
||||
struct xsk_umem *umem;
|
||||
struct xsk_ring_prod fill;
|
||||
struct xsk_ring_cons comp;
|
||||
struct xsk_ring_prod tx;
|
||||
struct xsk_ring_cons rx;
|
||||
struct xsk_socket *socket;
|
||||
};
|
||||
|
||||
static int open_xsk(int ifindex, struct xsk *xsk)
|
||||
{
|
||||
int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
|
||||
const struct xsk_socket_config socket_config = {
|
||||
.rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
|
||||
.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
|
||||
.bind_flags = XDP_COPY,
|
||||
};
|
||||
const struct xsk_umem_config umem_config = {
|
||||
.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
|
||||
.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
|
||||
.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
|
||||
.flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG,
|
||||
};
|
||||
__u32 idx;
|
||||
u64 addr;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
|
||||
if (!ASSERT_NEQ(xsk->umem_area, MAP_FAILED, "mmap"))
|
||||
return -1;
|
||||
|
||||
ret = xsk_umem__create(&xsk->umem,
|
||||
xsk->umem_area, UMEM_SIZE,
|
||||
&xsk->fill,
|
||||
&xsk->comp,
|
||||
&umem_config);
|
||||
if (!ASSERT_OK(ret, "xsk_umem__create"))
|
||||
return ret;
|
||||
|
||||
ret = xsk_socket__create(&xsk->socket, ifindex, QUEUE_ID,
|
||||
xsk->umem,
|
||||
&xsk->rx,
|
||||
&xsk->tx,
|
||||
&socket_config);
|
||||
if (!ASSERT_OK(ret, "xsk_socket__create"))
|
||||
return ret;
|
||||
|
||||
/* First half of umem is for TX. This way address matches 1-to-1
|
||||
* to the completion queue index.
|
||||
*/
|
||||
|
||||
for (i = 0; i < UMEM_NUM / 2; i++) {
|
||||
addr = i * UMEM_FRAME_SIZE;
|
||||
printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
|
||||
}
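/* Worked example of the layout described above: with UMEM_NUM = 16 and
 * UMEM_FRAME_SIZE = XSK_UMEM__DEFAULT_FRAME_SIZE (typically 4096), frames
 * 0-7 (addresses 0x0000-0x7000) are reserved for TX while frames 8-15 are
 * pushed to the fill ring for RX below, so a completion address divided by
 * UMEM_FRAME_SIZE maps directly back to a TX descriptor index.
 */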
|
||||
|
||||
/* Second half of umem is for RX. */
|
||||
|
||||
ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx);
|
||||
if (!ASSERT_EQ(UMEM_NUM / 2, ret, "xsk_ring_prod__reserve"))
|
||||
return ret;
|
||||
if (!ASSERT_EQ(idx, 0, "fill idx != 0"))
|
||||
return -1;
|
||||
|
||||
for (i = 0; i < UMEM_NUM / 2; i++) {
|
||||
addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
|
||||
printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
|
||||
*xsk_ring_prod__fill_addr(&xsk->fill, i) = addr;
|
||||
}
|
||||
xsk_ring_prod__submit(&xsk->fill, ret);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void close_xsk(struct xsk *xsk)
|
||||
{
|
||||
if (xsk->umem)
|
||||
xsk_umem__delete(xsk->umem);
|
||||
if (xsk->socket)
|
||||
xsk_socket__delete(xsk->socket);
|
||||
munmap(xsk->umem, UMEM_SIZE);
|
||||
}
|
||||
|
||||
static void ip_csum(struct iphdr *iph)
|
||||
{
|
||||
__u32 sum = 0;
|
||||
__u16 *p;
|
||||
int i;
|
||||
|
||||
iph->check = 0;
|
||||
p = (void *)iph;
|
||||
for (i = 0; i < sizeof(*iph) / sizeof(*p); i++)
|
||||
sum += p[i];
|
||||
|
||||
while (sum >> 16)
|
||||
sum = (sum & 0xffff) + (sum >> 16);
|
||||
|
||||
iph->check = ~sum;
|
||||
}
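/* ip_csum() implements the standard IPv4 header checksum: sum the header as
 * 16-bit words, fold any carries back into the low 16 bits, then store the
 * one's complement of the result (RFC 1071 style).
 */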
|
||||
|
||||
static int generate_packet(struct xsk *xsk, __u16 dst_port)
|
||||
{
|
||||
struct xdp_desc *tx_desc;
|
||||
struct udphdr *udph;
|
||||
struct ethhdr *eth;
|
||||
struct iphdr *iph;
|
||||
void *data;
|
||||
__u32 idx;
|
||||
int ret;
|
||||
|
||||
ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx);
|
||||
if (!ASSERT_EQ(ret, 1, "xsk_ring_prod__reserve"))
|
||||
return -1;
|
||||
|
||||
tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx);
|
||||
tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE;
|
||||
printf("%p: tx_desc[%u]->addr=%llx\n", xsk, idx, tx_desc->addr);
|
||||
data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr);
|
||||
|
||||
eth = data;
|
||||
iph = (void *)(eth + 1);
|
||||
udph = (void *)(iph + 1);
|
||||
|
||||
memcpy(eth->h_dest, "\x00\x00\x00\x00\x00\x02", ETH_ALEN);
|
||||
memcpy(eth->h_source, "\x00\x00\x00\x00\x00\x01", ETH_ALEN);
|
||||
eth->h_proto = htons(ETH_P_IP);
|
||||
|
||||
iph->version = 0x4;
|
||||
iph->ihl = 0x5;
|
||||
iph->tos = 0x9;
|
||||
iph->tot_len = htons(sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES);
|
||||
iph->id = 0;
|
||||
iph->frag_off = 0;
|
||||
iph->ttl = 0;
|
||||
iph->protocol = IPPROTO_UDP;
|
||||
ASSERT_EQ(inet_pton(FAMILY, TX_ADDR, &iph->saddr), 1, "inet_pton(TX_ADDR)");
|
||||
ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)");
|
||||
ip_csum(iph);
|
||||
|
||||
udph->source = htons(AF_XDP_SOURCE_PORT);
|
||||
udph->dest = htons(dst_port);
|
||||
udph->len = htons(sizeof(*udph) + UDP_PAYLOAD_BYTES);
|
||||
udph->check = 0;
|
||||
|
||||
memset(udph + 1, 0xAA, UDP_PAYLOAD_BYTES);
|
||||
|
||||
tx_desc->len = sizeof(*eth) + sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES;
|
||||
xsk_ring_prod__submit(&xsk->tx, 1);
|
||||
|
||||
ret = sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0);
|
||||
if (!ASSERT_GE(ret, 0, "sendto"))
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void complete_tx(struct xsk *xsk)
|
||||
{
|
||||
__u32 idx;
|
||||
__u64 addr;
|
||||
|
||||
if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) {
|
||||
addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx);
|
||||
|
||||
printf("%p: refill idx=%u addr=%llx\n", xsk, idx, addr);
|
||||
*xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
|
||||
xsk_ring_prod__submit(&xsk->fill, 1);
|
||||
}
|
||||
}
|
||||
|
||||
static void refill_rx(struct xsk *xsk, __u64 addr)
|
||||
{
|
||||
__u32 idx;
|
||||
|
||||
if (ASSERT_EQ(xsk_ring_prod__reserve(&xsk->fill, 1, &idx), 1, "xsk_ring_prod__reserve")) {
|
||||
printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr);
|
||||
*xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
|
||||
xsk_ring_prod__submit(&xsk->fill, 1);
|
||||
}
|
||||
}
|
||||
|
||||
static int verify_xsk_metadata(struct xsk *xsk)
|
||||
{
|
||||
const struct xdp_desc *rx_desc;
|
||||
struct pollfd fds = {};
|
||||
struct xdp_meta *meta;
|
||||
struct ethhdr *eth;
|
||||
struct iphdr *iph;
|
||||
__u64 comp_addr;
|
||||
void *data;
|
||||
__u64 addr;
|
||||
__u32 idx;
|
||||
int ret;
|
||||
|
||||
ret = recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL);
|
||||
if (!ASSERT_EQ(ret, 0, "recvfrom"))
|
||||
return -1;
|
||||
|
||||
fds.fd = xsk_socket__fd(xsk->socket);
|
||||
fds.events = POLLIN;
|
||||
|
||||
ret = poll(&fds, 1, 1000);
|
||||
if (!ASSERT_GT(ret, 0, "poll"))
|
||||
return -1;
|
||||
|
||||
ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx);
|
||||
if (!ASSERT_EQ(ret, 1, "xsk_ring_cons__peek"))
|
||||
return -2;
|
||||
|
||||
rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
|
||||
comp_addr = xsk_umem__extract_addr(rx_desc->addr);
|
||||
addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
|
||||
printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
|
||||
xsk, idx, rx_desc->addr, addr, comp_addr);
|
||||
data = xsk_umem__get_data(xsk->umem_area, addr);
|
||||
|
||||
/* Make sure we got the packet offset correctly. */
|
||||
|
||||
eth = data;
|
||||
ASSERT_EQ(eth->h_proto, htons(ETH_P_IP), "eth->h_proto");
|
||||
iph = (void *)(eth + 1);
|
||||
ASSERT_EQ((int)iph->version, 4, "iph->version");
|
||||
|
||||
/* custom metadata */
|
||||
|
||||
meta = data - sizeof(struct xdp_meta);
|
||||
|
||||
if (!ASSERT_NEQ(meta->rx_timestamp, 0, "rx_timestamp"))
|
||||
return -1;
|
||||
|
||||
if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash"))
|
||||
return -1;
|
||||
|
||||
xsk_ring_cons__release(&xsk->rx, 1);
|
||||
refill_rx(xsk, comp_addr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void test_xdp_metadata(void)
|
||||
{
|
||||
struct xdp_metadata2 *bpf_obj2 = NULL;
|
||||
struct xdp_metadata *bpf_obj = NULL;
|
||||
struct bpf_program *new_prog, *prog;
|
||||
struct nstoken *tok = NULL;
|
||||
__u32 queue_id = QUEUE_ID;
|
||||
struct bpf_map *prog_arr;
|
||||
struct xsk tx_xsk = {};
|
||||
struct xsk rx_xsk = {};
|
||||
__u32 val, key = 0;
|
||||
int retries = 10;
|
||||
int rx_ifindex;
|
||||
int tx_ifindex;
|
||||
int sock_fd;
|
||||
int ret;
|
||||
|
||||
/* Setup new networking namespace, with a veth pair. */
|
||||
|
||||
SYS("ip netns add xdp_metadata");
|
||||
tok = open_netns("xdp_metadata");
|
||||
SYS("ip link add numtxqueues 1 numrxqueues 1 " TX_NAME
|
||||
" type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1");
|
||||
SYS("ip link set dev " TX_NAME " address 00:00:00:00:00:01");
|
||||
SYS("ip link set dev " RX_NAME " address 00:00:00:00:00:02");
|
||||
SYS("ip link set dev " TX_NAME " up");
|
||||
SYS("ip link set dev " RX_NAME " up");
|
||||
SYS("ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME);
|
||||
SYS("ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME);
|
||||
|
||||
rx_ifindex = if_nametoindex(RX_NAME);
|
||||
tx_ifindex = if_nametoindex(TX_NAME);
|
||||
|
||||
/* Setup separate AF_XDP for TX and RX interfaces. */
|
||||
|
||||
ret = open_xsk(tx_ifindex, &tx_xsk);
|
||||
if (!ASSERT_OK(ret, "open_xsk(TX_NAME)"))
|
||||
goto out;
|
||||
|
||||
ret = open_xsk(rx_ifindex, &rx_xsk);
|
||||
if (!ASSERT_OK(ret, "open_xsk(RX_NAME)"))
|
||||
goto out;
|
||||
|
||||
bpf_obj = xdp_metadata__open();
|
||||
if (!ASSERT_OK_PTR(bpf_obj, "open skeleton"))
|
||||
goto out;
|
||||
|
||||
prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
|
||||
bpf_program__set_ifindex(prog, rx_ifindex);
|
||||
bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
|
||||
|
||||
if (!ASSERT_OK(xdp_metadata__load(bpf_obj), "load skeleton"))
|
||||
goto out;
|
||||
|
||||
/* Make sure we can't add dev-bound programs to prog maps. */
|
||||
prog_arr = bpf_object__find_map_by_name(bpf_obj->obj, "prog_arr");
|
||||
if (!ASSERT_OK_PTR(prog_arr, "no prog_arr map"))
|
||||
goto out;
|
||||
|
||||
val = bpf_program__fd(prog);
|
||||
if (!ASSERT_ERR(bpf_map__update_elem(prog_arr, &key, sizeof(key),
|
||||
&val, sizeof(val), BPF_ANY),
|
||||
"update prog_arr"))
|
||||
goto out;
|
||||
|
||||
/* Attach BPF program to RX interface. */
|
||||
|
||||
ret = bpf_xdp_attach(rx_ifindex,
|
||||
bpf_program__fd(bpf_obj->progs.rx),
|
||||
XDP_FLAGS, NULL);
|
||||
if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
|
||||
goto out;
|
||||
|
||||
sock_fd = xsk_socket__fd(rx_xsk.socket);
|
||||
ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0);
|
||||
if (!ASSERT_GE(ret, 0, "bpf_map_update_elem"))
|
||||
goto out;
|
||||
|
||||
/* Send packet destined to RX AF_XDP socket. */
|
||||
if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0,
|
||||
"generate AF_XDP_CONSUMER_PORT"))
|
||||
goto out;
|
||||
|
||||
/* Verify AF_XDP RX packet has proper metadata. */
|
||||
if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk), 0,
|
||||
"verify_xsk_metadata"))
|
||||
goto out;
|
||||
|
||||
complete_tx(&tx_xsk);
|
||||
|
||||
/* Make sure freplace correctly picks up original bound device
|
||||
* and doesn't crash.
|
||||
*/
|
||||
|
||||
bpf_obj2 = xdp_metadata2__open();
|
||||
if (!ASSERT_OK_PTR(bpf_obj2, "open skeleton"))
|
||||
goto out;
|
||||
|
||||
new_prog = bpf_object__find_program_by_name(bpf_obj2->obj, "freplace_rx");
|
||||
bpf_program__set_attach_target(new_prog, bpf_program__fd(prog), "rx");
|
||||
|
||||
if (!ASSERT_OK(xdp_metadata2__load(bpf_obj2), "load freplace skeleton"))
|
||||
goto out;
|
||||
|
||||
if (!ASSERT_OK(xdp_metadata2__attach(bpf_obj2), "attach freplace"))
|
||||
goto out;
|
||||
|
||||
/* Send packet to trigger the freplace program. */
|
||||
if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0,
|
||||
"generate freplace packet"))
|
||||
goto out;
|
||||
|
||||
while (!retries--) {
|
||||
if (bpf_obj2->bss->called)
|
||||
break;
|
||||
usleep(10);
|
||||
}
|
||||
ASSERT_GT(bpf_obj2->bss->called, 0, "not called");
|
||||
|
||||
out:
|
||||
close_xsk(&rx_xsk);
|
||||
close_xsk(&tx_xsk);
|
||||
xdp_metadata2__destroy(bpf_obj2);
|
||||
xdp_metadata__destroy(bpf_obj);
|
||||
if (tok)
|
||||
close_netns(tok);
|
||||
system("ip netns del xdp_metadata");
|
||||
}
|
@@ -7,6 +7,13 @@
#define __success __attribute__((btf_decl_tag("comment:test_expect_success")))
#define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl)))

/* Convenience macro for use with 'asm volatile' blocks */
#define __naked __attribute__((naked))
#define __clobber_all "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "memory"
#define __clobber_common "r0", "r1", "r2", "r3", "r4", "r5", "memory"
#define __imm(name) [name]"i"(name)
#define __imm_addr(name) [name]"i"(&name)

#if defined(__TARGET_ARCH_x86)
#define SYSCALL_WRAPPER 1
#define SYS_PREFIX "__x64_"
@@ -21,4 +28,29 @@
#define SYS_PREFIX "__se_"
#endif

/* How many arguments are passed to function in register */
#if defined(__TARGET_ARCH_x86) || defined(__x86_64__)
#define FUNC_REG_ARG_CNT 6
#elif defined(__i386__)
#define FUNC_REG_ARG_CNT 3
#elif defined(__TARGET_ARCH_s390) || defined(__s390x__)
#define FUNC_REG_ARG_CNT 5
#elif defined(__TARGET_ARCH_arm) || defined(__arm__)
#define FUNC_REG_ARG_CNT 4
#elif defined(__TARGET_ARCH_arm64) || defined(__aarch64__)
#define FUNC_REG_ARG_CNT 8
#elif defined(__TARGET_ARCH_mips) || defined(__mips__)
#define FUNC_REG_ARG_CNT 8
#elif defined(__TARGET_ARCH_powerpc) || defined(__powerpc__) || defined(__powerpc64__)
#define FUNC_REG_ARG_CNT 8
#elif defined(__TARGET_ARCH_sparc) || defined(__sparc__)
#define FUNC_REG_ARG_CNT 6
#elif defined(__TARGET_ARCH_riscv) || defined(__riscv__)
#define FUNC_REG_ARG_CNT 8
#else
/* default to 5 for others */
#define FUNC_REG_ARG_CNT 5
#endif

#endif
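For context, a rough sketch of how FUNC_REG_ARG_CNT is meant to be consumed on the BPF side (a hypothetical fragment, not the verbatim test_uprobe_autoattach program; it assumes the usual vmlinux.h/bpf_tracing.h includes plus this header): a uprobe can only observe as many arguments as the architecture passes in registers, so reads past that count are compiled out.

/* hypothetical sketch: capture only the register-passed arguments */
unsigned long a[8];

SEC("uprobe//proc/self/exe:autoattach_trigger_func")
int BPF_KPROBE(handle_uprobe)
{
	a[0] = PT_REGS_PARM1(ctx);
	a[1] = PT_REGS_PARM2(ctx);
	a[2] = PT_REGS_PARM3(ctx);
#if FUNC_REG_ARG_CNT > 3
	a[3] = PT_REGS_PARM4(ctx);	/* only where the ABI has a 4th register argument */
#endif
	return 0;
}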
@@ -81,4 +81,30 @@ int BPF_KSYSCALL(prctl_enter, int option, unsigned long arg2,
	return 0;
}

__u64 splice_fd_in;
__u64 splice_off_in;
__u64 splice_fd_out;
__u64 splice_off_out;
__u64 splice_len;
__u64 splice_flags;

SEC("ksyscall/splice")
int BPF_KSYSCALL(splice_enter, int fd_in, loff_t *off_in, int fd_out,
		 loff_t *off_out, size_t len, unsigned int flags)
{
	pid_t pid = bpf_get_current_pid_tgid() >> 32;

	if (pid != filter_pid)
		return 0;

	splice_fd_in = fd_in;
	splice_off_in = (__u64)off_in;
	splice_fd_out = fd_out;
	splice_off_out = (__u64)off_out;
	splice_len = len;
	splice_flags = flags;

	return 0;
}

char _license[] SEC("license") = "GPL";
tools/testing/selftests/bpf/progs/cpumask_common.h (new file, 114 lines)
@@ -0,0 +1,114 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */

#ifndef _CPUMASK_COMMON_H
#define _CPUMASK_COMMON_H

#include "errno.h"
#include <stdbool.h>

int err;

struct __cpumask_map_value {
	struct bpf_cpumask __kptr_ref * cpumask;
};

struct array_map {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, int);
	__type(value, struct __cpumask_map_value);
	__uint(max_entries, 1);
} __cpumask_map SEC(".maps");

struct bpf_cpumask *bpf_cpumask_create(void) __ksym;
void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym;
struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym;
struct bpf_cpumask *bpf_cpumask_kptr_get(struct bpf_cpumask **cpumask) __ksym;
u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym;
u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym;
void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym;
bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym;
void bpf_cpumask_setall(struct bpf_cpumask *cpumask) __ksym;
void bpf_cpumask_clear(struct bpf_cpumask *cpumask) __ksym;
bool bpf_cpumask_and(struct bpf_cpumask *cpumask,
		     const struct cpumask *src1,
		     const struct cpumask *src2) __ksym;
void bpf_cpumask_or(struct bpf_cpumask *cpumask,
		    const struct cpumask *src1,
		    const struct cpumask *src2) __ksym;
void bpf_cpumask_xor(struct bpf_cpumask *cpumask,
		     const struct cpumask *src1,
		     const struct cpumask *src2) __ksym;
bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2) __ksym;
bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *src2) __ksym;
bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2) __ksym;
bool bpf_cpumask_empty(const struct cpumask *cpumask) __ksym;
bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym;
void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym;
u32 bpf_cpumask_any(const struct cpumask *src) __ksym;
u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2) __ksym;

static inline const struct cpumask *cast(struct bpf_cpumask *cpumask)
{
	return (const struct cpumask *)cpumask;
}

static inline struct bpf_cpumask *create_cpumask(void)
{
	struct bpf_cpumask *cpumask;

	cpumask = bpf_cpumask_create();
	if (!cpumask) {
		err = 1;
		return NULL;
	}

	if (!bpf_cpumask_empty(cast(cpumask))) {
		err = 2;
		bpf_cpumask_release(cpumask);
		return NULL;
	}

	return cpumask;
}

static inline struct __cpumask_map_value *cpumask_map_value_lookup(void)
{
	u32 key = 0;

	return bpf_map_lookup_elem(&__cpumask_map, &key);
}

static inline int cpumask_map_insert(struct bpf_cpumask *mask)
{
	struct __cpumask_map_value local, *v;
	long status;
	struct bpf_cpumask *old;
	u32 key = 0;

	local.cpumask = NULL;
	status = bpf_map_update_elem(&__cpumask_map, &key, &local, 0);
	if (status) {
		bpf_cpumask_release(mask);
		return status;
	}

	v = bpf_map_lookup_elem(&__cpumask_map, &key);
	if (!v) {
		bpf_cpumask_release(mask);
		return -ENOENT;
	}

	old = bpf_kptr_xchg(&v->cpumask, mask);
	if (old) {
		bpf_cpumask_release(old);
		return -EEXIST;
	}

	return 0;
}

#endif /* _CPUMASK_COMMON_H */
tools/testing/selftests/bpf/progs/cpumask_failure.c (new file, 126 lines)
@@ -0,0 +1,126 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
|
||||
|
||||
#include <vmlinux.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include "bpf_misc.h"
|
||||
|
||||
#include "cpumask_common.h"
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
||||
/* Prototype for all of the program trace events below:
|
||||
*
|
||||
* TRACE_EVENT(task_newtask,
|
||||
* TP_PROTO(struct task_struct *p, u64 clone_flags)
|
||||
*/
|
||||
|
||||
SEC("tp_btf/task_newtask")
|
||||
__failure __msg("Unreleased reference")
|
||||
int BPF_PROG(test_alloc_no_release, struct task_struct *task, u64 clone_flags)
|
||||
{
|
||||
struct bpf_cpumask *cpumask;
|
||||
|
||||
cpumask = create_cpumask();
|
||||
|
||||
/* cpumask is never released. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tp_btf/task_newtask")
|
||||
__failure __msg("NULL pointer passed to trusted arg0")
|
||||
int BPF_PROG(test_alloc_double_release, struct task_struct *task, u64 clone_flags)
|
||||
{
|
||||
struct bpf_cpumask *cpumask;
|
||||
|
||||
cpumask = create_cpumask();
|
||||
|
||||
/* cpumask is released twice. */
|
||||
bpf_cpumask_release(cpumask);
|
||||
bpf_cpumask_release(cpumask);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tp_btf/task_newtask")
|
||||
__failure __msg("bpf_cpumask_acquire args#0 expected pointer to STRUCT bpf_cpumask")
|
||||
int BPF_PROG(test_acquire_wrong_cpumask, struct task_struct *task, u64 clone_flags)
|
||||
{
|
||||
struct bpf_cpumask *cpumask;
|
||||
|
||||
/* Can't acquire a non-struct bpf_cpumask. */
|
||||
cpumask = bpf_cpumask_acquire((struct bpf_cpumask *)task->cpus_ptr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tp_btf/task_newtask")
|
||||
__failure __msg("bpf_cpumask_set_cpu args#1 expected pointer to STRUCT bpf_cpumask")
|
||||
int BPF_PROG(test_mutate_cpumask, struct task_struct *task, u64 clone_flags)
|
||||
{
|
||||
struct bpf_cpumask *cpumask;
|
||||
|
||||
/* Can't set the CPU of a non-struct bpf_cpumask. */
|
||||
bpf_cpumask_set_cpu(0, (struct bpf_cpumask *)task->cpus_ptr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tp_btf/task_newtask")
|
||||
__failure __msg("Unreleased reference")
|
||||
int BPF_PROG(test_insert_remove_no_release, struct task_struct *task, u64 clone_flags)
|
||||
{
|
||||
struct bpf_cpumask *cpumask;
|
||||
struct __cpumask_map_value *v;
|
||||
|
||||
cpumask = create_cpumask();
|
||||
if (!cpumask)
|
||||
return 0;
|
||||
|
||||
if (cpumask_map_insert(cpumask))
|
||||
return 0;
|
||||
|
||||
v = cpumask_map_value_lookup();
|
||||
if (!v)
|
||||
return 0;
|
||||
|
||||
cpumask = bpf_kptr_xchg(&v->cpumask, NULL);
|
||||
|
||||
/* cpumask is never released. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tp_btf/task_newtask")
|
||||
__failure __msg("Unreleased reference")
|
||||
int BPF_PROG(test_kptr_get_no_release, struct task_struct *task, u64 clone_flags)
|
||||
{
|
||||
struct bpf_cpumask *cpumask;
|
||||
struct __cpumask_map_value *v;
|
||||
|
||||
cpumask = create_cpumask();
|
||||
if (!cpumask)
|
||||
return 0;
|
||||
|
||||
if (cpumask_map_insert(cpumask))
|
||||
return 0;
|
||||
|
||||
v = cpumask_map_value_lookup();
|
||||
if (!v)
|
||||
return 0;
|
||||
|
||||
cpumask = bpf_cpumask_kptr_get(&v->cpumask);
|
||||
|
||||
/* cpumask is never released. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tp_btf/task_newtask")
|
||||
__failure __msg("NULL pointer passed to trusted arg0")
|
||||
int BPF_PROG(test_cpumask_null, struct task_struct *task, u64 clone_flags)
|
||||
{
|
||||
/* NULL passed to KF_TRUSTED_ARGS kfunc. */
|
||||
bpf_cpumask_empty(NULL);
|
||||
|
||||
return 0;
|
||||
}
|
tools/testing/selftests/bpf/progs/cpumask_success.c (new file, 426 lines)
@@ -0,0 +1,426 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
|
||||
|
||||
#include <vmlinux.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
|
||||
#include "cpumask_common.h"
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
||||
int pid, nr_cpus;
|
||||
|
||||
static bool is_test_task(void)
|
||||
{
|
||||
int cur_pid = bpf_get_current_pid_tgid() >> 32;
|
||||
|
||||
return pid == cur_pid;
|
||||
}
|
||||
|
||||
static bool create_cpumask_set(struct bpf_cpumask **out1,
|
||||
struct bpf_cpumask **out2,
|
||||
struct bpf_cpumask **out3,
|
||||
struct bpf_cpumask **out4)
|
||||
{
|
||||
struct bpf_cpumask *mask1, *mask2, *mask3, *mask4;
|
||||
|
||||
mask1 = create_cpumask();
|
||||
if (!mask1)
|
||||
return false;
|
||||
|
||||
mask2 = create_cpumask();
|
||||
if (!mask2) {
|
||||
bpf_cpumask_release(mask1);
|
||||
err = 3;
|
||||
return false;
|
||||
}
|
||||
|
||||
mask3 = create_cpumask();
|
||||
if (!mask3) {
|
||||
bpf_cpumask_release(mask1);
|
||||
bpf_cpumask_release(mask2);
|
||||
err = 4;
|
||||
return false;
|
||||
}
|
||||
|
||||
mask4 = create_cpumask();
|
||||
if (!mask4) {
|
||||
bpf_cpumask_release(mask1);
|
||||
bpf_cpumask_release(mask2);
|
||||
bpf_cpumask_release(mask3);
|
||||
err = 5;
|
||||
return false;
|
||||
}
|
||||
|
||||
*out1 = mask1;
|
||||
*out2 = mask2;
|
||||
*out3 = mask3;
|
||||
*out4 = mask4;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
SEC("tp_btf/task_newtask")
|
||||
int BPF_PROG(test_alloc_free_cpumask, struct task_struct *task, u64 clone_flags)
|
||||
{
|
||||
struct bpf_cpumask *cpumask;
|
||||
|
||||
if (!is_test_task())
|
||||
return 0;
|
||||
|
||||
cpumask = create_cpumask();
|
||||
if (!cpumask)
|
||||
return 0;
|
||||
|
||||
bpf_cpumask_release(cpumask);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tp_btf/task_newtask")
|
||||
int BPF_PROG(test_set_clear_cpu, struct task_struct *task, u64 clone_flags)
|
||||
{
|
||||
struct bpf_cpumask *cpumask;
|
||||
|
||||
if (!is_test_task())
|
||||
return 0;
|
||||
|
||||
cpumask = create_cpumask();
|
||||
if (!cpumask)
|
||||
return 0;
|
||||
|
||||
bpf_cpumask_set_cpu(0, cpumask);
|
||||
if (!bpf_cpumask_test_cpu(0, cast(cpumask))) {
|
||||
err = 3;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
bpf_cpumask_clear_cpu(0, cpumask);
|
||||
if (bpf_cpumask_test_cpu(0, cast(cpumask))) {
|
||||
err = 4;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
release_exit:
|
||||
bpf_cpumask_release(cpumask);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tp_btf/task_newtask")
|
||||
int BPF_PROG(test_setall_clear_cpu, struct task_struct *task, u64 clone_flags)
|
||||
{
|
||||
struct bpf_cpumask *cpumask;
|
||||
|
||||
if (!is_test_task())
|
||||
return 0;
|
||||
|
||||
cpumask = create_cpumask();
|
||||
if (!cpumask)
|
||||
return 0;
|
||||
|
||||
bpf_cpumask_setall(cpumask);
|
||||
if (!bpf_cpumask_full(cast(cpumask))) {
|
||||
err = 3;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
bpf_cpumask_clear(cpumask);
|
||||
if (!bpf_cpumask_empty(cast(cpumask))) {
|
||||
err = 4;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
release_exit:
|
||||
bpf_cpumask_release(cpumask);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tp_btf/task_newtask")
|
||||
int BPF_PROG(test_first_firstzero_cpu, struct task_struct *task, u64 clone_flags)
|
||||
{
|
||||
struct bpf_cpumask *cpumask;
|
||||
|
||||
if (!is_test_task())
|
||||
return 0;
|
||||
|
||||
cpumask = create_cpumask();
|
||||
if (!cpumask)
|
||||
return 0;
|
||||
|
||||
if (bpf_cpumask_first(cast(cpumask)) < nr_cpus) {
|
||||
err = 3;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
if (bpf_cpumask_first_zero(cast(cpumask)) != 0) {
|
||||
bpf_printk("first zero: %d", bpf_cpumask_first_zero(cast(cpumask)));
|
||||
err = 4;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
bpf_cpumask_set_cpu(0, cpumask);
|
||||
if (bpf_cpumask_first(cast(cpumask)) != 0) {
|
||||
err = 5;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
if (bpf_cpumask_first_zero(cast(cpumask)) != 1) {
|
||||
err = 6;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
release_exit:
|
||||
bpf_cpumask_release(cpumask);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tp_btf/task_newtask")
|
||||
int BPF_PROG(test_test_and_set_clear, struct task_struct *task, u64 clone_flags)
|
||||
{
|
||||
struct bpf_cpumask *cpumask;
|
||||
|
||||
if (!is_test_task())
|
||||
return 0;
|
||||
|
||||
cpumask = create_cpumask();
|
||||
if (!cpumask)
|
||||
return 0;
|
||||
|
||||
if (bpf_cpumask_test_and_set_cpu(0, cpumask)) {
|
||||
err = 3;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
if (!bpf_cpumask_test_and_set_cpu(0, cpumask)) {
|
||||
err = 4;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
if (!bpf_cpumask_test_and_clear_cpu(0, cpumask)) {
|
||||
err = 5;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
release_exit:
|
||||
bpf_cpumask_release(cpumask);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tp_btf/task_newtask")
|
||||
int BPF_PROG(test_and_or_xor, struct task_struct *task, u64 clone_flags)
|
||||
{
|
||||
struct bpf_cpumask *mask1, *mask2, *dst1, *dst2;
|
||||
|
||||
if (!is_test_task())
|
||||
return 0;
|
||||
|
||||
if (!create_cpumask_set(&mask1, &mask2, &dst1, &dst2))
|
||||
return 0;
|
||||
|
||||
bpf_cpumask_set_cpu(0, mask1);
|
||||
bpf_cpumask_set_cpu(1, mask2);
|
||||
|
||||
if (bpf_cpumask_and(dst1, cast(mask1), cast(mask2))) {
|
||||
err = 6;
|
||||
goto release_exit;
|
||||
}
|
||||
if (!bpf_cpumask_empty(cast(dst1))) {
|
||||
err = 7;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
bpf_cpumask_or(dst1, cast(mask1), cast(mask2));
|
||||
if (!bpf_cpumask_test_cpu(0, cast(dst1))) {
|
||||
err = 8;
|
||||
goto release_exit;
|
||||
}
|
||||
if (!bpf_cpumask_test_cpu(1, cast(dst1))) {
|
||||
err = 9;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
bpf_cpumask_xor(dst2, cast(mask1), cast(mask2));
|
||||
if (!bpf_cpumask_equal(cast(dst1), cast(dst2))) {
|
||||
err = 10;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
release_exit:
|
||||
bpf_cpumask_release(mask1);
|
||||
bpf_cpumask_release(mask2);
|
||||
bpf_cpumask_release(dst1);
|
||||
bpf_cpumask_release(dst2);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tp_btf/task_newtask")
|
||||
int BPF_PROG(test_intersects_subset, struct task_struct *task, u64 clone_flags)
|
||||
{
|
||||
struct bpf_cpumask *mask1, *mask2, *dst1, *dst2;
|
||||
|
||||
if (!is_test_task())
|
||||
return 0;
|
||||
|
||||
if (!create_cpumask_set(&mask1, &mask2, &dst1, &dst2))
|
||||
return 0;
|
||||
|
||||
bpf_cpumask_set_cpu(0, mask1);
|
||||
bpf_cpumask_set_cpu(1, mask2);
|
||||
if (bpf_cpumask_intersects(cast(mask1), cast(mask2))) {
|
||||
err = 6;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
bpf_cpumask_or(dst1, cast(mask1), cast(mask2));
|
||||
if (!bpf_cpumask_subset(cast(mask1), cast(dst1))) {
|
||||
err = 7;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
if (!bpf_cpumask_subset(cast(mask2), cast(dst1))) {
|
||||
err = 8;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
if (bpf_cpumask_subset(cast(dst1), cast(mask1))) {
|
||||
err = 9;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
release_exit:
|
||||
bpf_cpumask_release(mask1);
|
||||
bpf_cpumask_release(mask2);
|
||||
bpf_cpumask_release(dst1);
|
||||
bpf_cpumask_release(dst2);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tp_btf/task_newtask")
|
||||
int BPF_PROG(test_copy_any_anyand, struct task_struct *task, u64 clone_flags)
|
||||
{
|
||||
struct bpf_cpumask *mask1, *mask2, *dst1, *dst2;
|
||||
u32 cpu;
|
||||
|
||||
if (!is_test_task())
|
||||
return 0;
|
||||
|
||||
if (!create_cpumask_set(&mask1, &mask2, &dst1, &dst2))
|
||||
return 0;
|
||||
|
||||
bpf_cpumask_set_cpu(0, mask1);
|
||||
bpf_cpumask_set_cpu(1, mask2);
|
||||
bpf_cpumask_or(dst1, cast(mask1), cast(mask2));
|
||||
|
||||
cpu = bpf_cpumask_any(cast(mask1));
|
||||
if (cpu != 0) {
|
||||
err = 6;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
cpu = bpf_cpumask_any(cast(dst2));
|
||||
if (cpu < nr_cpus) {
|
||||
err = 7;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
bpf_cpumask_copy(dst2, cast(dst1));
|
||||
if (!bpf_cpumask_equal(cast(dst1), cast(dst2))) {
|
||||
err = 8;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
cpu = bpf_cpumask_any(cast(dst2));
|
||||
if (cpu > 1) {
|
||||
err = 9;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
cpu = bpf_cpumask_any_and(cast(mask1), cast(mask2));
|
||||
if (cpu < nr_cpus) {
|
||||
err = 10;
|
||||
goto release_exit;
|
||||
}
|
||||
|
||||
release_exit:
|
||||
bpf_cpumask_release(mask1);
|
||||
bpf_cpumask_release(mask2);
|
||||
bpf_cpumask_release(dst1);
|
||||
bpf_cpumask_release(dst2);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tp_btf/task_newtask")
|
||||
int BPF_PROG(test_insert_leave, struct task_struct *task, u64 clone_flags)
|
||||
{
|
||||
struct bpf_cpumask *cpumask;
|
||||
struct __cpumask_map_value *v;
|
||||
|
||||
cpumask = create_cpumask();
|
||||
if (!cpumask)
|
||||
return 0;
|
||||
|
||||
if (cpumask_map_insert(cpumask))
|
||||
err = 3;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tp_btf/task_newtask")
|
||||
int BPF_PROG(test_insert_remove_release, struct task_struct *task, u64 clone_flags)
|
||||
{
|
||||
struct bpf_cpumask *cpumask;
|
||||
struct __cpumask_map_value *v;
|
||||
|
||||
cpumask = create_cpumask();
|
||||
if (!cpumask)
|
||||
return 0;
|
||||
|
||||
if (cpumask_map_insert(cpumask)) {
|
||||
err = 3;
|
||||
return 0;
|
||||
}
|
||||
|
||||
v = cpumask_map_value_lookup();
|
||||
if (!v) {
|
||||
err = 4;
|
||||
return 0;
|
||||
}
|
||||
|
||||
cpumask = bpf_kptr_xchg(&v->cpumask, NULL);
|
||||
if (cpumask)
|
||||
bpf_cpumask_release(cpumask);
|
||||
else
|
||||
err = 5;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("tp_btf/task_newtask")
|
||||
int BPF_PROG(test_insert_kptr_get_release, struct task_struct *task, u64 clone_flags)
|
||||
{
|
||||
struct bpf_cpumask *cpumask;
|
||||
struct __cpumask_map_value *v;
|
||||
|
||||
cpumask = create_cpumask();
|
||||
if (!cpumask)
|
||||
return 0;
|
||||
|
||||
if (cpumask_map_insert(cpumask)) {
|
||||
err = 3;
|
||||
return 0;
|
||||
}
|
||||
|
||||
v = cpumask_map_value_lookup();
|
||||
if (!v) {
|
||||
err = 4;
|
||||
return 0;
|
||||
}
|
||||
|
||||
cpumask = bpf_cpumask_kptr_get(&v->cpumask);
|
||||
if (cpumask)
|
||||
bpf_cpumask_release(cpumask);
|
||||
else
|
||||
err = 5;
|
||||
|
||||
return 0;
|
||||
}
|
tools/testing/selftests/bpf/progs/dummy_st_ops_fail.c (new file, 27 lines)
@@ -0,0 +1,27 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */

#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

#include "bpf_misc.h"

char _license[] SEC("license") = "GPL";

SEC("struct_ops.s/test_2")
__failure __msg("attach to unsupported member test_2 of struct bpf_dummy_ops")
int BPF_PROG(test_unsupported_field_sleepable,
	     struct bpf_dummy_ops_state *state, int a1, unsigned short a2,
	     char a3, unsigned long a4)
{
	/* Tries to mark an unsleepable field in struct bpf_dummy_ops as sleepable. */
	return 0;
}

SEC(".struct_ops")
struct bpf_dummy_ops dummy_1 = {
	.test_1 = NULL,
	.test_2 = (void *)test_unsupported_field_sleepable,
	.test_sleepable = (void *)NULL,
};
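For context on why this program is expected to fail while the test_sleepable program in the next hunk loads: the dummy struct_ops' .check_member callback only permits sleepable programs on the test_sleepable member. A rough sketch of that kernel-side check, paraphrased from memory rather than quoted from the tree:

static int bpf_dummy_ops_check_member(const struct btf_type *t,
				       const struct btf_member *member,
				       const struct bpf_prog *prog)
{
	u32 moff = __btf_member_bit_offset(t, member) / 8;

	switch (moff) {
	case offsetof(struct bpf_dummy_ops, test_sleepable):
		break;
	default:
		if (prog->aux->sleepable)
			return -EINVAL;	/* sleepable attach not allowed on this member */
	}

	return 0;
}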
@@ -1,19 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
#include <linux/bpf.h>
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

struct bpf_dummy_ops_state {
	int val;
} __attribute__((preserve_access_index));

struct bpf_dummy_ops {
	int (*test_1)(struct bpf_dummy_ops_state *state);
	int (*test_2)(struct bpf_dummy_ops_state *state, int a1, unsigned short a2,
		      char a3, unsigned long a4);
};

char _license[] SEC("license") = "GPL";

SEC("struct_ops/test_1")
@@ -43,8 +33,15 @@ int BPF_PROG(test_2, struct bpf_dummy_ops_state *state, int a1, unsigned short a
	return 0;
}

SEC("struct_ops.s/test_sleepable")
int BPF_PROG(test_sleepable, struct bpf_dummy_ops_state *state)
{
	return 0;
}

SEC(".struct_ops")
struct bpf_dummy_ops dummy_1 = {
	.test_1 = (void *)test_1,
	.test_2 = (void *)test_2,
	.test_sleepable = (void *)test_sleepable,
};
@ -35,6 +35,13 @@ struct {
|
||||
__type(value, __u32);
|
||||
} array_map3 SEC(".maps");
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__uint(max_entries, 1);
|
||||
__type(key, __u32);
|
||||
__type(value, __u64);
|
||||
} array_map4 SEC(".maps");
|
||||
|
||||
struct sample {
|
||||
int pid;
|
||||
long value;
|
||||
@ -67,7 +74,7 @@ static int get_map_val_dynptr(struct bpf_dynptr *ptr)
|
||||
* bpf_ringbuf_submit/discard_dynptr call
|
||||
*/
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("Unreleased reference id=1")
|
||||
__failure __msg("Unreleased reference id=2")
|
||||
int ringbuf_missing_release1(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -80,7 +87,7 @@ int ringbuf_missing_release1(void *ctx)
|
||||
}
|
||||
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("Unreleased reference id=2")
|
||||
__failure __msg("Unreleased reference id=4")
|
||||
int ringbuf_missing_release2(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr1, ptr2;
|
||||
@ -382,7 +389,7 @@ int invalid_helper1(void *ctx)
|
||||
|
||||
/* A dynptr can't be passed into a helper function at a non-zero offset */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("Expected an initialized dynptr as arg #3")
|
||||
__failure __msg("cannot pass in dynptr at an offset=-8")
|
||||
int invalid_helper2(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -420,7 +427,7 @@ int invalid_write1(void *ctx)
|
||||
* offset
|
||||
*/
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("Expected an initialized dynptr as arg #3")
|
||||
__failure __msg("cannot overwrite referenced dynptr")
|
||||
int invalid_write2(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -444,7 +451,7 @@ int invalid_write2(void *ctx)
|
||||
* non-const offset
|
||||
*/
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("Expected an initialized dynptr as arg #1")
|
||||
__failure __msg("cannot overwrite referenced dynptr")
|
||||
int invalid_write3(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -476,7 +483,7 @@ static int invalid_write4_callback(__u32 index, void *data)
|
||||
* be invalidated as a dynptr
|
||||
*/
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("arg 1 is an unacquired reference")
|
||||
__failure __msg("cannot overwrite referenced dynptr")
|
||||
int invalid_write4(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -584,7 +591,7 @@ int invalid_read4(void *ctx)
|
||||
|
||||
/* Initializing a dynptr on an offset should fail */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("invalid write to stack")
|
||||
__failure __msg("cannot pass in dynptr at an offset=0")
|
||||
int invalid_offset(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
@ -653,3 +660,435 @@ int dynptr_from_mem_invalid_api(void *ctx)
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("cannot overwrite referenced dynptr") __log_level(2)
|
||||
int dynptr_pruning_overwrite(struct __sk_buff *ctx)
|
||||
{
|
||||
asm volatile (
|
||||
"r9 = 0xeB9F; \
|
||||
r6 = %[ringbuf] ll; \
|
||||
r1 = r6; \
|
||||
r2 = 8; \
|
||||
r3 = 0; \
|
||||
r4 = r10; \
|
||||
r4 += -16; \
|
||||
call %[bpf_ringbuf_reserve_dynptr]; \
|
||||
if r0 == 0 goto pjmp1; \
|
||||
goto pjmp2; \
|
||||
pjmp1: \
|
||||
*(u64 *)(r10 - 16) = r9; \
|
||||
pjmp2: \
|
||||
r1 = r10; \
|
||||
r1 += -16; \
|
||||
r2 = 0; \
|
||||
call %[bpf_ringbuf_discard_dynptr]; "
|
||||
:
|
||||
: __imm(bpf_ringbuf_reserve_dynptr),
|
||||
__imm(bpf_ringbuf_discard_dynptr),
|
||||
__imm_addr(ringbuf)
|
||||
: __clobber_all
|
||||
);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__success __msg("12: safe") __log_level(2)
|
||||
int dynptr_pruning_stacksafe(struct __sk_buff *ctx)
|
||||
{
|
||||
asm volatile (
|
||||
"r9 = 0xeB9F; \
|
||||
r6 = %[ringbuf] ll; \
|
||||
r1 = r6; \
|
||||
r2 = 8; \
|
||||
r3 = 0; \
|
||||
r4 = r10; \
|
||||
r4 += -16; \
|
||||
call %[bpf_ringbuf_reserve_dynptr]; \
|
||||
if r0 == 0 goto stjmp1; \
|
||||
goto stjmp2; \
|
||||
stjmp1: \
|
||||
r9 = r9; \
|
||||
stjmp2: \
|
||||
r1 = r10; \
|
||||
r1 += -16; \
|
||||
r2 = 0; \
|
||||
call %[bpf_ringbuf_discard_dynptr]; "
|
||||
:
|
||||
: __imm(bpf_ringbuf_reserve_dynptr),
|
||||
__imm(bpf_ringbuf_discard_dynptr),
|
||||
__imm_addr(ringbuf)
|
||||
: __clobber_all
|
||||
);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("cannot overwrite referenced dynptr") __log_level(2)
|
||||
int dynptr_pruning_type_confusion(struct __sk_buff *ctx)
|
||||
{
|
||||
asm volatile (
|
||||
"r6 = %[array_map4] ll; \
|
||||
r7 = %[ringbuf] ll; \
|
||||
r1 = r6; \
|
||||
r2 = r10; \
|
||||
r2 += -8; \
|
||||
r9 = 0; \
|
||||
*(u64 *)(r2 + 0) = r9; \
|
||||
r3 = r10; \
|
||||
r3 += -24; \
|
||||
r9 = 0xeB9FeB9F; \
|
||||
*(u64 *)(r10 - 16) = r9; \
|
||||
*(u64 *)(r10 - 24) = r9; \
|
||||
r9 = 0; \
|
||||
r4 = 0; \
|
||||
r8 = r2; \
|
||||
call %[bpf_map_update_elem]; \
|
||||
r1 = r6; \
|
||||
r2 = r8; \
|
||||
call %[bpf_map_lookup_elem]; \
|
||||
if r0 != 0 goto tjmp1; \
|
||||
exit; \
|
||||
tjmp1: \
|
||||
r8 = r0; \
|
||||
r1 = r7; \
|
||||
r2 = 8; \
|
||||
r3 = 0; \
|
||||
r4 = r10; \
|
||||
r4 += -16; \
|
||||
r0 = *(u64 *)(r0 + 0); \
|
||||
call %[bpf_ringbuf_reserve_dynptr]; \
|
||||
if r0 == 0 goto tjmp2; \
|
||||
r8 = r8; \
|
||||
r8 = r8; \
|
||||
r8 = r8; \
|
||||
r8 = r8; \
|
||||
r8 = r8; \
|
||||
r8 = r8; \
|
||||
r8 = r8; \
|
||||
goto tjmp3; \
|
||||
tjmp2: \
|
||||
*(u64 *)(r10 - 8) = r9; \
|
||||
*(u64 *)(r10 - 16) = r9; \
|
||||
r1 = r8; \
|
||||
r1 += 8; \
|
||||
r2 = 0; \
|
||||
r3 = 0; \
|
||||
r4 = r10; \
|
||||
r4 += -16; \
|
||||
call %[bpf_dynptr_from_mem]; \
|
||||
tjmp3: \
|
||||
r1 = r10; \
|
||||
r1 += -16; \
|
||||
r2 = 0; \
|
||||
call %[bpf_ringbuf_discard_dynptr]; "
|
||||
:
|
||||
: __imm(bpf_map_update_elem),
|
||||
__imm(bpf_map_lookup_elem),
|
||||
__imm(bpf_ringbuf_reserve_dynptr),
|
||||
__imm(bpf_dynptr_from_mem),
|
||||
__imm(bpf_ringbuf_discard_dynptr),
|
||||
__imm_addr(array_map4),
|
||||
__imm_addr(ringbuf)
|
||||
: __clobber_all
|
||||
);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("dynptr has to be at a constant offset") __log_level(2)
|
||||
int dynptr_var_off_overwrite(struct __sk_buff *ctx)
|
||||
{
|
||||
asm volatile (
|
||||
"r9 = 16; \
|
||||
*(u32 *)(r10 - 4) = r9; \
|
||||
r8 = *(u32 *)(r10 - 4); \
|
||||
if r8 >= 0 goto vjmp1; \
|
||||
r0 = 1; \
|
||||
exit; \
|
||||
vjmp1: \
|
||||
if r8 <= 16 goto vjmp2; \
|
||||
r0 = 1; \
|
||||
exit; \
|
||||
vjmp2: \
|
||||
r8 &= 16; \
|
||||
r1 = %[ringbuf] ll; \
|
||||
r2 = 8; \
|
||||
r3 = 0; \
|
||||
r4 = r10; \
|
||||
r4 += -32; \
|
||||
r4 += r8; \
|
||||
call %[bpf_ringbuf_reserve_dynptr]; \
|
||||
r9 = 0xeB9F; \
|
||||
*(u64 *)(r10 - 16) = r9; \
|
||||
r1 = r10; \
|
||||
r1 += -32; \
|
||||
r1 += r8; \
|
||||
r2 = 0; \
|
||||
call %[bpf_ringbuf_discard_dynptr]; "
|
||||
:
|
||||
: __imm(bpf_ringbuf_reserve_dynptr),
|
||||
__imm(bpf_ringbuf_discard_dynptr),
|
||||
__imm_addr(ringbuf)
|
||||
: __clobber_all
|
||||
);
|
||||
return 0;
|
||||
}
|
||||
|
||||
SEC("?tc")
|
||||
__failure __msg("cannot overwrite referenced dynptr") __log_level(2)
|
||||
int dynptr_partial_slot_invalidate(struct __sk_buff *ctx)
|
||||
{
|
||||
asm volatile (
|
||||
"r6 = %[ringbuf] ll; \
|
||||
r7 = %[array_map4] ll; \
|
||||
r1 = r7; \
|
||||
r2 = r10; \
|
||||
r2 += -8; \
|
||||
r9 = 0; \
|
||||
*(u64 *)(r2 + 0) = r9; \
|
||||
r3 = r2; \
|
||||
r4 = 0; \
|
||||
r8 = r2; \
|
||||
call %[bpf_map_update_elem]; \
|
||||
r1 = r7; \
|
||||
r2 = r8; \
|
||||
call %[bpf_map_lookup_elem]; \
|
||||
if r0 != 0 goto sjmp1; \
|
||||
exit; \
|
||||
sjmp1: \
|
||||
r7 = r0; \
|
||||
r1 = r6; \
|
||||
r2 = 8; \
|
||||
r3 = 0; \
|
||||
r4 = r10; \
|
||||
r4 += -24; \
|
||||
call %[bpf_ringbuf_reserve_dynptr]; \
|
||||
*(u64 *)(r10 - 16) = r9; \
|
||||
r1 = r7; \
|
||||
r2 = 8; \
|
||||
r3 = 0; \
|
||||
r4 = r10; \
|
||||
r4 += -16; \
|
||||
call %[bpf_dynptr_from_mem]; \
|
||||
r1 = r10; \
|
||||
r1 += -512; \
|
||||
r2 = 488; \
|
||||
r3 = r10; \
|
||||
r3 += -24; \
|
||||
r4 = 0; \
|
||||
r5 = 0; \
|
||||
call %[bpf_dynptr_read]; \
|
||||
r8 = 1; \
|
||||
if r0 != 0 goto sjmp2; \
|
||||
r8 = 0; \
|
||||
sjmp2: \
|
||||
r1 = r10; \
|
||||
r1 += -24; \
|
||||
r2 = 0; \
|
||||
call %[bpf_ringbuf_discard_dynptr]; "
|
||||
:
|
||||
: __imm(bpf_map_update_elem),
|
||||
__imm(bpf_map_lookup_elem),
|
||||
__imm(bpf_ringbuf_reserve_dynptr),
|
||||
__imm(bpf_ringbuf_discard_dynptr),
|
||||
__imm(bpf_dynptr_from_mem),
|
||||
__imm(bpf_dynptr_read),
|
||||
__imm_addr(ringbuf),
|
||||
__imm_addr(array_map4)
|
||||
: __clobber_all
|
||||
);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Test that it is allowed to overwrite unreferenced dynptr. */
|
||||
SEC("?raw_tp")
|
||||
__success
|
||||
int dynptr_overwrite_unref(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
|
||||
if (get_map_val_dynptr(&ptr))
|
||||
return 0;
|
||||
if (get_map_val_dynptr(&ptr))
|
||||
return 0;
|
||||
if (get_map_val_dynptr(&ptr))
|
||||
return 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Test that slices are invalidated on reinitializing a dynptr. */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("invalid mem access 'scalar'")
|
||||
int dynptr_invalidate_slice_reinit(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
__u8 *p;
|
||||
|
||||
if (get_map_val_dynptr(&ptr))
|
||||
return 0;
|
||||
p = bpf_dynptr_data(&ptr, 0, 1);
|
||||
if (!p)
|
||||
return 0;
|
||||
if (get_map_val_dynptr(&ptr))
|
||||
return 0;
|
||||
/* this should fail */
|
||||
return *p;
|
||||
}
|
||||
|
||||
/* Invalidation of dynptr slices on destruction of dynptr should not miss
|
||||
* mem_or_null pointers.
|
||||
*/
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("R1 type=scalar expected=percpu_ptr_")
|
||||
int dynptr_invalidate_slice_or_null(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
__u8 *p;
|
||||
|
||||
if (get_map_val_dynptr(&ptr))
|
||||
return 0;
|
||||
|
||||
p = bpf_dynptr_data(&ptr, 0, 1);
|
||||
*(__u8 *)&ptr = 0;
|
||||
/* this should fail */
|
||||
bpf_this_cpu_ptr(p);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Destruction of dynptr should also invalidate any slices obtained from it */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("R7 invalid mem access 'scalar'")
|
||||
int dynptr_invalidate_slice_failure(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr1;
|
||||
struct bpf_dynptr ptr2;
|
||||
__u8 *p1, *p2;
|
||||
|
||||
if (get_map_val_dynptr(&ptr1))
|
||||
return 0;
|
||||
if (get_map_val_dynptr(&ptr2))
|
||||
return 0;
|
||||
|
||||
p1 = bpf_dynptr_data(&ptr1, 0, 1);
|
||||
if (!p1)
|
||||
return 0;
|
||||
p2 = bpf_dynptr_data(&ptr2, 0, 1);
|
||||
if (!p2)
|
||||
return 0;
|
||||
|
||||
*(__u8 *)&ptr1 = 0;
|
||||
/* this should fail */
|
||||
return *p1;
|
||||
}
|
||||
|
||||
/* Invalidation of slices should be scoped and should not prevent dereferencing
|
||||
* slices of another dynptr after destroying unrelated dynptr
|
||||
*/
|
||||
SEC("?raw_tp")
|
||||
__success
|
||||
int dynptr_invalidate_slice_success(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr1;
|
||||
struct bpf_dynptr ptr2;
|
||||
__u8 *p1, *p2;
|
||||
|
||||
if (get_map_val_dynptr(&ptr1))
|
||||
return 1;
|
||||
if (get_map_val_dynptr(&ptr2))
|
||||
return 1;
|
||||
|
||||
p1 = bpf_dynptr_data(&ptr1, 0, 1);
|
||||
if (!p1)
|
||||
return 1;
|
||||
p2 = bpf_dynptr_data(&ptr2, 0, 1);
|
||||
if (!p2)
|
||||
return 1;
|
||||
|
||||
*(__u8 *)&ptr1 = 0;
|
||||
return *p2;
|
||||
}
|
||||
|
||||
/* Overwriting referenced dynptr should be rejected */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("cannot overwrite referenced dynptr")
|
||||
int dynptr_overwrite_ref(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
|
||||
bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
|
||||
/* this should fail */
|
||||
if (get_map_val_dynptr(&ptr))
|
||||
bpf_ringbuf_discard_dynptr(&ptr, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Reject writes to dynptr slot from bpf_dynptr_read */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("potential write to dynptr at off=-16")
|
||||
int dynptr_read_into_slot(void *ctx)
|
||||
{
|
||||
union {
|
||||
struct {
|
||||
char _pad[48];
|
||||
struct bpf_dynptr ptr;
|
||||
};
|
||||
char buf[64];
|
||||
} data;
|
||||
|
||||
bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &data.ptr);
|
||||
/* this should fail */
|
||||
bpf_dynptr_read(data.buf, sizeof(data.buf), &data.ptr, 0, 0);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Reject writes to dynptr slot for uninit arg */
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("potential write to dynptr at off=-16")
|
||||
int uninit_write_into_slot(void *ctx)
|
||||
{
|
||||
struct {
|
||||
char buf[64];
|
||||
struct bpf_dynptr ptr;
|
||||
} data;
|
||||
|
||||
bpf_ringbuf_reserve_dynptr(&ringbuf, 80, 0, &data.ptr);
|
||||
/* this should fail */
|
||||
bpf_get_current_comm(data.buf, 80);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int callback(__u32 index, void *data)
|
||||
{
|
||||
*(__u32 *)data = 123;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* If the dynptr is written into in a callback function, its data
|
||||
* slices should be invalidated as well.
|
||||
*/
|
||||
SEC("?raw_tp")
|
||||
__failure __msg("invalid mem access 'scalar'")
|
||||
int invalid_data_slices(void *ctx)
|
||||
{
|
||||
struct bpf_dynptr ptr;
|
||||
__u32 *slice;
|
||||
|
||||
if (get_map_val_dynptr(&ptr))
|
||||
return 0;
|
||||
|
||||
slice = bpf_dynptr_data(&ptr, 0, sizeof(__u32));
|
||||
if (!slice)
|
||||
return 0;
|
||||
|
||||
bpf_loop(10, callback, &ptr, 0);
|
||||
|
||||
/* this should fail */
|
||||
*slice = 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
tools/testing/selftests/bpf/progs/nested_trust_common.h (new file, 12 lines)
@@ -0,0 +1,12 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */

#ifndef _NESTED_TRUST_COMMON_H
#define _NESTED_TRUST_COMMON_H

#include <stdbool.h>

bool bpf_cpumask_test_cpu(unsigned int cpu, const struct cpumask *cpumask) __ksym;
bool bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym;

#endif /* _NESTED_TRUST_COMMON_H */
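The nested_trust_success.c and nested_trust_failure.c programs that consume this header are among the files omitted from this view. As a rough, hypothetical sketch (not the verbatim selftest) of the pattern the header supports, a success-side program would hand a trusted nested pointer such as task->cpus_ptr straight to one of the kfuncs declared above:

#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
#include "nested_trust_common.h"

char _license[] SEC("license") = "GPL";

SEC("tp_btf/task_newtask")
int BPF_PROG(test_read_cpumask, struct task_struct *task, u64 clone_flags)
{
	/* task is trusted, so its cpus_ptr field may be passed to a kfunc
	 * taking a const struct cpumask * (per the nested trusted fields
	 * documentation added in this series).
	 */
	bpf_cpumask_test_cpu(0, task->cpus_ptr);
	return 0;
}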
Some files were not shown because too many files have changed in this diff.