2022-02-20 10:40:55 -08:00
// SPDX-License-Identifier: GPL-2.0
2018-04-18 15:55:57 -07:00
/* Copyright (c) 2018 Facebook */
# include <uapi/linux/btf.h>
2019-11-14 10:57:15 -08:00
# include <uapi/linux/bpf.h>
# include <uapi/linux/bpf_perf_event.h>
2018-04-18 15:55:57 -07:00
# include <uapi/linux/types.h>
2018-04-18 15:56:00 -07:00
# include <linux/seq_file.h>
2018-04-18 15:55:57 -07:00
# include <linux/compiler.h>
2018-11-19 15:29:08 -08:00
# include <linux/ctype.h>
2018-04-18 15:55:57 -07:00
# include <linux/errno.h>
# include <linux/slab.h>
2018-04-18 15:56:01 -07:00
# include <linux/anon_inodes.h>
# include <linux/file.h>
2018-04-18 15:55:57 -07:00
# include <linux/uaccess.h>
# include <linux/kernel.h>
2018-05-04 14:49:51 -07:00
# include <linux/idr.h>
2018-05-22 14:57:18 -07:00
# include <linux/sort.h>
2018-04-18 15:55:57 -07:00
# include <linux/bpf_verifier.h>
# include <linux/btf.h>
2020-07-11 23:53:26 +02:00
# include <linux/btf_ids.h>
2019-11-14 10:57:15 -08:00
# include <linux/skmsg.h>
# include <linux/perf_event.h>
2020-08-25 21:21:19 +02:00
# include <linux/bsearch.h>
2020-11-09 17:19:31 -08:00
# include <linux/kobject.h>
# include <linux/sysfs.h>
2019-11-14 10:57:15 -08:00
# include <net/sock.h>
2021-12-01 10:10:31 -08:00
# include "../tools/lib/bpf/relo_core.h"
2018-04-18 15:55:57 -07:00
/* BTF (BPF Type Format) is the metadata format which describes
* the data types of BPF programs/maps. Hence, it basically focuses
* on the C programming language, which modern BPF primarily
* uses.
*
* ELF Section :
* ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
* The BTF data is stored under the " .BTF " ELF section
*
* struct btf_type :
* ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
* Each ' struct btf_type ' object describes a C data type .
* Depending on the type it is describing , a ' struct btf_type '
* object may be followed by more data . F . e .
* To describe an array , ' struct btf_type ' is followed by
* ' struct btf_array ' .
*
* ' struct btf_type ' and any extra data following it are
* 4 bytes aligned .
*
* Type section :
* ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
* The BTF type section contains a list of ' struct btf_type ' objects .
* Each one describes a C type . Recall from the above section
* that a ' struct btf_type ' object could be immediately followed by extra
2021-05-25 10:56:59 +08:00
* data in order to describe some particular C types .
2018-04-18 15:55:57 -07:00
*
* type_id :
* ~ ~ ~ ~ ~ ~ ~
* Each btf_type object is identified by a type_id. The type_id
* is implied by the location of the btf_type object in
* the BTF type section. The first one has type_id 1, the second
* one has type_id 2, and so on. Hence, an earlier btf_type has
* a smaller type_id.
*
* A btf_type object may refer to another btf_type object by using
* type_id ( i . e . the " type " in the " struct btf_type " ) .
*
* NOTE that we cannot assume any reference - order .
* A btf_type object can refer to an earlier btf_type object
* but it can also refer to a later btf_type object .
*
* For example , to describe " const void * " . A btf_type
* object describing " const " may refer to another btf_type
* object describing " void * " . This type - reference is done
* by specifying type_id :
*
* [ 1 ] CONST ( anon ) type_id = 2
* [ 2 ] PTR ( anon ) type_id = 0
*
* The above is the btf_verifier debug log:
* - Each line starting with "[?]" is a btf_type object
* - [?] is the type_id of the btf_type object.
* - CONST/PTR is the BTF_KIND_XXX
* - "(anon)" is the name of the type. It just
*   happens that CONST and PTR have no name.
* - type_id = XXX is the ' u32 type ' in btf_type
*
* NOTE : " void " has type_id 0
*
* String section :
* ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
* The BTF string section contains the names used by the type section.
* Each string is referred to by an "offset" from the beginning of the
* string section.
*
* Each string is ' \0 ' terminated .
*
* The first character in the string section must be ' \0 '
* which is used to mean ' anonymous ' . Some btf_type may not
* have a name .
*/
/* BTF verification:
*
* To verify BTF data , two passes are needed .
*
* Pass # 1
* ~ ~ ~ ~ ~ ~ ~
* The first pass is to collect all btf_type objects to
* an array : " btf->types " .
*
* Depending on the C type that a btf_type is describing,
* a btf_type may be followed by extra data. We don't know
* how many btf_type objects there are, and more importantly we don't
* know where each btf_type is located in the type section.
*
* Without knowing the location of each type_id , most verifications
* cannot be done . e . g . an earlier btf_type may refer to a later
* btf_type ( recall the " const void * " above ) , so we cannot
* check this type - reference in the first pass .
*
* In the first pass , it still does some verifications ( e . g .
* checking the name is a valid offset to the string section ) .
2018-04-18 15:55:58 -07:00
*
* Pass # 2
* ~ ~ ~ ~ ~ ~ ~
* The main focus is to resolve a btf_type that is referring
* to another type .
*
* We have to ensure that the type being referred to:
* 1) does exist in the BTF (i.e. in btf->types[])
* 2) does not cause a loop:
* struct A {
* struct B b ;
* } ;
*
* struct B {
* struct A a ;
* } ;
*
* btf_type_needs_resolve ( ) decides if a btf_type needs
* to be resolved .
*
* The needs_resolve type implements the " resolve() " ops which
* essentially does a DFS and detects backedge .
*
* During resolve ( or DFS ) , different C types have different
* " RESOLVED " conditions .
*
* When resolving a BTF_KIND_STRUCT , we need to resolve all its
* members because a member is always referring to another
* type . A struct ' s member can be treated as " RESOLVED " if
* it is referring to a BTF_KIND_PTR . Otherwise , the
* following valid C struct would be rejected :
*
* struct A {
* int m ;
* struct A * a ;
* } ;
*
* When resolving a BTF_KIND_PTR , it needs to keep resolving if
* it is referring to another BTF_KIND_PTR . Otherwise , we cannot
* detect a pointer loop , e . g . :
* BTF_KIND_CONST - > BTF_KIND_PTR - > BTF_KIND_CONST - > BTF_KIND_PTR +
* ^ |
* + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +
*
2018-04-18 15:55:57 -07:00
*/
bpf: btf: support 128 bit integer type
Currently, btf only supports up to 64-bit integer.
On the other hand, 128bit support for gcc and clang
has existed for a long time. For example, both gcc 4.8
and llvm 3.7 support the types "__int128" and
"unsigned __int128" for virtually all 64bit architectures
including bpf.
The requirement for __int128 support comes from two areas:
. bpf program may use __int128. For example, some bcc tools
(https://github.com/iovisor/bcc/tree/master/tools),
mostly tcp v6 related, tcpstates.py, tcpaccept.py, etc.,
are using __int128 to represent the ipv6 addresses.
. linux itself is using __int128 types. Hence supporting
__int128 type in BTF is required for vmlinux BTF,
which will be used by "compile once and run everywhere"
and other projects.
For 128bit integer, instead of base-10, hex numbers are pretty
printed out as large decimal number is hard to decipher, e.g.,
for ipv6 addresses.
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2019-01-15 17:07:47 -08:00
/* Width in bits of a 128-bit integer, expressed as two u64 halves. */
#define BITS_PER_U128 (2 * sizeof(u64) * BITS_PER_BYTE)
2018-04-18 15:55:57 -07:00
/*
 * Bit <-> byte helpers.
 *
 * BITS_PER_BYTE_MASK		mask selecting the bit position inside a byte
 * BITS_PER_BYTE_MASKED(n)	bits of @n left over after whole bytes
 * BITS_ROUNDDOWN_BYTES(n)	whole bytes contained in @n bits (shift by 3)
 * BITS_ROUNDUP_BYTES(n)	bytes needed to hold @n bits: the whole bytes
 *				plus one more if any bits are left over
 */
#define BITS_PER_BYTE_MASK		(BITS_PER_BYTE - 1)
#define BITS_PER_BYTE_MASKED(nr_bits)	((nr_bits) & BITS_PER_BYTE_MASK)
#define BITS_ROUNDDOWN_BYTES(nr_bits)	((nr_bits) >> 3)
#define BITS_ROUNDUP_BYTES(nr_bits)					\
	(BITS_ROUNDDOWN_BYTES(nr_bits) +				\
	 (BITS_PER_BYTE_MASKED(nr_bits) != 0))
2021-02-26 21:22:52 +01:00
/*
 * Selects bits 0-15, 24-28 and 31 of a 32-bit word.
 * NOTE(review): presumably these are the defined bits of btf_type.info and
 * anything outside the mask is rejected - confirm against uapi/linux/btf.h.
 */
#define BTF_INFO_MASK 0x9f00ffff
2018-05-22 14:57:20 -07:00
/*
 * BTF_INT_MASK selects the low 28 bits of a 32-bit word.
 * NOTE(review): presumably the defined bits of the INT encoding word - confirm.
 *
 * BTF_TYPE_ID_VALID() / BTF_STR_OFFSET_VALID() are true when the given
 * type id / string offset does not exceed BTF_MAX_TYPE /
 * BTF_MAX_NAME_OFFSET respectively.
 */
#define BTF_INT_MASK			0x0fffffff
#define BTF_TYPE_ID_VALID(type_id)	(BTF_MAX_TYPE >= (type_id))
#define BTF_STR_OFFSET_VALID(name_off)	(BTF_MAX_NAME_OFFSET >= (name_off))
2018-04-18 15:55:57 -07:00
/*
 * 16MB: room for 64k structs with 16 members each, plus
 * a few MB for the string section.
 * The hard limit is S32_MAX.
 */
#define BTF_MAX_SIZE (16 << 20)
2018-04-18 15:55:58 -07:00
/*
 * Walk the members of @struct_type, starting at member index @from.
 *
 * @i:           index variable; set to @from and incremented each iteration
 * @from:        index of the first member to visit
 * @struct_type: type whose members are walked; the loop runs while
 *               @i < btf_type_vlen(@struct_type)
 * @member:      cursor; starts at btf_type_member(@struct_type) + @from and
 *               advances in lock-step with @i
 */
#define for_each_member_from(i, from, struct_type, member)		\
	for (i = (from), member = btf_type_member(struct_type) + (from);	\
	     i < btf_type_vlen(struct_type);				\
	     i++, member++)
2019-04-09 23:20:09 +02:00
/*
 * Walk the var_secinfo entries of @struct_type, starting at index @from.
 *
 * @i:           index variable; set to @from and incremented each iteration
 * @from:        index of the first entry to visit
 * @struct_type: type whose entries are walked; the loop runs while
 *               @i < btf_type_vlen(@struct_type)
 * @member:      cursor; starts at btf_type_var_secinfo(@struct_type) + @from
 *               and advances in lock-step with @i
 */
#define for_each_vsi_from(i, from, struct_type, member)			\
	for (i = (from), member = btf_type_var_secinfo(struct_type) + (from);	\
	     i < btf_type_vlen(struct_type);				\
	     i++, member++)
2019-08-20 10:31:50 +01:00
/*
 * File-scope IDR and spinlock for BTF objects.
 * NOTE(review): presumably btf_idr maps the u32 id kept in struct btf to its
 * object and btf_idr_lock serializes access to it - confirm at the use sites.
 */
DEFINE_IDR ( btf_idr ) ;
DEFINE_SPINLOCK ( btf_idr_lock ) ;
2018-05-04 14:49:51 -07:00
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to the verifier
using the bpf syscall (e.g. for a kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
/*
 * Hook under which a kfunc BTF ID set is registered. Per the commit that
 * introduced kfunc sets in struct btf, a hook is one of the program types
 * (e.g. XDP, TC/SCHED_CLS, STRUCT_OPS).
 *
 * BTF_KFUNC_HOOK_MAX is not a hook: it is the number of hooks and must
 * stay last, as it sizes the per-hook array in struct btf_kfunc_set_tab.
 */
enum btf_kfunc_hook {
	BTF_KFUNC_HOOK_XDP,
	BTF_KFUNC_HOOK_TC,
	BTF_KFUNC_HOOK_STRUCT_OPS,
	BTF_KFUNC_HOOK_TRACING,
	BTF_KFUNC_HOOK_SYSCALL,
	BTF_KFUNC_HOOK_MAX,
};
/*
 * Capacity limits for the kfunc tables.
 *
 * BTF_KFUNC_SET_MAX_CNT: per the commit that introduced it, the maximum
 * number of kfunc BTF IDs permitted in one set for vmlinux sets, which are
 * allocated on demand. That commit text quotes 32; the value here is 256.
 *
 * BTF_DTOR_KFUNC_MAX_CNT:
 * NOTE(review): presumably the maximum number of entries in
 * struct btf_id_dtor_kfunc_tab - confirm at the registration site.
 */
enum {
	BTF_KFUNC_SET_MAX_CNT = 256,
	BTF_DTOR_KFUNC_MAX_CNT = 256,
};
struct btf_kfunc_set_tab {
2022-07-21 15:42:35 +02:00
struct btf_id_set8 * sets [ BTF_KFUNC_HOOK_MAX ] ;
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still intializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
} ;
2022-04-25 03:18:54 +05:30
/*
 * Variable-length table of struct btf_id_dtor_kfunc entries.
 *
 * @cnt:   NOTE(review): presumably the number of valid entries in
 *         @dtors - confirm at the registration site.
 * @dtors: trailing flexible array holding the entries
 */
struct btf_id_dtor_kfunc_tab {
	u32 cnt;
	struct btf_id_dtor_kfunc dtors[];
};
2018-04-18 15:55:57 -07:00
struct btf {
2018-05-22 14:57:18 -07:00
void * data ;
2018-04-18 15:55:57 -07:00
struct btf_type * * types ;
2018-04-18 15:55:58 -07:00
u32 * resolved_ids ;
u32 * resolved_sizes ;
2018-04-18 15:55:57 -07:00
const char * strings ;
void * nohdr_data ;
2018-05-22 14:57:18 -07:00
struct btf_header hdr ;
2020-11-09 17:19:28 -08:00
u32 nr_types ; /* includes VOID for base BTF */
2018-04-18 15:55:57 -07:00
u32 types_size ;
u32 data_size ;
2018-04-18 15:56:01 -07:00
refcount_t refcnt ;
2018-05-04 14:49:51 -07:00
u32 id ;
struct rcu_head rcu ;
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still intializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
struct btf_kfunc_set_tab * kfunc_set_tab ;
2022-04-25 03:18:54 +05:30
struct btf_id_dtor_kfunc_tab * dtor_kfunc_tab ;
bpf: Recognize lock and list fields in allocated objects
Allow specifying bpf_spin_lock, bpf_list_head, bpf_list_node fields in a
allocated object.
Also update btf_struct_access to reject direct access to these special
fields.
A bpf_list_head allows implementing map-in-map style use cases, where an
allocated object with bpf_list_head is linked into a list in a map
value. This would require embedding a bpf_list_node, support for which
is also included. The bpf_spin_lock is used to protect the bpf_list_head
and other data.
While we strictly don't require to hold a bpf_spin_lock while touching
the bpf_list_head in such objects, as when have access to it, we have
complete ownership of the object, the locking constraint is still kept
and may be conditionally lifted in the future.
Note that the specification of such types can be done just like map
values, e.g.:
struct bar {
struct bpf_list_node node;
};
struct foo {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(bar, node);
struct bpf_list_node node;
};
struct map_value {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(foo, node);
};
To recognize such types in user BTF, we build a btf_struct_metas array
of metadata items corresponding to each BTF ID. This is done once during
the btf_parse stage to avoid having to do it each time during the
verification process's requirement to inspect the metadata.
Moreover, the computed metadata needs to be passed to some helpers in
future patches which requires allocating them and storing them in the
BTF that is pinned by the program itself, so that valid access can be
assumed to such data during program runtime.
A key thing to note is that once a btf_struct_meta is available for a
type, both the btf_record and btf_field_offs should be available. It is
critical that btf_field_offs is available in case special fields are
present, as we extensively rely on special fields being zeroed out in
map values and allocated objects in later patches. The code ensures that
by bailing out in case of errors and ensuring both are available
together. If the record is not available, the special fields won't be
recognized, so not having both is also fine (in terms of being a
verification error and not a runtime bug).
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221118015614.2013203-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-18 07:25:56 +05:30
struct btf_struct_metas * struct_meta_tab ;
2020-11-09 17:19:28 -08:00
/* split BTF support */
struct btf * base_btf ;
u32 start_id ; /* first type ID in this BTF (0 for base BTF) */
u32 start_str_off ; /* first string offset (0 for base BTF) */
2020-11-09 17:19:29 -08:00
char name [ MODULE_NAME_LEN ] ;
bool kernel_btf ;
2018-04-18 15:55:57 -07:00
} ;
2018-04-18 15:55:58 -07:00
/* Verification phase recorded in the 'phase' field of struct btf_verifier_env.
 * NOTE(review): judging by the names only, CHECK_META is the per-type
 * metadata pass and CHECK_TYPE the later type-checking pass -- confirm
 * against the users of env->phase.
 */
enum verifier_phase {
CHECK_META ,
CHECK_TYPE ,
} ;
/* One entry of the fixed-size 'stack' array in struct btf_verifier_env. */
struct resolve_vertex {
/* the type this entry refers to */
const struct btf_type * t ;
/* type ID that accompanies 't' */
u32 type_id ;
/* presumably the index of the next member of 't' still to be visited --
 * TODO confirm against the resolve callbacks
 */
u16 next_member ;
} ;
/* Visit status of a type.
 * NOTE(review): presumably these are the values stored in the u8
 * 'visit_states' array of struct btf_verifier_env -- confirm.
 */
enum visit_state {
NOT_VISITED ,
VISITED ,
RESOLVED ,
} ;
/* What the current resolution is being performed for; kept in the
 * 'resolve_mode' field of struct btf_verifier_env.
 */
enum resolve_mode {
RESOLVE_TBD , /* To Be Determined */
RESOLVE_PTR , /* Resolving for Pointer */
RESOLVE_STRUCT_OR_ARRAY , /* Resolving for struct/union
* or array
*/
} ;
/* Number of entries in the 'stack' array of struct btf_verifier_env. */
# define MAX_RESOLVE_DEPTH 32
2018-05-22 14:57:18 -07:00
/* Offset/length pair describing one section.
 * NOTE(review): units and the base the offset is relative to are not
 * visible here -- confirm against the code that fills this in.
 */
struct btf_sec_info {
u32 off ;
u32 len ;
} ;
2018-04-18 15:55:57 -07:00
/* State carried through verification of one BTF object. */
struct btf_verifier_env {
/* the BTF object being verified */
struct btf * btf ;
2018-04-18 15:55:58 -07:00
/* array of u8 state values; presumably one enum visit_state per type --
 * TODO confirm
 */
u8 * visit_states ;
/* fixed-capacity stack of resolve_vertex entries (MAX_RESOLVE_DEPTH slots) */
struct resolve_vertex stack [ MAX_RESOLVE_DEPTH ] ;
2018-04-18 15:55:57 -07:00
/* verifier log used for messages */
struct bpf_verifier_log log ;
/* presumably the ID of the type currently being logged -- TODO confirm */
u32 log_type_id ;
2018-04-18 15:55:58 -07:00
/* presumably the number of entries currently in 'stack' -- TODO confirm */
u32 top_stack ;
/* current verification phase */
enum verifier_phase phase ;
/* what the current resolution is being done for */
enum resolve_mode resolve_mode ;
2018-04-18 15:55:57 -07:00
} ;
static const char * const btf_kind_str [ NR_BTF_KINDS ] = {
[ BTF_KIND_UNKN ] = " UNKNOWN " ,
[ BTF_KIND_INT ] = " INT " ,
[ BTF_KIND_PTR ] = " PTR " ,
[ BTF_KIND_ARRAY ] = " ARRAY " ,
[ BTF_KIND_STRUCT ] = " STRUCT " ,
[ BTF_KIND_UNION ] = " UNION " ,
[ BTF_KIND_ENUM ] = " ENUM " ,
[ BTF_KIND_FWD ] = " FWD " ,
[ BTF_KIND_TYPEDEF ] = " TYPEDEF " ,
[ BTF_KIND_VOLATILE ] = " VOLATILE " ,
[ BTF_KIND_CONST ] = " CONST " ,
[ BTF_KIND_RESTRICT ] = " RESTRICT " ,
2018-11-19 15:29:08 -08:00
[ BTF_KIND_FUNC ] = " FUNC " ,
[ BTF_KIND_FUNC_PROTO ] = " FUNC_PROTO " ,
2019-04-09 23:20:09 +02:00
[ BTF_KIND_VAR ] = " VAR " ,
[ BTF_KIND_DATASEC ] = " DATASEC " ,
2021-02-26 21:22:52 +01:00
[ BTF_KIND_FLOAT ] = " FLOAT " ,
2021-10-12 09:48:38 -07:00
[ BTF_KIND_DECL_TAG ] = " DECL_TAG " ,
2021-11-11 17:26:09 -08:00
[ BTF_KIND_TYPE_TAG ] = " TYPE_TAG " ,
bpf: Add btf enum64 support
Currently, BTF only supports upto 32bit enum value with BTF_KIND_ENUM.
But in kernel, some enum indeed has 64bit values, e.g.,
in uapi bpf.h, we have
enum {
BPF_F_INDEX_MASK = 0xffffffffULL,
BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
};
In this case, BTF_KIND_ENUM will encode the value of BPF_F_CTXLEN_MASK
as 0, which certainly is incorrect.
This patch added a new btf kind, BTF_KIND_ENUM64, which permits
64bit value to cover the above use case. The BTF_KIND_ENUM64 has
the following three fields followed by the common type:
struct bpf_enum64 {
__u32 nume_off;
__u32 val_lo32;
__u32 val_hi32;
};
Currently, btf type section has an alignment of 4 as all element types
are u32. Representing the value with __u64 will introduce a pad
for bpf_enum64 and may also introduce misalignment for the 64bit value.
Hence, two members of val_hi32 and val_lo32 are chosen to avoid these issues.
The kflag is also introduced for BTF_KIND_ENUM and BTF_KIND_ENUM64
to indicate whether the value is signed or unsigned. The kflag intends
to provide consistent output of BTF C fortmat with the original
source code. For example, the original BTF_KIND_ENUM bit value is 0xffffffff.
The format C has two choices, printing out 0xffffffff or -1 and current libbpf
prints out as unsigned value. But if the signedness is preserved in btf,
the value can be printed the same as the original source code.
The kflag value 0 means unsigned values, which is consistent to the default
by libbpf and should also cover most cases as well.
The new BTF_KIND_ENUM64 is intended to support the enum value represented as
64bit value. But it can represent all BTF_KIND_ENUM values as well.
The compiler ([1]) and pahole will generate BTF_KIND_ENUM64 only if the value has
to be represented with 64 bits.
In addition, a static inline function btf_kind_core_compat() is introduced which
will be used later when libbpf relo_core.c changed. Here the kernel shares the
same relo_core.c with libbpf.
[1] https://reviews.llvm.org/D124641
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220607062600.3716578-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-06-06 23:26:00 -07:00
[ BTF_KIND_ENUM64 ] = " ENUM64 " ,
2018-04-18 15:55:57 -07:00
} ;
bpf: Support bpf program calling kernel function
This patch adds support to BPF verifier to allow bpf program calling
kernel function directly.
The use case included in this set is to allow bpf-tcp-cc to directly
call some tcp-cc helper functions (e.g. "tcp_cong_avoid_ai()"). Those
functions have already been used by some kernel tcp-cc implementations.
This set will also allow the bpf-tcp-cc program to directly call the
kernel tcp-cc implementation, For example, a bpf_dctcp may only want to
implement its own dctcp_cwnd_event() and reuse other dctcp_*() directly
from the kernel tcp_dctcp.c instead of reimplementing (or
copy-and-pasting) them.
The tcp-cc kernel functions mentioned above will be white listed
for the struct_ops bpf-tcp-cc programs to use in a later patch.
The white listed functions are not bounded to a fixed ABI contract.
Those functions have already been used by the existing kernel tcp-cc.
If any of them has changed, both in-tree and out-of-tree kernel tcp-cc
implementations have to be changed. The same goes for the struct_ops
bpf-tcp-cc programs which have to be adjusted accordingly.
This patch is to make the required changes in the bpf verifier.
First change is in btf.c, it adds a case in "btf_check_func_arg_match()".
When the passed in "btf->kernel_btf == true", it means matching the
verifier regs' states with a kernel function. This will handle the
PTR_TO_BTF_ID reg. It also maps PTR_TO_SOCK_COMMON, PTR_TO_SOCKET,
and PTR_TO_TCP_SOCK to its kernel's btf_id.
In the later libbpf patch, the insn calling a kernel function will
look like:
insn->code == (BPF_JMP | BPF_CALL)
insn->src_reg == BPF_PSEUDO_KFUNC_CALL /* <- new in this patch */
insn->imm == func_btf_id /* btf_id of the running kernel */
[ For the future calling function-in-kernel-module support, an array
of module btf_fds can be passed at the load time and insn->off
can be used to index into this array. ]
At the early stage of verifier, the verifier will collect all kernel
function calls into "struct bpf_kfunc_desc". Those
descriptors are stored in "prog->aux->kfunc_tab" and will
be available to the JIT. Since this "add" operation is similar
to the current "add_subprog()" and looking for the same insn->code,
they are done together in the new "add_subprog_and_kfunc()".
In the "do_check()" stage, the new "check_kfunc_call()" is added
to verify the kernel function call instruction:
1. Ensure the kernel function can be used by a particular BPF_PROG_TYPE.
A new bpf_verifier_ops "check_kfunc_call" is added to do that.
The bpf-tcp-cc struct_ops program will implement this function in
a later patch.
2. Call "btf_check_kfunc_args_match()" to ensure the regs can be
used as the args of a kernel function.
3. Mark the regs' type, subreg_def, and zext_dst.
At the later do_misc_fixups() stage, the new fixup_kfunc_call()
will replace the insn->imm with the function address (relative
to __bpf_call_base). If needed, the jit can find the btf_func_model
by calling the new bpf_jit_find_kfunc_model(prog, insn).
With the imm set to the function address, "bpftool prog dump xlated"
will be able to display the kernel function calls the same way as
it displays other bpf helper calls.
gpl_compatible program is required to call kernel function.
This feature currently requires JIT.
The verifier selftests are adjusted because of the changes in
the verbose log in add_subprog_and_kfunc().
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015142.1544736-1-kafai@fb.com
2021-03-24 18:51:42 -07:00
const char * btf_type_str ( const struct btf_type * t )
2020-01-20 16:53:46 -08:00
{
return btf_kind_str [ BTF_INFO_KIND ( t - > info ) ] ;
}
2020-09-28 12:31:04 +01:00
/* Chunk size we use in safe copy of data to be shown. */
# define BTF_SHOW_OBJ_SAFE_SIZE 32
/*
* This is the maximum size of a base type value ( equivalent to a
* 128 - bit int ) ; if we are at the end of our safe buffer and have
* less than 16 bytes space we can ' t be assured of being able
* to copy the next type safely , so in such cases we will initiate
* a new copy .
*/
# define BTF_SHOW_OBJ_BASE_TYPE_SIZE 16
/* Type name size */
# define BTF_SHOW_NAME_SIZE 80
/*
* Common data to all BTF show operations . Private show functions can add
* their own data to a structure containing a struct btf_show and consult it
* in the show callback . See btf_type_show ( ) below .
*
* One challenge with showing nested data is we want to skip 0 - valued
* data , but in order to figure out whether a nested object is all zeros
* we need to walk through it . As a result , we need to make two passes
* when handling structs , unions and arrays ; the first pass simply looks
* for nonzero data , while the second actually does the display . The first
* pass is signalled by show - > state . depth_check being set , and if we
* encounter a non - zero value we set show - > state . depth_to_show to
* the depth at which we encountered it . When we have completed the
* first pass , we will know if anything needs to be displayed if
* depth_to_show > depth . See btf_ [ struct , array ] _show ( ) for the
* implementation of this .
*
* Another problem is we want to ensure the data for display is safe to
* access . To support this , the anonymous " struct {} obj " tracks the data
* object and our safe copy of it . We copy portions of the data needed
* to the object " copy " buffer , but because its size is limited to
* BTF_SHOW_OBJ_SAFE_SIZE bytes , multiple copies may be required as we
* traverse larger objects for display .
*
* The various data type show functions all start with a call to
* btf_show_start_type ( ) which returns a pointer to the safe copy
* of the data needed ( or if BTF_SHOW_UNSAFE is specified , to the
* raw data itself ) . btf_show_obj_safe ( ) is responsible for
* using copy_from_kernel_nofault ( ) to update the safe data if necessary
* as we traverse the object ' s data . skbuff - like semantics are
* used :
*
* - obj . head points to the start of the toplevel object for display
* - obj . size is the size of the toplevel object
* - obj . data points to the current point in the original data at
* which our safe data starts . obj . data will advance as we copy
* portions of the data .
*
* In most cases a single copy will suffice , but larger data structures
* such as " struct task_struct " will require many copies . The logic in
* btf_show_obj_safe ( ) handles the logic that determines if a new
* copy_from_kernel_nofault ( ) is needed .
*/
/* State shared by all BTF show operations: where output goes, how it is
 * emitted, and the per-iteration bookkeeping used while walking a value.
 */
struct btf_show {
/* flags controlling the show operation */
u64 flags ;
void * target ; /* target of show operation (seq file, buffer) */
/* printf-style callback that emits formatted text for this show */
void ( * showfn ) ( struct btf_show * show , const char * fmt , va_list args ) ;
/* BTF object the shown types belong to */
const struct btf * btf ;
/* below are used during iteration */
struct {
/* current nesting depth */
u8 depth ;
/* depth at which a non-zero value was encountered during the first pass */
u8 depth_to_show ;
/* set while the first (zero-detection) pass is running */
u8 depth_check ;
u8 array_member : 1 ,
array_terminated : 1 ;
u16 array_encoding ;
u32 type_id ;
int status ; /* non-zero for error */
const struct btf_type * type ;
const struct btf_member * member ;
char name [ BTF_SHOW_NAME_SIZE ] ; /* space for member name/type */
} state ;
/* the object being displayed and the bounded safe copy of its data */
struct {
/* size of the toplevel object */
u32 size ;
/* start of the toplevel object */
void * head ;
/* point in the original data at which the safe copy starts */
void * data ;
/* safe copy buffer, BTF_SHOW_OBJ_SAFE_SIZE bytes */
u8 safe [ BTF_SHOW_OBJ_SAFE_SIZE ] ;
} obj ;
} ;
2018-04-18 15:55:57 -07:00
/* Per-kind callback table.  kind_ops[] holds one pointer to such a table for
 * every BTF kind and btf_type_ops() picks the entry from
 * BTF_INFO_KIND(t->info).
 */
struct btf_kind_operations {
	/* Check the metadata of @t; @meta_left is the amount of metadata
	 * still available.
	 */
	s32 (*check_meta)(struct btf_verifier_env *env,
			  const struct btf_type *t,
			  u32 meta_left);

	/* Resolve the type described by resolve-stack entry @v. */
	int (*resolve)(struct btf_verifier_env *env,
		       const struct resolve_vertex *v);

	/* Check @member of @struct_type, whose type is @member_type. */
	int (*check_member)(struct btf_verifier_env *env,
			    const struct btf_type *struct_type,
			    const struct btf_member *member,
			    const struct btf_type *member_type);

	/* Same arguments as check_member; presumably used when the
	 * struct's kind_flag (bit 31 of info) is set, in which case
	 * member->offset holds the bit offset in bits 0-23 and the
	 * bitfield size in bits 24-31 -- TODO confirm at the call site.
	 */
	int (*check_kflag_member)(struct btf_verifier_env *env,
				  const struct btf_type *struct_type,
				  const struct btf_member *member,
				  const struct btf_type *member_type);

	/* Log kind-specific details of @t. */
	void (*log_details)(struct btf_verifier_env *env,
			    const struct btf_type *t);

	/* Display the value at @data of type @t (ID @type_id) via @show. */
	void (*show)(const struct btf *btf, const struct btf_type *t,
		     u32 type_id, void *data, u8 bits_offsets,
		     struct btf_show *show);
};
/* Per-kind operation tables; btf_type_ops() indexes this by BTF_INFO_KIND(). */
static const struct btf_kind_operations * const kind_ops [ NR_BTF_KINDS ] ;
/* Sentinel type: btf_type_is_void() is true only for a pointer to this object. */
static struct btf_type btf_void ;
2018-11-19 15:29:08 -08:00
/* Forward declarations of functions defined later in this file. */
static int btf_resolve ( struct btf_verifier_env * env ,
const struct btf_type * t , u32 type_id ) ;
2022-02-03 11:17:27 -08:00
static int btf_func_check ( struct btf_verifier_env * env ,
const struct btf_type * t ) ;
2018-04-18 15:55:58 -07:00
static bool btf_type_is_modifier ( const struct btf_type * t )
{
/* Some of them is not strictly a C modifier
* but they are grouped into the same bucket
* for BTF concern :
* A type ( t ) that refers to another
* type through t - > type AND its size cannot
* be determined without following the t - > type .
*
* ptr does not fall into this bucket
* because its size is always sizeof ( void * ) .
*/
switch ( BTF_INFO_KIND ( t - > info ) ) {
case BTF_KIND_TYPEDEF :
case BTF_KIND_VOLATILE :
case BTF_KIND_CONST :
case BTF_KIND_RESTRICT :
2021-11-11 17:26:09 -08:00
case BTF_KIND_TYPE_TAG :
2018-04-18 15:55:58 -07:00
return true ;
}
return false ;
}
2019-04-09 23:20:10 +02:00
bool btf_type_is_void ( const struct btf_type * t )
2018-04-18 15:55:58 -07:00
{
2018-11-19 15:29:06 -08:00
return t = = & btf_void ;
}
static bool btf_type_is_fwd ( const struct btf_type * t )
{
return BTF_INFO_KIND ( t - > info ) = = BTF_KIND_FWD ;
}
static bool btf_type_nosize ( const struct btf_type * t )
{
2018-11-19 15:29:08 -08:00
return btf_type_is_void ( t ) | | btf_type_is_fwd ( t ) | |
btf_type_is_func ( t ) | | btf_type_is_func_proto ( t ) ;
2018-04-18 15:55:58 -07:00
}
2018-11-19 15:29:06 -08:00
static bool btf_type_nosize_or_null ( const struct btf_type * t )
2018-04-18 15:55:58 -07:00
{
2018-11-19 15:29:06 -08:00
return ! t | | btf_type_nosize ( t ) ;
2018-04-18 15:55:58 -07:00
}
2019-01-31 15:40:04 -08:00
static bool __btf_type_is_struct ( const struct btf_type * t )
{
return BTF_INFO_KIND ( t - > info ) = = BTF_KIND_STRUCT ;
}
2018-04-18 15:55:58 -07:00
static bool btf_type_is_array ( const struct btf_type * t )
{
return BTF_INFO_KIND ( t - > info ) = = BTF_KIND_ARRAY ;
}
2019-04-09 23:20:09 +02:00
static bool btf_type_is_datasec ( const struct btf_type * t )
{
return BTF_INFO_KIND ( t - > info ) = = BTF_KIND_DATASEC ;
}
2021-10-12 09:48:38 -07:00
static bool btf_type_is_decl_tag ( const struct btf_type * t )
2021-09-14 15:30:15 -07:00
{
2021-10-12 09:48:38 -07:00
return BTF_INFO_KIND ( t - > info ) = = BTF_KIND_DECL_TAG ;
2021-09-14 15:30:15 -07:00
}
2021-10-12 09:48:38 -07:00
static bool btf_type_is_decl_tag_target ( const struct btf_type * t )
2021-09-14 15:30:15 -07:00
{
return btf_type_is_func ( t ) | | btf_type_is_struct ( t ) | |
2021-10-21 12:56:28 -07:00
btf_type_is_var ( t ) | | btf_type_is_typedef ( t ) ;
2021-09-14 15:30:15 -07:00
}
2021-01-11 23:55:18 -08:00
u32 btf_nr_types ( const struct btf * btf )
2020-11-09 17:19:28 -08:00
{
u32 total = 0 ;
while ( btf ) {
total + = btf - > nr_types ;
btf = btf - > base_btf ;
}
return total ;
}
2020-01-08 16:35:03 -08:00
/* Linear search for a type of the given @kind whose name equals @name.
 *
 * IDs 1 .. btf_nr_types(btf) - 1 are scanned in increasing order, so the
 * lowest matching ID wins.  Returns that (positive) ID, or -ENOENT when no
 * type matches.
 */
s32 btf_find_by_name_kind(const struct btf *btf, const char *name, u8 kind)
{
	const u32 nr = btf_nr_types(btf);
	u32 id;

	for (id = 1; id < nr; id++) {
		const struct btf_type *cand = btf_type_by_id(btf, id);

		/* cheap kind check first, string compare only on a kind match */
		if (BTF_INFO_KIND(cand->info) == kind &&
		    !strcmp(btf_name_by_offset(btf, cand->name_off), name))
			return id;
	}

	return -ENOENT;
}
2022-03-17 17:29:43 +05:30
/* Look up a type by @name and @kind, first in vmlinux BTF and then in each
 * module BTF registered in btf_idr.
 *
 * On success the positive type ID is returned and *@btf_p is set to the BTF
 * object that contains it; a reference taken with btf_get() is left held for
 * the caller.  On failure a negative errno is returned and *@btf_p is not
 * written:
 *   - the error encoded by bpf_get_btf_vmlinux() if it returns an ERR_PTR,
 *   - -EINVAL if vmlinux BTF is NULL,
 *   - otherwise the result of the last btf_find_by_name_kind() call
 *     (-ENOENT when nothing matched).
 */
static s32 bpf_find_btf_id(const char *name, u32 kind, struct btf **btf_p)
{
	struct btf *btf;
	s32 ret;
	int id;

	btf = bpf_get_btf_vmlinux();
	if (IS_ERR(btf))
		return PTR_ERR(btf);
	if (!btf)
		return -EINVAL;

	ret = btf_find_by_name_kind(btf, name, kind);
	/* ret is never zero, since btf_find_by_name_kind returns
	 * positive btf_id or negative error.
	 */
	if (ret > 0) {
		btf_get(btf);
		*btf_p = btf;
		return ret;
	}

	/* If name is not found in vmlinux's BTF then search in module's BTFs */
	spin_lock_bh(&btf_idr_lock);
	idr_for_each_entry(&btf_idr, btf, id) {
		if (!btf_is_module(btf))
			continue;
		/* linear search could be slow hence unlock/lock
		 * the IDR to avoid holding it for too long
		 */
		btf_get(btf);
		spin_unlock_bh(&btf_idr_lock);
		ret = btf_find_by_name_kind(btf, name, kind);
		if (ret > 0) {
			/* keep the reference: it is handed to the caller */
			*btf_p = btf;
			return ret;
		}
		/* Drop our reference BEFORE re-taking btf_idr_lock.  If this
		 * happens to be the last reference, releasing the BTF has to
		 * remove its ID from btf_idr, which needs btf_idr_lock itself;
		 * doing the put with the lock already held would self-deadlock.
		 * The iteration resumes from 'id', not from the 'btf' pointer,
		 * so dropping the reference here does not affect the walk.
		 */
		btf_put(btf);
		spin_lock_bh(&btf_idr_lock);
	}
	spin_unlock_bh(&btf_idr_lock);

	return ret;
}
2020-01-08 16:35:03 -08:00
/* Starting at type @id, follow t->type while btf_type_is_modifier() holds
 * and return the first type that is not a modifier.  If @res_id is not NULL
 * it receives the ID of the returned type.
 */
const struct btf_type *btf_type_skip_modifiers(const struct btf *btf,
					       u32 id, u32 *res_id)
{
	const struct btf_type *t;

	for (t = btf_type_by_id(btf, id); btf_type_is_modifier(t);
	     t = btf_type_by_id(btf, id))
		id = t->type;

	if (res_id)
		*res_id = id;

	return t;
}
/* Strip modifiers from type @id; if the result is a pointer, return the
 * pointee with its modifiers stripped as well (its ID goes to @res_id when
 * that is not NULL).  Returns NULL, leaving @res_id untouched, when type @id
 * is not a pointer.
 */
const struct btf_type *btf_type_resolve_ptr(const struct btf *btf,
					    u32 id, u32 *res_id)
{
	const struct btf_type *ptr = btf_type_skip_modifiers(btf, id, NULL);

	return btf_type_is_ptr(ptr) ?
	       btf_type_skip_modifiers(btf, ptr->type, res_id) : NULL;
}
/* Resolve type @id as a pointer (see btf_type_resolve_ptr()) and return the
 * pointee only if it is a function prototype; NULL otherwise.  Note that
 * @res_id may already have been written by btf_type_resolve_ptr() even when
 * NULL is returned because the pointee is not a function prototype.
 */
const struct btf_type *btf_type_resolve_func_ptr(const struct btf *btf,
						 u32 id, u32 *res_id)
{
	const struct btf_type *target = btf_type_resolve_ptr(btf, id, res_id);

	if (!target || !btf_type_is_func_proto(target))
		return NULL;

	return target;
}
2019-04-09 23:20:09 +02:00
/* Types that act only as a source, not sink or intermediate
* type when resolving .
*/
static bool btf_type_is_resolve_source_only ( const struct btf_type * t )
{
return btf_type_is_var ( t ) | |
2021-10-12 09:48:38 -07:00
btf_type_is_decl_tag ( t ) | |
2019-04-09 23:20:09 +02:00
btf_type_is_datasec ( t ) ;
}
2018-04-18 15:55:58 -07:00
/* What types need to be resolved?
*
* btf_type_is_modifier ( ) is an obvious one .
*
* btf_type_is_struct ( ) because its member refers to
* another type ( through member - > type ) .
2019-04-09 23:20:09 +02:00
*
* btf_type_is_var ( ) because the variable refers to
* another type . btf_type_is_datasec ( ) holds multiple
* btf_type_is_var ( ) types that need resolving .
*
2018-04-18 15:55:58 -07:00
* btf_type_is_array ( ) because its element ( array - > type )
* refers to another type . Array can be thought of a
* special case of struct while array just has the same
* member - type repeated by array - > nelems of times .
*/
static bool btf_type_needs_resolve ( const struct btf_type * t )
{
return btf_type_is_modifier ( t ) | |
2019-04-09 23:20:09 +02:00
btf_type_is_ptr ( t ) | |
btf_type_is_struct ( t ) | |
btf_type_is_array ( t ) | |
btf_type_is_var ( t ) | |
2022-02-03 11:17:27 -08:00
btf_type_is_func ( t ) | |
2021-10-12 09:48:38 -07:00
btf_type_is_decl_tag ( t ) | |
2019-04-09 23:20:09 +02:00
btf_type_is_datasec ( t ) ;
2018-04-18 15:55:58 -07:00
}
/* t->size can be used */
static bool btf_type_has_size ( const struct btf_type * t )
{
switch ( BTF_INFO_KIND ( t - > info ) ) {
case BTF_KIND_INT :
case BTF_KIND_STRUCT :
case BTF_KIND_UNION :
case BTF_KIND_ENUM :
2019-04-09 23:20:09 +02:00
case BTF_KIND_DATASEC :
2021-02-26 21:22:52 +01:00
case BTF_KIND_FLOAT :
bpf: Add btf enum64 support
Currently, BTF only supports upto 32bit enum value with BTF_KIND_ENUM.
But in kernel, some enum indeed has 64bit values, e.g.,
in uapi bpf.h, we have
enum {
BPF_F_INDEX_MASK = 0xffffffffULL,
BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
};
In this case, BTF_KIND_ENUM will encode the value of BPF_F_CTXLEN_MASK
as 0, which certainly is incorrect.
This patch added a new btf kind, BTF_KIND_ENUM64, which permits
64bit value to cover the above use case. The BTF_KIND_ENUM64 has
the following three fields followed by the common type:
struct bpf_enum64 {
__u32 nume_off;
__u32 val_lo32;
__u32 val_hi32;
};
Currently, btf type section has an alignment of 4 as all element types
are u32. Representing the value with __u64 will introduce a pad
for bpf_enum64 and may also introduce misalignment for the 64bit value.
Hence, two members of val_hi32 and val_lo32 are chosen to avoid these issues.
The kflag is also introduced for BTF_KIND_ENUM and BTF_KIND_ENUM64
to indicate whether the value is signed or unsigned. The kflag intends
to provide consistent output of BTF C fortmat with the original
source code. For example, the original BTF_KIND_ENUM bit value is 0xffffffff.
The format C has two choices, printing out 0xffffffff or -1 and current libbpf
prints out as unsigned value. But if the signedness is preserved in btf,
the value can be printed the same as the original source code.
The kflag value 0 means unsigned values, which is consistent to the default
by libbpf and should also cover most cases as well.
The new BTF_KIND_ENUM64 is intended to support the enum value represented as
64bit value. But it can represent all BTF_KIND_ENUM values as well.
The compiler ([1]) and pahole will generate BTF_KIND_ENUM64 only if the value has
to be represented with 64 bits.
In addition, a static inline function btf_kind_core_compat() is introduced which
will be used later when libbpf relo_core.c changed. Here the kernel shares the
same relo_core.c with libbpf.
[1] https://reviews.llvm.org/D124641
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220607062600.3716578-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-06-06 23:26:00 -07:00
case BTF_KIND_ENUM64 :
2018-04-18 15:55:58 -07:00
return true ;
}
return false ;
}
2018-04-18 15:55:57 -07:00
/* Printable name of an integer @encoding value.  Only an exact match with 0
 * or one of BTF_INT_SIGNED / BTF_INT_CHAR / BTF_INT_BOOL is named; anything
 * else (including combinations) yields "UNKN".
 */
static const char *btf_int_encoding_str(u8 encoding)
{
	switch (encoding) {
	case 0:
		return "(none)";
	case BTF_INT_SIGNED:
		return "SIGNED";
	case BTF_INT_CHAR:
		return "CHAR";
	case BTF_INT_BOOL:
		return "BOOL";
	default:
		return "UNKN";
	}
}
static u32 btf_type_int ( const struct btf_type * t )
{
return * ( u32 * ) ( t + 1 ) ;
}
/* Return the struct btf_array that immediately follows the header of @t. */
static const struct btf_array *btf_type_array(const struct btf_type *t)
{
	const void *extra = t + 1;

	return extra;
}
/* Return the first struct btf_enum, located immediately after the header of @t. */
static const struct btf_enum *btf_type_enum(const struct btf_type *t)
{
	const void *extra = t + 1;

	return extra;
}
2019-04-09 23:20:09 +02:00
/* Return the struct btf_var that immediately follows the header of @t. */
static const struct btf_var *btf_type_var(const struct btf_type *t)
{
	const void *extra = t + 1;

	return extra;
}
2021-10-12 09:48:38 -07:00
static const struct btf_decl_tag * btf_type_decl_tag ( const struct btf_type * t )
2021-09-14 15:30:15 -07:00
{
2021-10-12 09:48:38 -07:00
return ( const struct btf_decl_tag * ) ( t + 1 ) ;
2021-09-14 15:30:15 -07:00
}
bpf: Add btf enum64 support
Currently, BTF only supports upto 32bit enum value with BTF_KIND_ENUM.
But in kernel, some enum indeed has 64bit values, e.g.,
in uapi bpf.h, we have
enum {
BPF_F_INDEX_MASK = 0xffffffffULL,
BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
};
In this case, BTF_KIND_ENUM will encode the value of BPF_F_CTXLEN_MASK
as 0, which certainly is incorrect.
This patch added a new btf kind, BTF_KIND_ENUM64, which permits
64bit value to cover the above use case. The BTF_KIND_ENUM64 has
the following three fields followed by the common type:
struct btf_enum64 {
__u32 name_off;
__u32 val_lo32;
__u32 val_hi32;
};
Currently, btf type section has an alignment of 4 as all element types
are u32. Representing the value with __u64 will introduce a pad
for bpf_enum64 and may also introduce misalignment for the 64bit value.
Hence, two members of val_hi32 and val_lo32 are chosen to avoid these issues.
The kflag is also introduced for BTF_KIND_ENUM and BTF_KIND_ENUM64
to indicate whether the value is signed or unsigned. The kflag intends
to provide consistent output of BTF C format with the original
source code. For example, the original BTF_KIND_ENUM bit value is 0xffffffff.
The format C has two choices, printing out 0xffffffff or -1 and current libbpf
prints out as unsigned value. But if the signedness is preserved in btf,
the value can be printed the same as the original source code.
The kflag value 0 means unsigned values, which is consistent to the default
by libbpf and should also cover most cases as well.
The new BTF_KIND_ENUM64 is intended to support the enum value represented as
64bit value. But it can represent all BTF_KIND_ENUM values as well.
The compiler ([1]) and pahole will generate BTF_KIND_ENUM64 only if the value has
to be represented with 64 bits.
In addition, a static inline function btf_kind_core_compat() is introduced which
will be used later when libbpf relo_core.c changed. Here the kernel shares the
same relo_core.c with libbpf.
[1] https://reviews.llvm.org/D124641
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220607062600.3716578-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-06-06 23:26:00 -07:00
/* Return the first struct btf_enum64, located immediately after the header of @t. */
static const struct btf_enum64 *btf_type_enum64(const struct btf_type *t)
{
	const void *extra = t + 1;

	return extra;
}
2018-04-18 15:55:57 -07:00
static const struct btf_kind_operations * btf_type_ops ( const struct btf_type * t )
{
return kind_ops [ BTF_INFO_KIND ( t - > info ) ] ;
}
2019-01-16 20:29:40 +01:00
/* Is @offset a usable string offset for @btf?
 *
 * The offset must pass BTF_STR_OFFSET_VALID() and, once made relative to the
 * BTF on the base_btf chain whose start_str_off is not above it, must lie
 * below that BTF's hdr.str_len.
 */
static bool btf_name_offset_valid(const struct btf *btf, u32 offset)
{
	const struct btf *owner = btf;

	if (!BTF_STR_OFFSET_VALID(offset))
		return false;

	/* walk towards the base until the offset belongs to 'owner' */
	while (offset < owner->start_str_off)
		owner = owner->base_btf;

	return offset - owner->start_str_off < owner->hdr.str_len;
}
2019-04-09 23:20:09 +02:00
/* May character @c appear in a BTF name?
 *
 * '_' is always accepted.  '.' is accepted only when @dot_ok is set.  Any
 * other character must be alphabetic when it is the @first character of the
 * name, and alphanumeric otherwise.
 */
static bool __btf_name_char_ok(char c, bool first, bool dot_ok)
{
	if (c == '_')
		return true;

	if (c == '.')
		return dot_ok;

	return first ? isalpha(c) : isalnum(c);
}
2020-11-09 17:19:28 -08:00
/* Return the string at @offset, or NULL if the offset is out of range.
 *
 * The base_btf chain is walked until a BTF whose start_str_off is not above
 * @offset is found; the offset is then taken relative to that BTF's string
 * section and checked against its hdr.str_len.
 */
static const char *btf_str_by_offset(const struct btf *btf, u32 offset)
{
	u32 rel;

	while (offset < btf->start_str_off)
		btf = btf->base_btf;

	rel = offset - btf->start_str_off;

	return rel < btf->hdr.str_len ? &btf->strings[rel] : NULL;
}
2019-04-09 23:20:09 +02:00
/* Validate the name stored at string @offset.
 *
 * The caller must pass a valid offset: the string pointer is dereferenced
 * without a NULL check.  The first character is checked with first == true,
 * every following one with first == false, and the name must end within
 * KSYM_NAME_LEN characters of its start.  @dot_ok is forwarded to
 * __btf_name_char_ok() and controls whether '.' is accepted.
 */
static bool __btf_name_valid(const struct btf *btf, u32 offset, bool dot_ok)
{
	/* offset must be valid */
	const char *p = btf_str_by_offset(btf, offset);
	const char *end;

	if (!__btf_name_char_ok(*p, true, dot_ok))
		return false;

	/* set a limit on identifier length */
	end = p + KSYM_NAME_LEN;
	for (p++; *p && p < end; p++) {
		if (!__btf_name_char_ok(*p, false, dot_ok))
			return false;
	}

	/* valid only if we stopped at the terminator, not at the limit */
	return !*p;
}
2019-04-09 23:20:09 +02:00
/* Only a C-style identifier is permitted ('.' is rejected).  This can be
 * relaxed if necessary.
 */
static bool btf_name_valid_identifier(const struct btf *btf, u32 offset)
{
	const bool dot_ok = false;

	return __btf_name_valid(btf, offset, dot_ok);
}
/* Same check as for identifiers, except that '.' is also accepted. */
static bool btf_name_valid_section(const struct btf *btf, u32 offset)
{
	const bool dot_ok = true;

	return __btf_name_valid(btf, offset, dot_ok);
}
2018-12-13 10:41:46 -08:00
/*
 * Name lookup that always returns a printable string: offset 0 yields
 * "(anon)" and an offset that btf_str_by_offset() cannot resolve yields
 * "(invalid-name-offset)".
 */
static const char *__btf_name_by_offset(const struct btf *btf, u32 offset)
{
	const char *str;

	if (offset == 0)
		return "(anon)";

	str = btf_str_by_offset(btf, offset);
	if (!str)
		return "(invalid-name-offset)";

	return str;
}
2018-12-13 10:41:46 -08:00
/*
 * Non-static name lookup: returns whatever btf_str_by_offset() returns,
 * which is NULL when @offset cannot be resolved.
 */
const char *btf_name_by_offset(const struct btf *btf, u32 offset)
{
	const char *str = btf_str_by_offset(btf, offset);

	return str;
}
bpf: Introduce bpf_func_info
This patch added interface to load a program with the following
additional information:
. prog_btf_fd
. func_info, func_info_rec_size and func_info_cnt
where func_info will provide function range and type_id
corresponding to each function.
The func_info_rec_size is introduced in the UAPI to specify
struct bpf_func_info size passed from user space. This
intends to make bpf_func_info structure growable in the future.
If the kernel gets a different bpf_func_info size from userspace,
it will try to handle user request with part of bpf_func_info
it can understand. In this patch, kernel can understand
struct bpf_func_info {
__u32 insn_offset;
__u32 type_id;
};
If user passed a bpf func_info record size of 16 bytes, the
kernel can still handle part of records with the above definition.
If verifier agrees with function range provided by the user,
the bpf_prog ksym for each function will use the func name
provided in the type_id, which is supposed to provide better
encoding as it is not limited by 16 bytes program name
limitation and this is better for bpf program which contains
multiple subprograms.
The bpf_prog_info interface is also extended to
return btf_id, func_info, func_info_rec_size and func_info_cnt
to userspace, so userspace can print out the function prototype
for each xlated function. The insn_offset in the returned
func_info corresponds to the insn offset for xlated functions.
With other jit related fields in bpf_prog_info, userspace can also
print out function prototypes for each jited function.
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-11-19 15:29:11 -08:00
/*
 * Resolve @type_id to its struct btf_type.
 *
 * Ids below btf->start_id are looked up by walking the base_btf chain.
 * Returns NULL when the rebased id is not below nr_types.
 */
const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id)
{
	u32 idx;

	while (type_id < btf->start_id)
		btf = btf->base_btf;

	idx = type_id - btf->start_id;
	return idx < btf->nr_types ? btf->types[idx] : NULL;
}
2022-09-07 10:40:39 -06:00
EXPORT_SYMBOL_GPL ( btf_type_by_id ) ;
2018-04-18 15:55:58 -07:00
2018-05-22 14:57:19 -07:00
/*
* Regular int is not a bit field and it must be either
bpf: btf: support 128 bit integer type
Currently, btf only supports up to 64-bit integer.
On the other hand, 128bit support for gcc and clang
has existed for a long time. For example, both gcc 4.8
and llvm 3.7 supports types "__int128" and
"unsigned __int128" for virtually all 64bit architectures
including bpf.
The requirement for __int128 support comes from two areas:
. bpf program may use __int128. For example, some bcc tools
(https://github.com/iovisor/bcc/tree/master/tools),
mostly tcp v6 related, tcpstates.py, tcpaccept.py, etc.,
are using __int128 to represent the ipv6 addresses.
. linux itself is using __int128 types. Hence supporting
__int128 type in BTF is required for vmlinux BTF,
which will be used by "compile once and run everywhere"
and other projects.
For 128bit integer, instead of base-10, hex numbers are pretty
printed out as large decimal number is hard to decipher, e.g.,
for ipv6 addresses.
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2019-01-15 17:07:47 -08:00
* u8 / u16 / u32 / u64 or __int128 .
2018-05-22 14:57:19 -07:00
*/
static bool btf_type_int_is_regular ( const struct btf_type * t )
{
2018-07-19 22:14:31 -07:00
u8 nr_bits , nr_bytes ;
2018-05-22 14:57:19 -07:00
u32 int_data ;
int_data = btf_type_int ( t ) ;
nr_bits = BTF_INT_BITS ( int_data ) ;
nr_bytes = BITS_ROUNDUP_BYTES ( nr_bits ) ;
if ( BITS_PER_BYTE_MASKED ( nr_bits ) | |
BTF_INT_OFFSET ( int_data ) | |
( nr_bytes ! = sizeof ( u8 ) & & nr_bytes ! = sizeof ( u16 ) & &
bpf: btf: support 128 bit integer type
Currently, btf only supports up to 64-bit integer.
On the other hand, 128bit support for gcc and clang
has existed for a long time. For example, both gcc 4.8
and llvm 3.7 supports types "__int128" and
"unsigned __int128" for virtually all 64bit architectures
including bpf.
The requirement for __int128 support comes from two areas:
. bpf program may use __int128. For example, some bcc tools
(https://github.com/iovisor/bcc/tree/master/tools),
mostly tcp v6 related, tcpstates.py, tcpaccept.py, etc.,
are using __int128 to represent the ipv6 addresses.
. linux itself is using __int128 types. Hence supporting
__int128 type in BTF is required for vmlinux BTF,
which will be used by "compile once and run everywhere"
and other projects.
For 128bit integer, instead of base-10, hex numbers are pretty
printed out as large decimal number is hard to decipher, e.g.,
for ipv6 addresses.
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2019-01-15 17:07:47 -08:00
nr_bytes ! = sizeof ( u32 ) & & nr_bytes ! = sizeof ( u64 ) & &
nr_bytes ! = ( 2 * sizeof ( u64 ) ) ) ) {
2018-05-22 14:57:19 -07:00
return false ;
}
return true ;
}
bpf: add bpffs pretty print for cgroup local storage maps
Implement bpffs pretty printing for cgroup local storage maps
(both shared and per-cpu).
Output example (captured for tools/testing/selftests/bpf/netcnt_prog.c):
Shared:
$ cat /sys/fs/bpf/map_2
# WARNING!! The output is for debug purpose only
# WARNING!! The output format will change
{4294968594,1}: {9999,1039896}
Per-cpu:
$ cat /sys/fs/bpf/map_1
# WARNING!! The output is for debug purpose only
# WARNING!! The output format will change
{4294968594,1}: {
cpu0: {0,0,0,0,0}
cpu1: {0,0,0,0,0}
cpu2: {1,104,0,0,0}
cpu3: {0,0,0,0,0}
}
Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-12-10 15:43:01 -08:00
/*
2018-12-15 22:13:52 -08:00
* Check that given struct member is a regular int with expected
* offset and size .
bpf: add bpffs pretty print for cgroup local storage maps
Implement bpffs pretty printing for cgroup local storage maps
(both shared and per-cpu).
Output example (captured for tools/testing/selftests/bpf/netcnt_prog.c):
Shared:
$ cat /sys/fs/bpf/map_2
# WARNING!! The output is for debug purpose only
# WARNING!! The output format will change
{4294968594,1}: {9999,1039896}
Per-cpu:
$ cat /sys/fs/bpf/map_1
# WARNING!! The output is for debug purpose only
# WARNING!! The output format will change
{4294968594,1}: {
cpu0: {0,0,0,0,0}
cpu1: {0,0,0,0,0}
cpu2: {1,104,0,0,0}
cpu3: {0,0,0,0,0}
}
Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-12-10 15:43:01 -08:00
*/
2018-12-15 22:13:52 -08:00
bool btf_member_is_reg_int ( const struct btf * btf , const struct btf_type * s ,
const struct btf_member * m ,
u32 expected_offset , u32 expected_size )
bpf: add bpffs pretty print for cgroup local storage maps
Implement bpffs pretty printing for cgroup local storage maps
(both shared and per-cpu).
Output example (captured for tools/testing/selftests/bpf/netcnt_prog.c):
Shared:
$ cat /sys/fs/bpf/map_2
# WARNING!! The output is for debug purpose only
# WARNING!! The output format will change
{4294968594,1}: {9999,1039896}
Per-cpu:
$ cat /sys/fs/bpf/map_1
# WARNING!! The output is for debug purpose only
# WARNING!! The output format will change
{4294968594,1}: {
cpu0: {0,0,0,0,0}
cpu1: {0,0,0,0,0}
cpu2: {1,104,0,0,0}
cpu3: {0,0,0,0,0}
}
Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-12-10 15:43:01 -08:00
{
2018-12-15 22:13:52 -08:00
const struct btf_type * t ;
u32 id , int_data ;
u8 nr_bits ;
bpf: add bpffs pretty print for cgroup local storage maps
Implement bpffs pretty printing for cgroup local storage maps
(both shared and per-cpu).
Output example (captured for tools/testing/selftests/bpf/netcnt_prog.c):
Shared:
$ cat /sys/fs/bpf/map_2
# WARNING!! The output is for debug purpose only
# WARNING!! The output format will change
{4294968594,1}: {9999,1039896}
Per-cpu:
$ cat /sys/fs/bpf/map_1
# WARNING!! The output is for debug purpose only
# WARNING!! The output format will change
{4294968594,1}: {
cpu0: {0,0,0,0,0}
cpu1: {0,0,0,0,0}
cpu2: {1,104,0,0,0}
cpu3: {0,0,0,0,0}
}
Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-12-10 15:43:01 -08:00
2018-12-15 22:13:52 -08:00
id = m - > type ;
t = btf_type_id_size ( btf , & id , NULL ) ;
if ( ! t | | ! btf_type_is_int ( t ) )
bpf: add bpffs pretty print for cgroup local storage maps
Implement bpffs pretty printing for cgroup local storage maps
(both shared and per-cpu).
Output example (captured for tools/testing/selftests/bpf/netcnt_prog.c):
Shared:
$ cat /sys/fs/bpf/map_2
# WARNING!! The output is for debug purpose only
# WARNING!! The output format will change
{4294968594,1}: {9999,1039896}
Per-cpu:
$ cat /sys/fs/bpf/map_1
# WARNING!! The output is for debug purpose only
# WARNING!! The output format will change
{4294968594,1}: {
cpu0: {0,0,0,0,0}
cpu1: {0,0,0,0,0}
cpu2: {1,104,0,0,0}
cpu3: {0,0,0,0,0}
}
Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-12-10 15:43:01 -08:00
return false ;
int_data = btf_type_int ( t ) ;
nr_bits = BTF_INT_BITS ( int_data ) ;
2018-12-15 22:13:52 -08:00
if ( btf_type_kflag ( s ) ) {
u32 bitfield_size = BTF_MEMBER_BITFIELD_SIZE ( m - > offset ) ;
u32 bit_offset = BTF_MEMBER_BIT_OFFSET ( m - > offset ) ;
/* if kflag set, int should be a regular int and
* bit offset should be at byte boundary .
*/
return ! bitfield_size & &
BITS_ROUNDUP_BYTES ( bit_offset ) = = expected_offset & &
BITS_ROUNDUP_BYTES ( nr_bits ) = = expected_size ;
}
if ( BTF_INT_OFFSET ( int_data ) | |
BITS_PER_BYTE_MASKED ( m - > offset ) | |
BITS_ROUNDUP_BYTES ( m - > offset ) ! = expected_offset | |
BITS_PER_BYTE_MASKED ( nr_bits ) | |
BITS_ROUNDUP_BYTES ( nr_bits ) ! = expected_size )
bpf: add bpffs pretty print for cgroup local storage maps
Implement bpffs pretty printing for cgroup local storage maps
(both shared and per-cpu).
Output example (captured for tools/testing/selftests/bpf/netcnt_prog.c):
Shared:
$ cat /sys/fs/bpf/map_2
# WARNING!! The output is for debug purpose only
# WARNING!! The output format will change
{4294968594,1}: {9999,1039896}
Per-cpu:
$ cat /sys/fs/bpf/map_1
# WARNING!! The output is for debug purpose only
# WARNING!! The output format will change
{4294968594,1}: {
cpu0: {0,0,0,0,0}
cpu1: {0,0,0,0,0}
cpu2: {1,104,0,0,0}
cpu3: {0,0,0,0,0}
}
Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2018-12-10 15:43:01 -08:00
return false ;
return true ;
}
2020-09-28 12:31:04 +01:00
/*
 * Similar to btf_type_skip_modifiers() but does not skip typedefs.
 *
 * Starting from @id, follow t->type while the current type is a modifier
 * other than a typedef. Returns the type where the walk stops, or NULL if
 * btf_type_by_id() cannot resolve an id along the chain.
 */
static const struct btf_type *btf_type_skip_qualifiers(const struct btf *btf,
						       u32 id)
{
	const struct btf_type *t = btf_type_by_id(btf, id);

	/* btf_type_by_id() returns NULL for an out-of-range id; stop there
	 * instead of reading t->info through a NULL pointer.
	 */
	while (t && btf_type_is_modifier(t) &&
	       BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF)
		t = btf_type_by_id(btf, t->type);

	return t;
}
/* Upper bound on the pointer/array/typedef nesting btf_show_name() walks. */
#define BTF_SHOW_MAX_ITER	10

/* Bit representing @kind in a u64 mask; the argument is parenthesized so
 * that an expression argument keeps its own precedence.
 */
#define BTF_KIND_BIT(kind)	(1ULL << (kind))
/*
* Populate show - > state . name with type name information .
* Format of type name is
*
* [ . member_name = ] ( type_name )
*/
static const char * btf_show_name ( struct btf_show * show )
{
/* BTF_MAX_ITER array suffixes "[]" */
const char * array_suffixes = " [][][][][][][][][][] " ;
const char * array_suffix = & array_suffixes [ strlen ( array_suffixes ) ] ;
/* BTF_MAX_ITER pointer suffixes "*" */
const char * ptr_suffixes = " ********** " ;
const char * ptr_suffix = & ptr_suffixes [ strlen ( ptr_suffixes ) ] ;
const char * name = NULL , * prefix = " " , * parens = " " ;
const struct btf_member * m = show - > state . member ;
2021-12-07 22:47:18 +00:00
const struct btf_type * t ;
2020-09-28 12:31:04 +01:00
const struct btf_array * array ;
u32 id = show - > state . type_id ;
const char * member = NULL ;
bool show_member = false ;
u64 kinds = 0 ;
int i ;
show - > state . name [ 0 ] = ' \0 ' ;
/*
* Don ' t show type name if we ' re showing an array member ;
* in that case we show the array type so don ' t need to repeat
* ourselves for each member .
*/
if ( show - > state . array_member )
return " " ;
/* Retrieve member name, if any. */
if ( m ) {
member = btf_name_by_offset ( show - > btf , m - > name_off ) ;
show_member = strlen ( member ) > 0 ;
id = m - > type ;
}
/*
* Start with type_id , as we have resolved the struct btf_type *
* via btf_modifier_show ( ) past the parent typedef to the child
* struct , int etc it is defined as . In such cases , the type_id
* still represents the starting type while the struct btf_type *
* in our show - > state points at the resolved type of the typedef .
*/
t = btf_type_by_id ( show - > btf , id ) ;
if ( ! t )
return " " ;
/*
* The goal here is to build up the right number of pointer and
* array suffixes while ensuring the type name for a typedef
* is represented . Along the way we accumulate a list of
* BTF kinds we have encountered , since these will inform later
* display ; for example , pointer types will not require an
* opening " { " for struct , we will just display the pointer value .
*
* We also want to accumulate the right number of pointer or array
* indices in the format string while iterating until we get to
* the typedef / pointee / array member target type .
*
* We start by pointing at the end of pointer and array suffix
* strings ; as we accumulate pointers and arrays we move the pointer
* or array string backwards so it will show the expected number of
* ' * ' or ' [ ] ' for the type . BTF_SHOW_MAX_ITER of nesting of pointers
* and / or arrays and typedefs are supported as a precaution .
*
* We also want to get typedef name while proceeding to resolve
* type it points to so that we can add parentheses if it is a
* " typedef struct " etc .
*/
for ( i = 0 ; i < BTF_SHOW_MAX_ITER ; i + + ) {
switch ( BTF_INFO_KIND ( t - > info ) ) {
case BTF_KIND_TYPEDEF :
if ( ! name )
name = btf_name_by_offset ( show - > btf ,
t - > name_off ) ;
kinds | = BTF_KIND_BIT ( BTF_KIND_TYPEDEF ) ;
id = t - > type ;
break ;
case BTF_KIND_ARRAY :
kinds | = BTF_KIND_BIT ( BTF_KIND_ARRAY ) ;
parens = " [ " ;
if ( ! t )
return " " ;
array = btf_type_array ( t ) ;
if ( array_suffix > array_suffixes )
array_suffix - = 2 ;
id = array - > type ;
break ;
case BTF_KIND_PTR :
kinds | = BTF_KIND_BIT ( BTF_KIND_PTR ) ;
if ( ptr_suffix > ptr_suffixes )
ptr_suffix - = 1 ;
id = t - > type ;
break ;
default :
id = 0 ;
break ;
}
if ( ! id )
break ;
t = btf_type_skip_qualifiers ( show - > btf , id ) ;
}
/* We may not be able to represent this type; bail to be safe */
if ( i = = BTF_SHOW_MAX_ITER )
return " " ;
if ( ! name )
name = btf_name_by_offset ( show - > btf , t - > name_off ) ;
switch ( BTF_INFO_KIND ( t - > info ) ) {
case BTF_KIND_STRUCT :
case BTF_KIND_UNION :
prefix = BTF_INFO_KIND ( t - > info ) = = BTF_KIND_STRUCT ?
" struct " : " union " ;
/* if it's an array of struct/union, parens is already set */
if ( ! ( kinds & ( BTF_KIND_BIT ( BTF_KIND_ARRAY ) ) ) )
parens = " { " ;
break ;
case BTF_KIND_ENUM :
bpf: Add btf enum64 support
Currently, BTF only supports upto 32bit enum value with BTF_KIND_ENUM.
But in kernel, some enum indeed has 64bit values, e.g.,
in uapi bpf.h, we have
enum {
BPF_F_INDEX_MASK = 0xffffffffULL,
BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
};
In this case, BTF_KIND_ENUM will encode the value of BPF_F_CTXLEN_MASK
as 0, which certainly is incorrect.
This patch added a new btf kind, BTF_KIND_ENUM64, which permits
64bit value to cover the above use case. The BTF_KIND_ENUM64 has
the following three fields followed by the common type:
struct bpf_enum64 {
__u32 nume_off;
__u32 val_lo32;
__u32 val_hi32;
};
Currently, btf type section has an alignment of 4 as all element types
are u32. Representing the value with __u64 will introduce a pad
for bpf_enum64 and may also introduce misalignment for the 64bit value.
Hence, two members of val_hi32 and val_lo32 are chosen to avoid these issues.
The kflag is also introduced for BTF_KIND_ENUM and BTF_KIND_ENUM64
to indicate whether the value is signed or unsigned. The kflag intends
to provide consistent output of BTF C fortmat with the original
source code. For example, the original BTF_KIND_ENUM bit value is 0xffffffff.
The format C has two choices, printing out 0xffffffff or -1 and current libbpf
prints out as unsigned value. But if the signedness is preserved in btf,
the value can be printed the same as the original source code.
The kflag value 0 means unsigned values, which is consistent to the default
by libbpf and should also cover most cases as well.
The new BTF_KIND_ENUM64 is intended to support the enum value represented as
64bit value. But it can represent all BTF_KIND_ENUM values as well.
The compiler ([1]) and pahole will generate BTF_KIND_ENUM64 only if the value has
to be represented with 64 bits.
In addition, a static inline function btf_kind_core_compat() is introduced which
will be used later when libbpf relo_core.c changed. Here the kernel shares the
same relo_core.c with libbpf.
[1] https://reviews.llvm.org/D124641
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220607062600.3716578-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-06-06 23:26:00 -07:00
case BTF_KIND_ENUM64 :
2020-09-28 12:31:04 +01:00
prefix = " enum " ;
break ;
default :
break ;
}
/* pointer does not require parens */
if ( kinds & BTF_KIND_BIT ( BTF_KIND_PTR ) )
parens = " " ;
/* typedef does not require struct/union/enum prefix */
if ( kinds & BTF_KIND_BIT ( BTF_KIND_TYPEDEF ) )
prefix = " " ;
if ( ! name )
name = " " ;
/* Even if we don't want type name info, we want parentheses etc */
if ( show - > flags & BTF_SHOW_NONAME )
snprintf ( show - > state . name , sizeof ( show - > state . name ) , " %s " ,
parens ) ;
else
snprintf ( show - > state . name , sizeof ( show - > state . name ) ,
" %s%s%s(%s%s%s%s%s%s)%s " ,
/* first 3 strings comprise ".member = " */
show_member ? " . " : " " ,
show_member ? member : " " ,
show_member ? " = " : " " ,
/* ...next is our prefix (struct, enum, etc) */
prefix ,
strlen ( prefix ) > 0 & & strlen ( name ) > 0 ? " " : " " ,
/* ...this is the type name itself */
name ,
/* ...suffixed by the appropriate '*', '[]' suffixes */
strlen ( ptr_suffix ) > 0 ? " " : " " , ptr_suffix ,
array_suffix , parens ) ;
return show - > state . name ;
}
/*
 * Return an indentation string for the current nesting depth.
 *
 * The result is a pointer into the tail of the fixed `indents` string,
 * show->state.depth characters before its terminating NUL; when depth is
 * larger than the string, the whole string is returned instead.
 *
 * NOTE(review): the whitespace inside the `indents` literal looks collapsed
 * in this copy of the file - confirm its real width against the tree.
 */
static const char * __btf_show_indent ( struct btf_show * show )
{
const char * indents = " " ;
/* Points at the terminating NUL, i.e. the zero-length indent. */
const char * indent = & indents [ strlen ( indents ) ] ;
/* Step back one character per depth level while staying inside indents. */
if ( ( indent - show - > state . depth ) > = indents )
return indent - show - > state . depth ;
return indents ;
}
static const char * btf_show_indent ( struct btf_show * show )
{
return show - > flags & BTF_SHOW_COMPACT ? " " : __btf_show_indent ( show ) ;
}
static const char * btf_show_newline ( struct btf_show * show )
{
return show - > flags & BTF_SHOW_COMPACT ? " " : " \n " ;
}
static const char * btf_show_delim ( struct btf_show * show )
{
if ( show - > state . depth = = 0 )
return " " ;
if ( ( show - > flags & BTF_SHOW_COMPACT ) & & show - > state . type & &
BTF_INFO_KIND ( show - > state . type - > info ) = = BTF_KIND_UNION )
return " | " ;
return " , " ;
}
/*
 * Forward a printf-style message to show->showfn(). Nothing is emitted
 * while show->state.depth_check is set.
 */
__printf(2, 3) static void btf_show(struct btf_show *show, const char *fmt, ...)
{
	va_list ap;

	if (show->state.depth_check)
		return;

	va_start(ap, fmt);
	show->showfn(show, fmt, ap);
	va_end(ap);
}
/* Macros are used here as btf_show_type_value[s]() prepends and appends
 * format specifiers to the format specifier passed in; these do the work of
 * adding indentation, delimiters etc while the caller simply has to specify
 * the type value(s) in the format specifier + value(s).
 *
 * @fmt is pasted between two string literals, so it must itself be a string
 * literal. btf_show_type_value() expands @value twice (once for the zero
 * test, once as the printed argument) and only prints when the value is
 * non-zero, BTF_SHOW_ZERO is set, or we are at depth 0. Both macros raise
 * state.depth_to_show to the current depth after printing.
 */
#define btf_show_type_value(show, fmt, value) \
	do { \
		if ((value) != (__typeof__(value))0 || \
		    ((show)->flags & BTF_SHOW_ZERO) || \
		    (show)->state.depth == 0) { \
			btf_show(show, "%s%s" fmt "%s%s", \
				 btf_show_indent(show), \
				 btf_show_name(show), \
				 value, btf_show_delim(show), \
				 btf_show_newline(show)); \
			if ((show)->state.depth > (show)->state.depth_to_show) \
				(show)->state.depth_to_show = (show)->state.depth; \
		} \
	} while (0)

#define btf_show_type_values(show, fmt, ...) \
	do { \
		btf_show(show, "%s%s" fmt "%s%s", btf_show_indent(show), \
			 btf_show_name(show), \
			 __VA_ARGS__, btf_show_delim(show), \
			 btf_show_newline(show)); \
		if ((show)->state.depth > (show)->state.depth_to_show) \
			(show)->state.depth_to_show = (show)->state.depth; \
	} while (0)
/* How much is left to copy to safe buffer after @data? */
static int btf_show_obj_size_left ( struct btf_show * show , void * data )
{
return show - > obj . head + show - > obj . size - data ;
}
/* Is object pointed to by @data of @size already copied to our safe buffer? */
static bool btf_show_obj_is_safe ( struct btf_show * show , void * data , int size )
{
return data > = show - > obj . data & &
( data + size ) < ( show - > obj . data + BTF_SHOW_OBJ_SAFE_SIZE ) ;
}
/*
* If object pointed to by @ data of @ size falls within our safe buffer , return
* the equivalent pointer to the same safe data . Assumes
* copy_from_kernel_nofault ( ) has already happened and our safe buffer is
* populated .
*/
static void * __btf_show_obj_safe ( struct btf_show * show , void * data , int size )
{
if ( btf_show_obj_is_safe ( show , data , size ) )
return show - > obj . safe + ( data - show - > obj . data ) ;
return NULL ;
}
/*
* Return a safe - to - access version of data pointed to by @ data .
* We do this by copying the relevant amount of information
* to the struct btf_show obj . safe buffer using copy_from_kernel_nofault ( ) .
*
* If BTF_SHOW_UNSAFE is specified , just return data as - is ; no
* safe copy is needed .
*
* Otherwise we need to determine if we have the required amount
* of data ( determined by the @ data pointer and the size of the
* largest base type we can encounter ( represented by
* BTF_SHOW_OBJ_BASE_TYPE_SIZE ) . Having that much data ensures
* that we will be able to print some of the current object ,
* and if more is needed a copy will be triggered .
* Some objects such as structs will not fit into the buffer ;
* in such cases additional copies when we iterate over their
* members may be needed .
*
* btf_show_obj_safe ( ) is used to return a safe buffer for
* btf_show_start_type ( ) ; this ensures that as we recurse into
* nested types we always have safe data for the given type .
* This approach is somewhat wasteful ; it ' s possible for example
* that when iterating over a large union we ' ll end up copying the
* same data repeatedly , but the goal is safety not performance .
* We use stack data as opposed to per - CPU buffers because the
* iteration over a type can take some time , and preemption handling
* would greatly complicate use of the safe buffer .
*/
static void * btf_show_obj_safe ( struct btf_show * show ,
const struct btf_type * t ,
void * data )
{
const struct btf_type * rt ;
int size_left , size ;
void * safe = NULL ;
if ( show - > flags & BTF_SHOW_UNSAFE )
return data ;
rt = btf_resolve_size ( show - > btf , t , & size ) ;
if ( IS_ERR ( rt ) ) {
show - > state . status = PTR_ERR ( rt ) ;
return NULL ;
}
/*
* Is this toplevel object ? If so , set total object size and
* initialize pointers . Otherwise check if we still fall within
* our safe object data .
*/
if ( show - > state . depth = = 0 ) {
show - > obj . size = size ;
show - > obj . head = data ;
} else {
/*
* If the size of the current object is > our remaining
* safe buffer we _may_ need to do a new copy . However
* consider the case of a nested struct ; it ' s size pushes
* us over the safe buffer limit , but showing any individual
* struct members does not . In such cases , we don ' t need
* to initiate a fresh copy yet ; however we definitely need
* at least BTF_SHOW_OBJ_BASE_TYPE_SIZE bytes left
* in our buffer , regardless of the current object size .
* The logic here is that as we resolve types we will
* hit a base type at some point , and we need to be sure
* the next chunk of data is safely available to display
* that type info safely . We cannot rely on the size of
* the current object here because it may be much larger
* than our current buffer ( e . g . task_struct is 8 k ) .
* All we want to do here is ensure that we can print the
* next basic type , which we can if either
* - the current type size is within the safe buffer ; or
* - at least BTF_SHOW_OBJ_BASE_TYPE_SIZE bytes are left in
* the safe buffer .
*/
safe = __btf_show_obj_safe ( show , data ,
min ( size ,
BTF_SHOW_OBJ_BASE_TYPE_SIZE ) ) ;
}
/*
* We need a new copy to our safe object , either because we haven ' t
2021-05-25 10:56:59 +08:00
* yet copied and are initializing safe data , or because the data
2020-09-28 12:31:04 +01:00
* we want falls outside the boundaries of the safe object .
*/
if ( ! safe ) {
size_left = btf_show_obj_size_left ( show , data ) ;
if ( size_left > BTF_SHOW_OBJ_SAFE_SIZE )
size_left = BTF_SHOW_OBJ_SAFE_SIZE ;
show - > state . status = copy_from_kernel_nofault ( show - > obj . safe ,
data , size_left ) ;
if ( ! show - > state . status ) {
show - > obj . data = data ;
safe = show - > obj . safe ;
}
}
return safe ;
}
/*
 * Set the type we are starting to show and return a safe data pointer
 * to be used for showing the associated data.
 */
static void *btf_show_start_type(struct btf_show *show,
				 const struct btf_type *t,
				 u32 type_id, void *data)
{
	void *safe_data;

	/* Reset the cached name, then record the type being shown. */
	show->state.name[0] = '\0';
	show->state.type_id = type_id;
	show->state.type = t;

	safe_data = btf_show_obj_safe(show, t, data);
	return safe_data;
}
static void btf_show_end_type ( struct btf_show * show )
{
show - > state . type = NULL ;
show - > state . type_id = 0 ;
show - > state . name [ 0 ] = ' \0 ' ;
}
/*
 * Begin showing an aggregate: start the type, print its opening line
 * (indent, name, newline) and go one level deeper. Returns the safe data
 * pointer, or NULL without printing if none could be obtained.
 */
static void *btf_show_start_aggr_type(struct btf_show *show,
				      const struct btf_type *t,
				      u32 type_id, void *data)
{
	void *safe_data = btf_show_start_type(show, t, type_id, data);

	if (safe_data) {
		btf_show(show, "%s%s%s", btf_show_indent(show),
			 btf_show_name(show),
			 btf_show_newline(show));
		show->state.depth++;
	}

	return safe_data;
}
static void btf_show_end_aggr_type ( struct btf_show * show ,
const char * suffix )
{
show - > state . depth - - ;
btf_show ( show , " %s%s%s%s " , btf_show_indent ( show ) , suffix ,
btf_show_delim ( show ) , btf_show_newline ( show ) ) ;
btf_show_end_type ( show ) ;
}
static void btf_show_start_member ( struct btf_show * show ,
const struct btf_member * m )
{
show - > state . member = m ;
}
static void btf_show_start_array_member ( struct btf_show * show )
{
show - > state . array_member = 1 ;
btf_show_start_member ( show , NULL ) ;
}
static void btf_show_end_member ( struct btf_show * show )
{
show - > state . member = NULL ;
}
static void btf_show_end_array_member ( struct btf_show * show )
{
show - > state . array_member = 0 ;
btf_show_end_member ( show ) ;
}
/*
 * Begin showing an array: store @array_encoding, reset the terminated flag,
 * then start the aggregate. Returns the safe data pointer or NULL.
 */
static void *btf_show_start_array_type(struct btf_show *show,
				       const struct btf_type *t,
				       u32 type_id,
				       u16 array_encoding,
				       void *data)
{
	void *safe_data;

	show->state.array_terminated = 0;
	show->state.array_encoding = array_encoding;

	safe_data = btf_show_start_aggr_type(show, t, type_id, data);
	return safe_data;
}
static void btf_show_end_array_type ( struct btf_show * show )
{
show - > state . array_encoding = 0 ;
show - > state . array_terminated = 0 ;
btf_show_end_aggr_type ( show , " ] " ) ;
}
/* Begin showing a struct/union; a plain aggregate start with no extra state. */
static void *btf_show_start_struct_type(struct btf_show *show,
					const struct btf_type *t,
					u32 type_id,
					void *data)
{
	void *safe_data = btf_show_start_aggr_type(show, t, type_id, data);

	return safe_data;
}
/* Print the closing "}" of a struct/union and end the aggregate. */
static void btf_show_end_struct_type(struct btf_show *show)
{
	static const char closing[] = "}";

	btf_show_end_aggr_type(show, closing);
}
2018-04-18 15:55:57 -07:00
/* Unconditionally format a message into @log via bpf_verifier_vlog(). */
__printf(2, 3) static void __btf_verifier_log(struct bpf_verifier_log *log,
					      const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	bpf_verifier_vlog(log, fmt, ap);
	va_end(ap);
}
/* Format a message into env->log, but only if bpf_verifier_log_needed(). */
__printf(2, 3) static void btf_verifier_log(struct btf_verifier_env *env,
					    const char *fmt, ...)
{
	struct bpf_verifier_log *log = &env->log;
	va_list ap;

	if (bpf_verifier_log_needed(log)) {
		va_start(ap, fmt);
		bpf_verifier_vlog(log, fmt, ap);
		va_end(ap);
	}
}
/*
 * Log one line describing type @t: "[id] KIND name", optionally followed by
 * the kind-specific details (@log_details) and by a caller-supplied message
 * (@fmt), terminated with a newline.
 */
__printf(4, 5) static void __btf_verifier_log_type(struct btf_verifier_env *env,
						   const struct btf_type *t,
						   bool log_details,
						   const char *fmt, ...)
{
	struct bpf_verifier_log *log = &env->log;
	const char *detail_sep = log_details ? " " : "";
	va_list ap;

	if (!bpf_verifier_log_needed(log))
		return;

	/* btf verifier prints all types it is processing via
	 * btf_verifier_log_type(..., fmt = NULL).
	 * Skip those prints for in-kernel BTF verification.
	 */
	if (!fmt && log->level == BPF_LOG_KERNEL)
		return;

	__btf_verifier_log(log, "[%u] %s %s%s",
			   env->log_type_id,
			   btf_type_str(t),
			   __btf_name_by_offset(env->btf, t->name_off),
			   detail_sep);

	if (log_details)
		btf_type_ops(t)->log_details(env, t);

	/* Append the caller's message only when there is a non-empty one. */
	if (fmt && *fmt) {
		__btf_verifier_log(log, " ");
		va_start(ap, fmt);
		bpf_verifier_vlog(log, fmt, ap);
		va_end(ap);
	}

	__btf_verifier_log(log, "\n");
}
/* Log type @t together with its kind-specific details. */
#define btf_verifier_log_type(env, t, ...) \
	__btf_verifier_log_type((env), (t), true, __VA_ARGS__)

/* Log type @t without the kind-specific details. */
#define btf_verifier_log_basic(env, t, ...) \
	__btf_verifier_log_type((env), (t), false, __VA_ARGS__)
/* Log one line describing @member of @struct_type, optionally followed
 * by a message.
 *
 * Outside the CHECK_META phase the enclosing struct is logged first
 * via btf_verifier_log_type().  The member line carries the member
 * name, its type id and its offset; when @fmt is a non-empty string
 * the formatted message is appended after a separating space.
 *
 * Nothing is written when the log is not needed, or when the log
 * level is BPF_LOG_KERNEL and @fmt is NULL.
 */
__printf(4, 5)
static void btf_verifier_log_member(struct btf_verifier_env *env,
				    const struct btf_type *struct_type,
				    const struct btf_member *member,
				    const char *fmt, ...)
{
	struct bpf_verifier_log *log = &env->log;
	struct btf *btf = env->btf;
	va_list args;

	if (!bpf_verifier_log_needed(log))
		return;

	if (log->level == BPF_LOG_KERNEL && !fmt)
		return;

	/* The CHECK_META phase already did a btf dump.
	 *
	 * If member is logged again, it must hit an error in
	 * parsing this member.  It is useful to print out which
	 * struct this member belongs to.
	 */
	if (env->phase != CHECK_META)
		btf_verifier_log_type(env, struct_type, NULL);

	/* With kind_flag (bit 31 of struct_type->info) set,
	 * member->offset encodes the bit offset in bits 0-23 and the
	 * bitfield size in bits 24-31.  Without it, all 32 bits are
	 * the bit offset.
	 */
	if (btf_type_kflag(struct_type))
		__btf_verifier_log(log,
				   "\t%s type_id=%u bitfield_size=%u bits_offset=%u",
				   __btf_name_by_offset(btf, member->name_off),
				   member->type,
				   BTF_MEMBER_BITFIELD_SIZE(member->offset),
				   BTF_MEMBER_BIT_OFFSET(member->offset));
	else
		__btf_verifier_log(log, "\t%s type_id=%u bits_offset=%u",
				   __btf_name_by_offset(btf, member->name_off),
				   member->type, member->offset);

	if (fmt && *fmt) {
		__btf_verifier_log(log, " ");
		va_start(args, fmt);
		bpf_verifier_vlog(log, fmt, args);
		va_end(args);
	}

	__btf_verifier_log(log, "\n");
}
2019-04-09 23:20:09 +02:00
/* Log one line describing the variable section info @vsi of
 * @datasec_type, optionally followed by a message.
 *
 * Outside the CHECK_META phase the enclosing type is logged first via
 * btf_verifier_log_type().  The line carries the type id, offset and
 * size of @vsi; when @fmt is a non-empty string the formatted message
 * is appended after a separating space.
 *
 * Nothing is written when the log is not needed, or when the log
 * level is BPF_LOG_KERNEL and @fmt is NULL.
 */
__printf(4, 5)
static void btf_verifier_log_vsi(struct btf_verifier_env *env,
				 const struct btf_type *datasec_type,
				 const struct btf_var_secinfo *vsi,
				 const char *fmt, ...)
{
	struct bpf_verifier_log *log = &env->log;
	va_list args;

	if (!bpf_verifier_log_needed(log))
		return;

	if (log->level == BPF_LOG_KERNEL && !fmt)
		return;

	if (env->phase != CHECK_META)
		btf_verifier_log_type(env, datasec_type, NULL);

	__btf_verifier_log(log, "\t type_id=%u offset=%u size=%u",
			   vsi->type, vsi->offset, vsi->size);

	if (fmt && *fmt) {
		__btf_verifier_log(log, " ");
		va_start(args, fmt);
		bpf_verifier_vlog(log, fmt, args);
		va_end(args);
	}

	__btf_verifier_log(log, "\n");
}
2018-05-22 14:57:18 -07:00
/* Dump the fields of env->btf->hdr, one per line, followed by
 * @btf_data_size as "btf_total_size".
 *
 * Nothing is written when the log is not needed or when the log level
 * is BPF_LOG_KERNEL.
 */
static void btf_verifier_log_hdr(struct btf_verifier_env *env,
				 u32 btf_data_size)
{
	struct bpf_verifier_log *log = &env->log;
	const struct btf *btf = env->btf;
	const struct btf_header *hdr;

	if (!bpf_verifier_log_needed(log))
		return;

	if (log->level == BPF_LOG_KERNEL)
		return;

	hdr = &btf->hdr;
	__btf_verifier_log(log, "magic: 0x%x\n", hdr->magic);
	__btf_verifier_log(log, "version: %u\n", hdr->version);
	__btf_verifier_log(log, "flags: 0x%x\n", hdr->flags);
	__btf_verifier_log(log, "hdr_len: %u\n", hdr->hdr_len);
	__btf_verifier_log(log, "type_off: %u\n", hdr->type_off);
	__btf_verifier_log(log, "type_len: %u\n", hdr->type_len);
	__btf_verifier_log(log, "str_off: %u\n", hdr->str_off);
	__btf_verifier_log(log, "str_len: %u\n", hdr->str_len);
	__btf_verifier_log(log, "btf_total_size: %u\n", btf_data_size);
}
static int btf_add_type ( struct btf_verifier_env * env , struct btf_type * t )
{
struct btf * btf = env - > btf ;
2020-11-09 17:19:28 -08:00
if ( btf - > types_size = = btf - > nr_types ) {
2018-04-18 15:55:57 -07:00
/* Expand 'types' array */
struct btf_type * * new_types ;
u32 expand_by , new_size ;
2020-11-09 17:19:28 -08:00
if ( btf - > start_id + btf - > types_size = = BTF_MAX_TYPE ) {
2018-04-18 15:55:57 -07:00
btf_verifier_log ( env , " Exceeded max num of types " ) ;
return - E2BIG ;
}
expand_by = max_t ( u32 , btf - > types_size > > 2 , 16 ) ;
2018-05-22 14:57:20 -07:00
new_size = min_t ( u32 , BTF_MAX_TYPE ,
2018-04-18 15:55:57 -07:00
btf - > types_size + expand_by ) ;
treewide: kvzalloc() -> kvcalloc()
The kvzalloc() function has a 2-factor argument form, kvcalloc(). This
patch replaces cases of:
kvzalloc(a * b, gfp)
with:
kvcalloc(a * b, gfp)
as well as handling cases of:
kvzalloc(a * b * c, gfp)
with:
kvzalloc(array3_size(a, b, c), gfp)
as it's slightly less ugly than:
kvcalloc(array_size(a, b), c, gfp)
This does, however, attempt to ignore constant size factors like:
kvzalloc(4 * 1024, gfp)
though any constants defined via macros get caught up in the conversion.
Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.
The Coccinelle script used for this was:
// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@
(
kvzalloc(
- (sizeof(TYPE)) * E
+ sizeof(TYPE) * E
, ...)
|
kvzalloc(
- (sizeof(THING)) * E
+ sizeof(THING) * E
, ...)
)
// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@
(
kvzalloc(
- sizeof(u8) * (COUNT)
+ COUNT
, ...)
|
kvzalloc(
- sizeof(__u8) * (COUNT)
+ COUNT
, ...)
|
kvzalloc(
- sizeof(char) * (COUNT)
+ COUNT
, ...)
|
kvzalloc(
- sizeof(unsigned char) * (COUNT)
+ COUNT
, ...)
|
kvzalloc(
- sizeof(u8) * COUNT
+ COUNT
, ...)
|
kvzalloc(
- sizeof(__u8) * COUNT
+ COUNT
, ...)
|
kvzalloc(
- sizeof(char) * COUNT
+ COUNT
, ...)
|
kvzalloc(
- sizeof(unsigned char) * COUNT
+ COUNT
, ...)
)
// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@
(
- kvzalloc
+ kvcalloc
(
- sizeof(TYPE) * (COUNT_ID)
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kvzalloc
+ kvcalloc
(
- sizeof(TYPE) * COUNT_ID
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kvzalloc
+ kvcalloc
(
- sizeof(TYPE) * (COUNT_CONST)
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kvzalloc
+ kvcalloc
(
- sizeof(TYPE) * COUNT_CONST
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kvzalloc
+ kvcalloc
(
- sizeof(THING) * (COUNT_ID)
+ COUNT_ID, sizeof(THING)
, ...)
|
- kvzalloc
+ kvcalloc
(
- sizeof(THING) * COUNT_ID
+ COUNT_ID, sizeof(THING)
, ...)
|
- kvzalloc
+ kvcalloc
(
- sizeof(THING) * (COUNT_CONST)
+ COUNT_CONST, sizeof(THING)
, ...)
|
- kvzalloc
+ kvcalloc
(
- sizeof(THING) * COUNT_CONST
+ COUNT_CONST, sizeof(THING)
, ...)
)
// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@
- kvzalloc
+ kvcalloc
(
- SIZE * COUNT
+ COUNT, SIZE
, ...)
// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@
(
kvzalloc(
- sizeof(TYPE) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kvzalloc(
- sizeof(TYPE) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kvzalloc(
- sizeof(TYPE) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kvzalloc(
- sizeof(TYPE) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kvzalloc(
- sizeof(THING) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kvzalloc(
- sizeof(THING) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kvzalloc(
- sizeof(THING) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kvzalloc(
- sizeof(THING) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
)
// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@
(
kvzalloc(
- sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kvzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kvzalloc(
- sizeof(THING1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kvzalloc(
- sizeof(THING1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kvzalloc(
- sizeof(TYPE1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
|
kvzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
)
// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@
(
kvzalloc(
- (COUNT) * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kvzalloc(
- COUNT * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kvzalloc(
- COUNT * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kvzalloc(
- (COUNT) * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kvzalloc(
- COUNT * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kvzalloc(
- (COUNT) * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kvzalloc(
- (COUNT) * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kvzalloc(
- COUNT * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
)
// Any remaining multi-factor products, first at least 3-factor products,
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@
(
kvzalloc(C1 * C2 * C3, ...)
|
kvzalloc(
- (E1) * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
|
kvzalloc(
- (E1) * (E2) * E3
+ array3_size(E1, E2, E3)
, ...)
|
kvzalloc(
- (E1) * (E2) * (E3)
+ array3_size(E1, E2, E3)
, ...)
|
kvzalloc(
- E1 * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
)
// And then all remaining 2 factors products when they're not all constants,
// keeping sizeof() as the second factor argument.
@@
expression THING, E1, E2;
type TYPE;
constant C1, C2, C3;
@@
(
kvzalloc(sizeof(THING) * C2, ...)
|
kvzalloc(sizeof(TYPE) * C2, ...)
|
kvzalloc(C1 * C2 * C3, ...)
|
kvzalloc(C1 * C2, ...)
|
- kvzalloc
+ kvcalloc
(
- sizeof(TYPE) * (E2)
+ E2, sizeof(TYPE)
, ...)
|
- kvzalloc
+ kvcalloc
(
- sizeof(TYPE) * E2
+ E2, sizeof(TYPE)
, ...)
|
- kvzalloc
+ kvcalloc
(
- sizeof(THING) * (E2)
+ E2, sizeof(THING)
, ...)
|
- kvzalloc
+ kvcalloc
(
- sizeof(THING) * E2
+ E2, sizeof(THING)
, ...)
|
- kvzalloc
+ kvcalloc
(
- (E1) * E2
+ E1, E2
, ...)
|
- kvzalloc
+ kvcalloc
(
- (E1) * (E2)
+ E1, E2
, ...)
|
- kvzalloc
+ kvcalloc
(
- E1 * E2
+ E1, E2
, ...)
)
Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-12 14:04:48 -07:00
new_types = kvcalloc ( new_size , sizeof ( * new_types ) ,
2018-04-18 15:55:57 -07:00
GFP_KERNEL | __GFP_NOWARN ) ;
if ( ! new_types )
return - ENOMEM ;
2020-11-09 17:19:28 -08:00
if ( btf - > nr_types = = 0 ) {
if ( ! btf - > base_btf ) {
/* lazily init VOID type */
new_types [ 0 ] = & btf_void ;
btf - > nr_types + + ;
}
} else {
2018-04-18 15:55:57 -07:00
memcpy ( new_types , btf - > types ,
2020-11-09 17:19:28 -08:00
sizeof ( * btf - > types ) * btf - > nr_types ) ;
}
2018-04-18 15:55:57 -07:00
kvfree ( btf - > types ) ;
btf - > types = new_types ;
btf - > types_size = new_size ;
}
2020-11-09 17:19:28 -08:00
btf - > types [ btf - > nr_types + + ] = t ;
2018-04-18 15:55:57 -07:00
return 0 ;
}
2018-05-04 14:49:51 -07:00
static int btf_alloc_id ( struct btf * btf )
{
int id ;
idr_preload ( GFP_KERNEL ) ;
spin_lock_bh ( & btf_idr_lock ) ;
id = idr_alloc_cyclic ( & btf_idr , btf , 1 , INT_MAX , GFP_ATOMIC ) ;
if ( id > 0 )
btf - > id = id ;
spin_unlock_bh ( & btf_idr_lock ) ;
idr_preload_end ( ) ;
if ( WARN_ON_ONCE ( ! id ) )
return - ENOSPC ;
return id > 0 ? 0 : id ;
}
static void btf_free_id ( struct btf * btf )
{
unsigned long flags ;
/*
* In map - in - map , calling map_delete_elem ( ) on outer
* map will call bpf_map_put on the inner map .
* It will then eventually call btf_free_id ( )
* on the inner map . Some of the map_delete_elem ( )
* implementation may have irq disabled , so
* we need to use the _irqsave ( ) version instead
* of the _bh ( ) version .
*/
spin_lock_irqsave ( & btf_idr_lock , flags ) ;
idr_remove ( & btf_idr , btf - > id ) ;
spin_unlock_irqrestore ( & btf_idr_lock , flags ) ;
}
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
static void btf_free_kfunc_set_tab ( struct btf * btf )
{
struct btf_kfunc_set_tab * tab = btf - > kfunc_set_tab ;
2022-07-21 15:42:35 +02:00
int hook ;
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still intializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
if ( ! tab )
return ;
/* For module BTF, we directly assign the sets being registered, so
* there is nothing to free except kfunc_set_tab .
*/
if ( btf_is_module ( btf ) )
goto free_tab ;
2022-07-21 15:42:35 +02:00
for ( hook = 0 ; hook < ARRAY_SIZE ( tab - > sets ) ; hook + + )
kfree ( tab - > sets [ hook ] ) ;
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still intializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
free_tab :
kfree ( tab ) ;
btf - > kfunc_set_tab = NULL ;
}
2022-04-25 03:18:54 +05:30
static void btf_free_dtor_kfunc_tab ( struct btf * btf )
{
struct btf_id_dtor_kfunc_tab * tab = btf - > dtor_kfunc_tab ;
if ( ! tab )
return ;
kfree ( tab ) ;
btf - > dtor_kfunc_tab = NULL ;
}
bpf: Recognize lock and list fields in allocated objects
Allow specifying bpf_spin_lock, bpf_list_head, bpf_list_node fields in an
allocated object.
Also update btf_struct_access to reject direct access to these special
fields.
A bpf_list_head allows implementing map-in-map style use cases, where an
allocated object with bpf_list_head is linked into a list in a map
value. This would require embedding a bpf_list_node, support for which
is also included. The bpf_spin_lock is used to protect the bpf_list_head
and other data.
While we strictly don't require to hold a bpf_spin_lock while touching
the bpf_list_head in such objects, as when we have access to it, we have
complete ownership of the object, the locking constraint is still kept
and may be conditionally lifted in the future.
Note that the specification of such types can be done just like map
values, e.g.:
struct bar {
struct bpf_list_node node;
};
struct foo {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(bar, node);
struct bpf_list_node node;
};
struct map_value {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(foo, node);
};
To recognize such types in user BTF, we build a btf_struct_metas array
of metadata items corresponding to each BTF ID. This is done once during
the btf_parse stage to avoid having to do it each time during the
verification process's requirement to inspect the metadata.
Moreover, the computed metadata needs to be passed to some helpers in
future patches which requires allocating them and storing them in the
BTF that is pinned by the program itself, so that valid access can be
assumed to such data during program runtime.
A key thing to note is that once a btf_struct_meta is available for a
type, both the btf_record and btf_field_offs should be available. It is
critical that btf_field_offs is available in case special fields are
present, as we extensively rely on special fields being zeroed out in
map values and allocated objects in later patches. The code ensures that
by bailing out in case of errors and ensuring both are available
together. If the record is not available, the special fields won't be
recognized, so not having both is also fine (in terms of being a
verification error and not a runtime bug).
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221118015614.2013203-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-18 07:25:56 +05:30
static void btf_struct_metas_free ( struct btf_struct_metas * tab )
{
int i ;
if ( ! tab )
return ;
for ( i = 0 ; i < tab - > cnt ; i + + ) {
btf_record_free ( tab - > types [ i ] . record ) ;
kfree ( tab - > types [ i ] . field_offs ) ;
}
kfree ( tab ) ;
}
static void btf_free_struct_meta_tab ( struct btf * btf )
{
struct btf_struct_metas * tab = btf - > struct_meta_tab ;
btf_struct_metas_free ( tab ) ;
btf - > struct_meta_tab = NULL ;
}
2018-04-18 15:55:57 -07:00
static void btf_free ( struct btf * btf )
{
bpf: Recognize lock and list fields in allocated objects
Allow specifying bpf_spin_lock, bpf_list_head, bpf_list_node fields in a
allocated object.
Also update btf_struct_access to reject direct access to these special
fields.
A bpf_list_head allows implementing map-in-map style use cases, where an
allocated object with bpf_list_head is linked into a list in a map
value. This would require embedding a bpf_list_node, support for which
is also included. The bpf_spin_lock is used to protect the bpf_list_head
and other data.
While we strictly don't require to hold a bpf_spin_lock while touching
the bpf_list_head in such objects, as when have access to it, we have
complete ownership of the object, the locking constraint is still kept
and may be conditionally lifted in the future.
Note that the specification of such types can be done just like map
values, e.g.:
struct bar {
struct bpf_list_node node;
};
struct foo {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(bar, node);
struct bpf_list_node node;
};
struct map_value {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(foo, node);
};
To recognize such types in user BTF, we build a btf_struct_metas array
of metadata items corresponding to each BTF ID. This is done once during
the btf_parse stage to avoid having to do it each time during the
verification process's requirement to inspect the metadata.
Moreover, the computed metadata needs to be passed to some helpers in
future patches which requires allocating them and storing them in the
BTF that is pinned by the program itself, so that valid access can be
assumed to such data during program runtime.
A key thing to note is that once a btf_struct_meta is available for a
type, both the btf_record and btf_field_offs should be available. It is
critical that btf_field_offs is available in case special fields are
present, as we extensively rely on special fields being zeroed out in
map values and allocated objects in later patches. The code ensures that
by bailing out in case of errors and ensuring both are available
together. If the record is not available, the special fields won't be
recognized, so not having both is also fine (in terms of being a
verification error and not a runtime bug).
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221118015614.2013203-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-18 07:25:56 +05:30
btf_free_struct_meta_tab ( btf ) ;
2022-04-25 03:18:54 +05:30
btf_free_dtor_kfunc_tab ( btf ) ;
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still intializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
btf_free_kfunc_set_tab ( btf ) ;
2018-04-18 15:55:57 -07:00
kvfree ( btf - > types ) ;
2018-04-18 15:55:58 -07:00
kvfree ( btf - > resolved_sizes ) ;
kvfree ( btf - > resolved_ids ) ;
2018-04-18 15:55:57 -07:00
kvfree ( btf - > data ) ;
kfree ( btf ) ;
}
2018-05-04 14:49:51 -07:00
static void btf_free_rcu ( struct rcu_head * rcu )
2018-04-18 15:56:01 -07:00
{
2018-05-04 14:49:51 -07:00
struct btf * btf = container_of ( rcu , struct btf , rcu ) ;
btf_free ( btf ) ;
2018-04-18 15:56:01 -07:00
}
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption throughout BPF verifier of vmlinux BTF. Instead,
wherever BTF type IDs are involved, also track the instance of struct btf that
goes along with the type ID. This allows to gradually add support for kernel
module BTFs and using/tracking module types across BPF helper calls and
registers.
This patch also renames btf_id() function to btf_obj_id() to minimize naming
clash with using btf_id to denote BTF *type* ID, rather than BTF *object*'s ID.
Also, although btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs need that, so apply BTF refcounting universally when
BPF program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler clean up code.
Now that BTF type ID is not enough to uniquely identify a BTF type, extend BPF
trampoline key to include BTF object ID. To differentiate that from target
program BPF ID, set 31st bit of type ID. BTF type IDs (at least currently) are
not allowed to take full 32 bits, so there is no danger of confusing that bit
with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-03 12:46:29 -08:00
void btf_get ( struct btf * btf )
{
refcount_inc ( & btf - > refcnt ) ;
}
2018-04-18 15:56:01 -07:00
void btf_put ( struct btf * btf )
{
2018-05-04 14:49:51 -07:00
if ( btf & & refcount_dec_and_test ( & btf - > refcnt ) ) {
btf_free_id ( btf ) ;
call_rcu ( & btf - > rcu , btf_free_rcu ) ;
}
2018-04-18 15:56:01 -07:00
}
2018-04-18 15:55:58 -07:00
static int env_resolve_init ( struct btf_verifier_env * env )
{
struct btf * btf = env - > btf ;
u32 nr_types = btf - > nr_types ;
u32 * resolved_sizes = NULL ;
u32 * resolved_ids = NULL ;
u8 * visit_states = NULL ;
2020-11-09 17:19:28 -08:00
resolved_sizes = kvcalloc ( nr_types , sizeof ( * resolved_sizes ) ,
2018-04-18 15:55:58 -07:00
GFP_KERNEL | __GFP_NOWARN ) ;
if ( ! resolved_sizes )
goto nomem ;
2020-11-09 17:19:28 -08:00
resolved_ids = kvcalloc ( nr_types , sizeof ( * resolved_ids ) ,
2018-04-18 15:55:58 -07:00
GFP_KERNEL | __GFP_NOWARN ) ;
if ( ! resolved_ids )
goto nomem ;
2020-11-09 17:19:28 -08:00
visit_states = kvcalloc ( nr_types , sizeof ( * visit_states ) ,
2018-04-18 15:55:58 -07:00
GFP_KERNEL | __GFP_NOWARN ) ;
if ( ! visit_states )
goto nomem ;
btf - > resolved_sizes = resolved_sizes ;
btf - > resolved_ids = resolved_ids ;
env - > visit_states = visit_states ;
return 0 ;
nomem :
kvfree ( resolved_sizes ) ;
kvfree ( resolved_ids ) ;
kvfree ( visit_states ) ;
return - ENOMEM ;
}
2018-04-18 15:55:57 -07:00
static void btf_verifier_env_free ( struct btf_verifier_env * env )
{
2018-04-18 15:55:58 -07:00
kvfree ( env - > visit_states ) ;
2018-04-18 15:55:57 -07:00
kfree ( env ) ;
}
2018-04-18 15:55:58 -07:00
static bool env_type_is_resolve_sink ( const struct btf_verifier_env * env ,
const struct btf_type * next_type )
{
switch ( env - > resolve_mode ) {
case RESOLVE_TBD :
/* int, enum or void is a sink */
return ! btf_type_needs_resolve ( next_type ) ;
case RESOLVE_PTR :
2018-11-19 15:29:08 -08:00
/* int, enum, void, struct, array, func or func_proto is a sink
* for ptr
*/
2018-04-18 15:55:58 -07:00
return ! btf_type_is_modifier ( next_type ) & &
! btf_type_is_ptr ( next_type ) ;
case RESOLVE_STRUCT_OR_ARRAY :
2018-11-19 15:29:08 -08:00
/* int, enum, void, ptr, func or func_proto is a sink
* for struct and array
*/
2018-04-18 15:55:58 -07:00
return ! btf_type_is_modifier ( next_type ) & &
! btf_type_is_array ( next_type ) & &
! btf_type_is_struct ( next_type ) ;
default :
2018-05-25 23:33:19 +02:00
BUG ( ) ;
2018-04-18 15:55:58 -07:00
}
}
/* Report whether @type_id has already been resolved.
 *
 * Ids below the btf's start_id belong to the base BTF and are treated as
 * resolved; for local ids the visit state is consulted.
 */
static bool env_type_is_resolved(const struct btf_verifier_env *env,
				 u32 type_id)
{
	u32 first_local = env->btf->start_id;

	/* base BTF types should be resolved by now */
	return type_id < first_local ||
	       env->visit_states[type_id - first_local] == RESOLVED;
}
/* Push type @t (id @type_id) onto the env's resolve stack.
 *
 * Returns 0 on success, -E2BIG when the stack already holds
 * MAX_RESOLVE_DEPTH entries, or -EEXIST when the type belongs to the base
 * BTF or is not in the NOT_VISITED state.
 *
 * On success the type is marked VISITED and, if the resolve mode is still
 * RESOLVE_TBD, the mode is chosen from the kind of @t.
 */
static int env_stack_push(struct btf_verifier_env *env,
			  const struct btf_type *t, u32 type_id)
{
	const struct btf *btf = env->btf;
	struct resolve_vertex *v;
	u32 local_id;

	if (env->top_stack == MAX_RESOLVE_DEPTH)
		return -E2BIG;

	if (type_id < btf->start_id)
		return -EEXIST;

	local_id = type_id - btf->start_id;
	if (env->visit_states[local_id] != NOT_VISITED)
		return -EEXIST;

	env->visit_states[local_id] = VISITED;

	v = &env->stack[env->top_stack];
	env->top_stack++;
	v->t = t;
	v->type_id = type_id;
	v->next_member = 0;

	if (env->resolve_mode != RESOLVE_TBD)
		return 0;

	if (btf_type_is_ptr(t))
		env->resolve_mode = RESOLVE_PTR;
	else if (btf_type_is_struct(t) || btf_type_is_array(t))
		env->resolve_mode = RESOLVE_STRUCT_OR_ARRAY;

	return 0;
}
/* Record @next_member in the vertex currently on top of the resolve
 * stack. The stack index is top_stack - 1, so the caller must have pushed
 * at least one entry.
 */
static void env_stack_set_next_member(struct btf_verifier_env *env,
				      u16 next_member)
{
	struct resolve_vertex *top = &env->stack[env->top_stack - 1];

	top->next_member = next_member;
}
/* Pop the top vertex off the resolve stack and record its result.
 *
 * The popped type's resolved size and resolved type id are stored in the
 * btf, and its visit state becomes RESOLVED.
 */
static void env_stack_pop_resolved(struct btf_verifier_env *env,
				   u32 resolved_type_id,
				   u32 resolved_size)
{
	struct btf *btf = env->btf;
	u32 local_id;

	env->top_stack--;
	/* adjust to local type id */
	local_id = env->stack[env->top_stack].type_id - btf->start_id;

	btf->resolved_sizes[local_id] = resolved_size;
	btf->resolved_ids[local_id] = resolved_type_id;
	env->visit_states[local_id] = RESOLVED;
}
static const struct resolve_vertex * env_stack_peak ( struct btf_verifier_env * env )
{
return env - > top_stack ? & env - > stack [ env - > top_stack - 1 ] : NULL ;
}
2019-11-07 10:09:03 -08:00
/* Resolve the size of a passed-in "type"
*
* type : is an array ( e . g . u32 array [ x ] [ y ] )
* return type : type " u32[x][y] " , i . e . BTF_KIND_ARRAY ,
* * type_size : ( x * y * sizeof ( u32 ) ) . Hence , * type_size always
* corresponds to the return type .
* * elem_type : u32
2020-08-25 21:21:14 +02:00
* * elem_id : id of u32
2019-11-07 10:09:03 -08:00
* * total_nelems : ( x * y ) . Hence , individual elem size is
* ( * type_size / * total_nelems )
2020-08-25 21:21:15 +02:00
* * type_id : id of type if it ' s changed within the function , 0 if not
2019-11-07 10:09:03 -08:00
*
* type : is not an array ( e . g . const struct X )
* return type : type " struct X "
* * type_size : sizeof ( struct X )
* * elem_type : same as return type ( " struct X " )
2020-08-25 21:21:14 +02:00
* * elem_id : 0
2019-11-07 10:09:03 -08:00
* * total_nelems : 1
2020-08-25 21:21:15 +02:00
* * type_id : id of type if it ' s changed within the function , 0 if not
2019-11-07 10:09:03 -08:00
*/
2020-08-25 21:21:13 +02:00
static const struct btf_type *
__btf_resolve_size ( const struct btf * btf , const struct btf_type * type ,
u32 * type_size , const struct btf_type * * elem_type ,
2020-08-25 21:21:15 +02:00
u32 * elem_id , u32 * total_nelems , u32 * type_id )
2019-11-07 10:09:03 -08:00
{
const struct btf_type * array_type = NULL ;
2020-08-25 21:21:14 +02:00
const struct btf_array * array = NULL ;
2020-08-25 21:21:15 +02:00
u32 i , size , nelems = 1 , id = 0 ;
2019-11-07 10:09:03 -08:00
for ( i = 0 ; i < MAX_RESOLVE_DEPTH ; i + + ) {
switch ( BTF_INFO_KIND ( type - > info ) ) {
/* type->size can be used */
case BTF_KIND_INT :
case BTF_KIND_STRUCT :
case BTF_KIND_UNION :
case BTF_KIND_ENUM :
2021-02-26 21:22:52 +01:00
case BTF_KIND_FLOAT :
bpf: Add btf enum64 support
Currently, BTF only supports upto 32bit enum value with BTF_KIND_ENUM.
But in kernel, some enum indeed has 64bit values, e.g.,
in uapi bpf.h, we have
enum {
BPF_F_INDEX_MASK = 0xffffffffULL,
BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
};
In this case, BTF_KIND_ENUM will encode the value of BPF_F_CTXLEN_MASK
as 0, which certainly is incorrect.
This patch added a new btf kind, BTF_KIND_ENUM64, which permits
64bit value to cover the above use case. The BTF_KIND_ENUM64 has
the following three fields followed by the common type:
struct bpf_enum64 {
__u32 nume_off;
__u32 val_lo32;
__u32 val_hi32;
};
Currently, btf type section has an alignment of 4 as all element types
are u32. Representing the value with __u64 will introduce a pad
for bpf_enum64 and may also introduce misalignment for the 64bit value.
Hence, two members of val_hi32 and val_lo32 are chosen to avoid these issues.
The kflag is also introduced for BTF_KIND_ENUM and BTF_KIND_ENUM64
to indicate whether the value is signed or unsigned. The kflag intends
to provide consistent output of BTF C fortmat with the original
source code. For example, the original BTF_KIND_ENUM bit value is 0xffffffff.
The format C has two choices, printing out 0xffffffff or -1 and current libbpf
prints out as unsigned value. But if the signedness is preserved in btf,
the value can be printed the same as the original source code.
The kflag value 0 means unsigned values, which is consistent to the default
by libbpf and should also cover most cases as well.
The new BTF_KIND_ENUM64 is intended to support the enum value represented as
64bit value. But it can represent all BTF_KIND_ENUM values as well.
The compiler ([1]) and pahole will generate BTF_KIND_ENUM64 only if the value has
to be represented with 64 bits.
In addition, a static inline function btf_kind_core_compat() is introduced which
will be used later when libbpf relo_core.c changed. Here the kernel shares the
same relo_core.c with libbpf.
[1] https://reviews.llvm.org/D124641
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220607062600.3716578-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-06-06 23:26:00 -07:00
case BTF_KIND_ENUM64 :
2019-11-07 10:09:03 -08:00
size = type - > size ;
goto resolved ;
case BTF_KIND_PTR :
size = sizeof ( void * ) ;
goto resolved ;
/* Modifiers */
case BTF_KIND_TYPEDEF :
case BTF_KIND_VOLATILE :
case BTF_KIND_CONST :
case BTF_KIND_RESTRICT :
2021-11-11 17:26:09 -08:00
case BTF_KIND_TYPE_TAG :
2020-08-25 21:21:15 +02:00
id = type - > type ;
2019-11-07 10:09:03 -08:00
type = btf_type_by_id ( btf , type - > type ) ;
break ;
case BTF_KIND_ARRAY :
if ( ! array_type )
array_type = type ;
array = btf_type_array ( type ) ;
if ( nelems & & array - > nelems > U32_MAX / nelems )
return ERR_PTR ( - EINVAL ) ;
nelems * = array - > nelems ;
type = btf_type_by_id ( btf , array - > type ) ;
break ;
/* type without size */
default :
return ERR_PTR ( - EINVAL ) ;
}
}
return ERR_PTR ( - EINVAL ) ;
resolved :
if ( nelems & & size > U32_MAX / nelems )
return ERR_PTR ( - EINVAL ) ;
* type_size = nelems * size ;
bpf: Introduce BPF_MAP_TYPE_STRUCT_OPS
The patch introduces BPF_MAP_TYPE_STRUCT_OPS. The map value
is a kernel struct with its func ptr implemented in bpf prog.
This new map is the interface to register/unregister/introspect
a bpf implemented kernel struct.
The kernel struct is actually embedded inside another new struct
(or called the "value" struct in the code). For example,
"struct tcp_congestion_ops" is embbeded in:
struct bpf_struct_ops_tcp_congestion_ops {
refcount_t refcnt;
enum bpf_struct_ops_state state;
struct tcp_congestion_ops data; /* <-- kernel subsystem struct here */
}
The map value is "struct bpf_struct_ops_tcp_congestion_ops".
The "bpftool map dump" will then be able to show the
state ("inuse"/"tobefree") and the number of subsystem's refcnt (e.g.
number of tcp_sock in the tcp_congestion_ops case). This "value" struct
is created automatically by a macro. Having a separate "value" struct
will also make extending "struct bpf_struct_ops_XYZ" easier (e.g. adding
"void (*init)(void)" to "struct bpf_struct_ops_XYZ" to do some
initialization works before registering the struct_ops to the kernel
subsystem). The libbpf will take care of finding and populating the
"struct bpf_struct_ops_XYZ" from "struct XYZ".
Register a struct_ops to a kernel subsystem:
1. Load all needed BPF_PROG_TYPE_STRUCT_OPS prog(s)
2. Create a BPF_MAP_TYPE_STRUCT_OPS with attr->btf_vmlinux_value_type_id
set to the btf id "struct bpf_struct_ops_tcp_congestion_ops" of the
running kernel.
Instead of reusing the attr->btf_value_type_id,
btf_vmlinux_value_type_id s added such that attr->btf_fd can still be
used as the "user" btf which could store other useful sysadmin/debug
info that may be introduced in the furture,
e.g. creation-date/compiler-details/map-creator...etc.
3. Create a "struct bpf_struct_ops_tcp_congestion_ops" object as described
in the running kernel btf. Populate the value of this object.
The function ptr should be populated with the prog fds.
4. Call BPF_MAP_UPDATE with the object created in (3) as
the map value. The key is always "0".
During BPF_MAP_UPDATE, the code that saves the kernel-func-ptr's
args as an array of u64 is generated. BPF_MAP_UPDATE also allows
the specific struct_ops to do some final checks in "st_ops->init_member()"
(e.g. ensure all mandatory func ptrs are implemented).
If everything looks good, it will register this kernel struct
to the kernel subsystem. The map will not allow further update
from this point.
Unregister a struct_ops from the kernel subsystem:
BPF_MAP_DELETE with key "0".
Introspect a struct_ops:
BPF_MAP_LOOKUP_ELEM with key "0". The map value returned will
have the prog _id_ populated as the func ptr.
The map value state (enum bpf_struct_ops_state) will transit from:
INIT (map created) =>
INUSE (map updated, i.e. reg) =>
TOBEFREE (map value deleted, i.e. unreg)
The kernel subsystem needs to call bpf_struct_ops_get() and
bpf_struct_ops_put() to manage the "refcnt" in the
"struct bpf_struct_ops_XYZ". This patch uses a separate refcnt
for the purose of tracking the subsystem usage. Another approach
is to reuse the map->refcnt and then "show" (i.e. during map_lookup)
the subsystem's usage by doing map->refcnt - map->usercnt to filter out
the map-fd/pinned-map usage. However, that will also tie down the
future semantics of map->refcnt and map->usercnt.
The very first subsystem's refcnt (during reg()) holds one
count to map->refcnt. When the very last subsystem's refcnt
is gone, it will also release the map->refcnt. All bpf_prog will be
freed when the map->refcnt reaches 0 (i.e. during map_free()).
Here is how the bpftool map command will look like:
[root@arch-fb-vm1 bpf]# bpftool map show
6: struct_ops name dctcp flags 0x0
key 4B value 256B max_entries 1 memlock 4096B
btf_id 6
[root@arch-fb-vm1 bpf]# bpftool map dump id 6
[{
"value": {
"refcnt": {
"refs": {
"counter": 1
}
},
"state": 1,
"data": {
"list": {
"next": 0,
"prev": 0
},
"key": 0,
"flags": 2,
"init": 24,
"release": 0,
"ssthresh": 25,
"cong_avoid": 30,
"set_state": 27,
"cwnd_event": 28,
"in_ack_event": 26,
"undo_cwnd": 29,
"pkts_acked": 0,
"min_tso_segs": 0,
"sndbuf_expand": 0,
"cong_control": 0,
"get_info": 0,
"name": [98,112,102,95,100,99,116,99,112,0,0,0,0,0,0,0
],
"owner": 0
}
}
}
]
Misc Notes:
* bpf_struct_ops_map_sys_lookup_elem() is added for syscall lookup.
It does an inplace update on "*value" instead returning a pointer
to syscall.c. Otherwise, it needs a separate copy of "zero" value
for the BPF_STRUCT_OPS_STATE_INIT to avoid races.
* The bpf_struct_ops_map_delete_elem() is also called without
preempt_disable() from map_delete_elem(). It is because
the "->unreg()" may requires sleepable context, e.g.
the "tcp_unregister_congestion_control()".
* "const" is added to some of the existing "struct btf_func_model *"
function arg to avoid a compiler warning caused by this patch.
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200109003505.3855919-1-kafai@fb.com
2020-01-08 16:35:05 -08:00
if ( total_nelems )
* total_nelems = nelems ;
if ( elem_type )
* elem_type = type ;
2020-08-25 21:21:14 +02:00
if ( elem_id )
* elem_id = array ? array - > type : 0 ;
2020-08-25 21:21:15 +02:00
if ( type_id & & id )
* type_id = id ;
2019-11-07 10:09:03 -08:00
return array_type ? : type ;
}
2020-08-25 21:21:13 +02:00
/* Resolve @type to a sized type and store its size in *type_size.
 *
 * Thin wrapper around __btf_resolve_size() that requests none of the
 * optional outputs; the return value is passed through unchanged.
 */
const struct btf_type *
btf_resolve_size(const struct btf *btf, const struct btf_type *type,
		 u32 *type_size)
{
	return __btf_resolve_size(btf, type, type_size,
				  NULL, NULL, NULL, NULL);
}
2020-11-09 17:19:28 -08:00
/* Look up the resolved type id recorded for @type_id.
 *
 * Walks the base_btf chain until it reaches the BTF whose id range
 * contains @type_id, then indexes that BTF's resolved_ids array.
 */
static u32 btf_resolved_type_id(const struct btf *btf, u32 type_id)
{
	const struct btf *owner = btf;

	while (type_id < owner->start_id)
		owner = owner->base_btf;

	return owner->resolved_ids[type_id - owner->start_id];
}
2018-04-18 15:55:58 -07:00
/* The input param "type_id" must point to a needs_resolve type.
 *
 * *type_id is replaced by its resolved type id and the corresponding type
 * is returned.
 */
static const struct btf_type *btf_type_id_resolve(const struct btf *btf,
						  u32 *type_id)
{
	u32 resolved = btf_resolved_type_id(btf, *type_id);

	*type_id = resolved;
	return btf_type_by_id(btf, resolved);
}
2020-11-09 17:19:28 -08:00
/* Look up the resolved size recorded for @type_id.
 *
 * Walks the base_btf chain until it reaches the BTF whose id range
 * contains @type_id, then indexes that BTF's resolved_sizes array.
 */
static u32 btf_resolved_type_size(const struct btf *btf, u32 type_id)
{
	const struct btf *owner = btf;

	while (type_id < owner->start_id)
		owner = owner->base_btf;

	return owner->resolved_sizes[type_id - owner->start_id];
}
2018-04-18 15:55:58 -07:00
/* Find the sized type behind *type_id and report its size.
 *
 * On success the sized type is returned, *type_id is updated to that
 * type's id and, if @ret_size is non-NULL, *ret_size receives the size.
 * NULL is returned when the type (or the type it resolves to) has no size.
 *
 * A type that is not directly sized (has a size, array or pointer) must be
 * a modifier or a var; anything else triggers WARN_ON_ONCE and NULL.
 *
 * A modifier's own resolved size is deliberately not used. In a chain such
 * as PTR -> TYPEDEF -> ARRAY the array is a sink in pointer context, so
 * the TYPEDEF could be marked resolved with a zero size before the ARRAY's
 * size is known. Instead, the modifier's resolved id is followed to the
 * concrete type and the size is determined from that type.
 */
const struct btf_type *btf_type_id_size(const struct btf *btf,
					u32 *type_id, u32 *ret_size)
{
	const struct btf_type *t;
	u32 id = *type_id;
	u32 sz = 0;
	bool sized;

	t = btf_type_by_id(btf, id);
	if (btf_type_nosize_or_null(t))
		return NULL;

	sized = btf_type_has_size(t) || btf_type_is_array(t) ||
		btf_type_is_ptr(t);
	if (!sized) {
		if (WARN_ON_ONCE(!btf_type_is_modifier(t) &&
				 !btf_type_is_var(t)))
			return NULL;

		/* Hop to the concrete type the modifier/var resolves to. */
		id = btf_resolved_type_id(btf, id);
		t = btf_type_by_id(btf, id);
		if (btf_type_nosize_or_null(t))
			return NULL;
	}

	if (btf_type_has_size(t))
		sz = t->size;
	else if (btf_type_is_array(t))
		sz = btf_resolved_type_size(btf, id);
	else if (btf_type_is_ptr(t))
		sz = sizeof(void *);
	else
		return NULL;

	*type_id = id;
	if (ret_size)
		*ret_size = sz;

	return t;
}
2018-04-18 15:55:59 -07:00
/* check_member handler that rejects every member unconditionally.
 *
 * Logs an "Unsupported check_member" message against struct_type and
 * returns -EINVAL; member and member_type are not inspected.
 *
 * NOTE(review): presumably installed as the default check_member callback
 * for kinds that cannot be struct members - confirm against the kind
 * operations tables elsewhere in this file.
 */
static int btf_df_check_member ( struct btf_verifier_env * env ,
const struct btf_type * struct_type ,
const struct btf_member * member ,
const struct btf_type * member_type )
{
btf_verifier_log_basic ( env , struct_type ,
" Unsupported check_member " ) ;
return - EINVAL ;
}
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole works around the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
/*
 * check_kflag_member callback that rejects every member.
 *
 * Logs an "Unsupported check_kflag_member" message against @struct_type
 * through btf_verifier_log_basic() and always fails.  @member and
 * @member_type are not looked at.
 *
 * Return: always -EINVAL.
 */
static int btf_df_check_kflag_member(struct btf_verifier_env *env,
				     const struct btf_type *struct_type,
				     const struct btf_member *member,
				     const struct btf_type *member_type)
{
	btf_verifier_log_basic(env, struct_type,
			       " Unsupported check_kflag_member ");

	return -EINVAL;
}
2021-02-26 21:22:52 +01:00
/* Used for ptr, array, struct/union and float type members.
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole works around the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
* int, enum and modifier types have their specific callback functions.
*/
static int btf_generic_check_kflag_member ( struct btf_verifier_env * env ,
const struct btf_type * struct_type ,
const struct btf_member * member ,
const struct btf_type * member_type )
{
if ( BTF_MEMBER_BITFIELD_SIZE ( member - > offset ) ) {
btf_verifier_log_member ( env , struct_type , member ,
" Invalid member bitfield_size " ) ;
return - EINVAL ;
}
/* bitfield size is 0, so member->offset represents bit offset only.
* It is safe to call non kflag check_member variants .
*/
return btf_type_ops ( member_type ) - > check_member ( env , struct_type ,
member ,
member_type ) ;
}
2018-04-18 15:55:58 -07:00
static int btf_df_resolve ( struct btf_verifier_env * env ,
const struct resolve_vertex * v )
{
btf_verifier_log_basic ( env , v - > t , " Unsupported resolve " ) ;
return - EINVAL ;
}
2020-09-28 12:31:04 +01:00
/*
 * show callback that only reports the kind of @t.
 *
 * Emits an "<unsupported kind:N>" string through btf_show(), where N is
 * BTF_INFO_KIND(t->info).  @btf, @type_id, @data and @bits_offsets are not
 * used.
 */
static void btf_df_show(const struct btf *btf, const struct btf_type *t,
			u32 type_id, void *data, u8 bits_offsets,
			struct btf_show *show)
{
	btf_show(show, " <unsupported kind:%u> ", BTF_INFO_KIND(t->info));
}
2018-04-18 15:55:59 -07:00
static int btf_int_check_member ( struct btf_verifier_env * env ,
const struct btf_type * struct_type ,
const struct btf_member * member ,
const struct btf_type * member_type )
{
u32 int_data = btf_type_int ( member_type ) ;
u32 struct_bits_off = member - > offset ;
u32 struct_size = struct_type - > size ;
u32 nr_copy_bits ;
u32 bytes_offset ;
if ( U32_MAX - struct_bits_off < BTF_INT_OFFSET ( int_data ) ) {
btf_verifier_log_member ( env , struct_type , member ,
" bits_offset exceeds U32_MAX " ) ;
return - EINVAL ;
}
struct_bits_off + = BTF_INT_OFFSET ( int_data ) ;
bytes_offset = BITS_ROUNDDOWN_BYTES ( struct_bits_off ) ;
nr_copy_bits = BTF_INT_BITS ( int_data ) +
BITS_PER_BYTE_MASKED ( struct_bits_off ) ;
bpf: btf: support 128 bit integer type
Currently, btf only supports up to 64-bit integer.
On the other hand, 128bit support for gcc and clang
has existed for a long time. For example, both gcc 4.8
and llvm 3.7 supports types "__int128" and
"unsigned __int128" for virtually all 64bit architectures
including bpf.
The requirement for __int128 support comes from two areas:
. bpf program may use __int128. For example, some bcc tools
(https://github.com/iovisor/bcc/tree/master/tools),
mostly tcp v6 related, tcpstates.py, tcpaccept.py, etc.,
are using __int128 to represent the ipv6 addresses.
. linux itself is using __int128 types. Hence supporting
__int128 type in BTF is required for vmlinux BTF,
which will be used by "compile once and run everywhere"
and other projects.
For 128bit integer, instead of base-10, hex numbers are pretty
printed out as large decimal number is hard to decipher, e.g.,
for ipv6 addresses.
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2019-01-15 17:07:47 -08:00
if ( nr_copy_bits > BITS_PER_U128 ) {
2018-04-18 15:55:59 -07:00
btf_verifier_log_member ( env , struct_type , member ,
bpf: btf: support 128 bit integer type
Currently, btf only supports up to 64-bit integer.
On the other hand, 128bit support for gcc and clang
has existed for a long time. For example, both gcc 4.8
and llvm 3.7 supports types "__int128" and
"unsigned __int128" for virtually all 64bit architectures
including bpf.
The requirement for __int128 support comes from two areas:
. bpf program may use __int128. For example, some bcc tools
(https://github.com/iovisor/bcc/tree/master/tools),
mostly tcp v6 related, tcpstates.py, tcpaccept.py, etc.,
are using __int128 to represent the ipv6 addresses.
. linux itself is using __int128 types. Hence supporting
__int128 type in BTF is required for vmlinux BTF,
which will be used by "compile once and run everywhere"
and other projects.
For 128bit integer, instead of base-10, hex numbers are pretty
printed out as large decimal number is hard to decipher, e.g.,
for ipv6 addresses.
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2019-01-15 17:07:47 -08:00
" nr_copy_bits exceeds 128 " ) ;
2018-04-18 15:55:59 -07:00
return - EINVAL ;
}
if ( struct_size < bytes_offset | |
struct_size - bytes_offset < BITS_ROUNDUP_BYTES ( nr_copy_bits ) ) {
btf_verifier_log_member ( env , struct_type , member ,
" Member exceeds struct_size " ) ;
return - EINVAL ;
}
return 0 ;
}
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole workarounds the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
static int btf_int_check_kflag_member ( struct btf_verifier_env * env ,
const struct btf_type * struct_type ,
const struct btf_member * member ,
const struct btf_type * member_type )
{
u32 struct_bits_off , nr_bits , nr_int_data_bits , bytes_offset ;
u32 int_data = btf_type_int ( member_type ) ;
u32 struct_size = struct_type - > size ;
u32 nr_copy_bits ;
/* a regular int type is required for the kflag int member */
if ( ! btf_type_int_is_regular ( member_type ) ) {
btf_verifier_log_member ( env , struct_type , member ,
" Invalid member base type " ) ;
return - EINVAL ;
}
/* check sanity of bitfield size */
nr_bits = BTF_MEMBER_BITFIELD_SIZE ( member - > offset ) ;
struct_bits_off = BTF_MEMBER_BIT_OFFSET ( member - > offset ) ;
nr_int_data_bits = BTF_INT_BITS ( int_data ) ;
if ( ! nr_bits ) {
/* Not a bitfield member, member offset must be at byte
* boundary .
*/
if ( BITS_PER_BYTE_MASKED ( struct_bits_off ) ) {
btf_verifier_log_member ( env , struct_type , member ,
" Invalid member offset " ) ;
return - EINVAL ;
}
nr_bits = nr_int_data_bits ;
} else if ( nr_bits > nr_int_data_bits ) {
btf_verifier_log_member ( env , struct_type , member ,
" Invalid member bitfield_size " ) ;
return - EINVAL ;
}
bytes_offset = BITS_ROUNDDOWN_BYTES ( struct_bits_off ) ;
nr_copy_bits = nr_bits + BITS_PER_BYTE_MASKED ( struct_bits_off ) ;
bpf: btf: support 128 bit integer type
Currently, btf only supports up to 64-bit integer.
On the other hand, 128bit support for gcc and clang
has existed for a long time. For example, both gcc 4.8
and llvm 3.7 supports types "__int128" and
"unsigned __int128" for virtually all 64bit architectures
including bpf.
The requirement for __int128 support comes from two areas:
. bpf program may use __int128. For example, some bcc tools
(https://github.com/iovisor/bcc/tree/master/tools),
mostly tcp v6 related, tcpstates.py, tcpaccept.py, etc.,
are using __int128 to represent the ipv6 addresses.
. linux itself is using __int128 types. Hence supporting
__int128 type in BTF is required for vmlinux BTF,
which will be used by "compile once and run everywhere"
and other projects.
For 128bit integer, instead of base-10, hex numbers are pretty
printed out as large decimal number is hard to decipher, e.g.,
for ipv6 addresses.
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2019-01-15 17:07:47 -08:00
if ( nr_copy_bits > BITS_PER_U128 ) {
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole workarounds the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
btf_verifier_log_member ( env , struct_type , member ,
bpf: btf: support 128 bit integer type
Currently, btf only supports up to 64-bit integer.
On the other hand, 128bit support for gcc and clang
has existed for a long time. For example, both gcc 4.8
and llvm 3.7 supports types "__int128" and
"unsigned __int128" for virtually all 64bit architectures
including bpf.
The requirement for __int128 support comes from two areas:
. bpf program may use __int128. For example, some bcc tools
(https://github.com/iovisor/bcc/tree/master/tools),
mostly tcp v6 related, tcpstates.py, tcpaccept.py, etc.,
are using __int128 to represent the ipv6 addresses.
. linux itself is using __int128 types. Hence supporting
__int128 type in BTF is required for vmlinux BTF,
which will be used by "compile once and run everywhere"
and other projects.
For 128bit integer, instead of base-10, hex numbers are pretty
printed out as large decimal number is hard to decipher, e.g.,
for ipv6 addresses.
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2019-01-15 17:07:47 -08:00
" nr_copy_bits exceeds 128 " ) ;
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole workarounds the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
return - EINVAL ;
}
if ( struct_size < bytes_offset | |
struct_size - bytes_offset < BITS_ROUNDUP_BYTES ( nr_copy_bits ) ) {
btf_verifier_log_member ( env , struct_type , member ,
" Member exceeds struct_size " ) ;
return - EINVAL ;
}
return 0 ;
}
2018-04-18 15:55:57 -07:00
/*
 * Validate the metadata of an INT type.
 *
 * @env:       verifier environment used for logging
 * @t:         the type being checked
 * @meta_left: number of metadata bytes still available after @t
 *
 * The checks, in order: enough metadata is left for the int_data word,
 * vlen is zero, kind_flag is clear, int_data has no bits outside
 * BTF_INT_MASK, bits + offset does not exceed BITS_PER_U128, the bits fit
 * in t->size bytes, and at most one known encoding bit is set.
 *
 * Return: the number of metadata bytes consumed (sizeof(u32)) on success,
 * -EINVAL on malformed metadata, -ENOTSUPP on an unsupported encoding.
 */
static s32 btf_int_check_meta(struct btf_verifier_env *env,
			      const struct btf_type *t,
			      u32 meta_left)
{
	u32 int_data, nr_bits, meta_needed = sizeof(int_data);
	u16 encoding;

	if (meta_left < meta_needed) {
		btf_verifier_log_basic(env, t,
				       "meta_left:%u meta_needed:%u",
				       meta_left, meta_needed);
		return -EINVAL;
	}

	if (btf_type_vlen(t)) {
		btf_verifier_log_type(env, t, "vlen != 0");
		return -EINVAL;
	}

	if (btf_type_kflag(t)) {
		btf_verifier_log_type(env, t, "Invalid btf_info kind_flag");
		return -EINVAL;
	}

	int_data = btf_type_int(t);
	if (int_data & ~BTF_INT_MASK) {
		btf_verifier_log_basic(env, t, "Invalid int_data:%x",
				       int_data);
		return -EINVAL;
	}

	/* The value occupies [offset, offset + bits) of the storage unit. */
	nr_bits = BTF_INT_BITS(int_data) + BTF_INT_OFFSET(int_data);

	if (nr_bits > BITS_PER_U128) {
		btf_verifier_log_type(env, t, "nr_bits exceeds %zu",
				      BITS_PER_U128);
		return -EINVAL;
	}

	if (BITS_ROUNDUP_BYTES(nr_bits) > t->size) {
		btf_verifier_log_type(env, t, "nr_bits exceeds type_size");
		return -EINVAL;
	}

	/*
	 * Only one of the encoding bits is allowed and it
	 * should be sufficient for the pretty print purpose (i.e. decoding).
	 * Multiple bits can be allowed later if it is found
	 * to be insufficient.
	 */
	encoding = BTF_INT_ENCODING(int_data);
	if (encoding &&
	    encoding != BTF_INT_SIGNED &&
	    encoding != BTF_INT_CHAR &&
	    encoding != BTF_INT_BOOL) {
		btf_verifier_log_type(env, t, "Unsupported encoding");
		return -ENOTSUPP;
	}

	/* A NULL format logs the type itself without an error message. */
	btf_verifier_log_type(env, t, NULL);

	return meta_needed;
}
static void btf_int_log ( struct btf_verifier_env * env ,
const struct btf_type * t )
{
int int_data = btf_type_int ( t ) ;
btf_verifier_log ( env ,
" size=%u bits_offset=%u nr_bits=%u encoding=%s " ,
t - > size , BTF_INT_OFFSET ( int_data ) ,
BTF_INT_BITS ( int_data ) ,
btf_int_encoding_str ( BTF_INT_ENCODING ( int_data ) ) ) ;
}
2020-09-28 12:31:04 +01:00
static void btf_int128_print ( struct btf_show * show , void * data )
bpf: btf: support 128 bit integer type
Currently, btf only supports up to 64-bit integer.
On the other hand, 128bit support for gcc and clang
has existed for a long time. For example, both gcc 4.8
and llvm 3.7 supports types "__int128" and
"unsigned __int128" for virtually all 64bit architectures
including bpf.
The requirement for __int128 support comes from two areas:
. bpf program may use __int128. For example, some bcc tools
(https://github.com/iovisor/bcc/tree/master/tools),
mostly tcp v6 related, tcpstates.py, tcpaccept.py, etc.,
are using __int128 to represent the ipv6 addresses.
. linux itself is using __int128 types. Hence supporting
__int128 type in BTF is required for vmlinux BTF,
which will be used by "compile once and run everywhere"
and other projects.
For 128bit integer, instead of base-10, hex numbers are pretty
printed out as large decimal number is hard to decipher, e.g.,
for ipv6 addresses.
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2019-01-15 17:07:47 -08:00
{
/* data points to a __int128 number.
* Suppose
* int128_num = * ( __int128 * ) data ;
* The below formulas shows what upper_num and lower_num represents :
* upper_num = int128_num > > 64 ;
* lower_num = int128_num & 0xffffffffFFFFFFFFULL ;
*/
u64 upper_num , lower_num ;
# ifdef __BIG_ENDIAN_BITFIELD
upper_num = * ( u64 * ) data ;
lower_num = * ( u64 * ) ( data + 8 ) ;
# else
upper_num = * ( u64 * ) ( data + 8 ) ;
lower_num = * ( u64 * ) data ;
# endif
if ( upper_num = = 0 )
2020-09-28 12:31:04 +01:00
btf_show_type_value ( show , " 0x%llx " , lower_num ) ;
bpf: btf: support 128 bit integer type
Currently, btf only supports up to 64-bit integer.
On the other hand, 128bit support for gcc and clang
has existed for a long time. For example, both gcc 4.8
and llvm 3.7 supports types "__int128" and
"unsigned __int128" for virtually all 64bit architectures
including bpf.
The requirement for __int128 support comes from two areas:
. bpf program may use __int128. For example, some bcc tools
(https://github.com/iovisor/bcc/tree/master/tools),
mostly tcp v6 related, tcpstates.py, tcpaccept.py, etc.,
are using __int128 to represent the ipv6 addresses.
. linux itself is using __int128 types. Hence supporting
__int128 type in BTF is required for vmlinux BTF,
which will be used by "compile once and run everywhere"
and other projects.
For 128bit integer, instead of base-10, hex numbers are pretty
printed out as large decimal number is hard to decipher, e.g.,
for ipv6 addresses.
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2019-01-15 17:07:47 -08:00
else
2020-09-28 12:31:04 +01:00
btf_show_type_values ( show , " 0x%llx%016llx " , upper_num ,
lower_num ) ;
bpf: btf: support 128 bit integer type
Currently, btf only supports up to 64-bit integer.
On the other hand, 128bit support for gcc and clang
has existed for a long time. For example, both gcc 4.8
and llvm 3.7 supports types "__int128" and
"unsigned __int128" for virtually all 64bit architectures
including bpf.
The requirement for __int128 support comes from two areas:
. bpf program may use __int128. For example, some bcc tools
(https://github.com/iovisor/bcc/tree/master/tools),
mostly tcp v6 related, tcpstates.py, tcpaccept.py, etc.,
are using __int128 to represent the ipv6 addresses.
. linux itself is using __int128 types. Hence supporting
__int128 type in BTF is required for vmlinux BTF,
which will be used by "compile once and run everywhere"
and other projects.
For 128bit integer, instead of base-10, hex numbers are pretty
printed out as large decimal number is hard to decipher, e.g.,
for ipv6 addresses.
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2019-01-15 17:07:47 -08:00
}
/*
 * Shift a 128-bit value left, then right, in place.
 *
 * @print_num:        two u64 words holding the value; which word is the
 *                    upper half depends on __BIG_ENDIAN_BITFIELD
 * @left_shift_bits:  number of bits to shift left first
 * @right_shift_bits: number of bits to shift right afterwards
 *
 * Shifting left and then (logically) right discards the bits on both
 * sides of a bitfield and leaves it right-aligned.
 *
 * A count of 0 is a no-op and a count of 128 or more clears the value.
 * Both are handled explicitly, because the general formulas would shift
 * a u64 by 64 or more bits, which is undefined behaviour in C.
 */
static void btf_int128_shift(u64 *print_num, u16 left_shift_bits,
			     u16 right_shift_bits)
{
	u64 upper_num, lower_num;

#ifdef __BIG_ENDIAN_BITFIELD
	upper_num = print_num[0];
	lower_num = print_num[1];
#else
	upper_num = print_num[1];
	lower_num = print_num[0];
#endif

	/* shake out un-needed bits by shift/or operations */
	if (left_shift_bits >= 128) {
		upper_num = 0;
		lower_num = 0;
	} else if (left_shift_bits >= 64) {
		upper_num = lower_num << (left_shift_bits - 64);
		lower_num = 0;
	} else if (left_shift_bits) {
		upper_num = (upper_num << left_shift_bits) |
			    (lower_num >> (64 - left_shift_bits));
		lower_num = lower_num << left_shift_bits;
	}

	if (right_shift_bits >= 128) {
		upper_num = 0;
		lower_num = 0;
	} else if (right_shift_bits >= 64) {
		lower_num = upper_num >> (right_shift_bits - 64);
		upper_num = 0;
	} else if (right_shift_bits) {
		lower_num = (lower_num >> right_shift_bits) |
			    (upper_num << (64 - right_shift_bits));
		upper_num = upper_num >> right_shift_bits;
	}

#ifdef __BIG_ENDIAN_BITFIELD
	print_num[0] = upper_num;
	print_num[1] = lower_num;
#else
	print_num[0] = lower_num;
	print_num[1] = upper_num;
#endif
}
2020-09-28 12:31:04 +01:00
/*
 * Print a bitfield of @nr_bits bits that starts @bits_offset bits into
 * the memory at @data.
 *
 * The bytes covering the field are copied into a zeroed 128-bit scratch
 * value, the bits on either side of the field are shifted out, and the
 * right-aligned result is printed in hex by btf_int128_print().
 *
 * NOTE(review): this relies on bits_offset + nr_bits <= BITS_PER_U128 so
 * that the memcpy() fits print_num and the little-endian left shift does
 * not go negative; nothing in this function enforces it, so it is
 * presumably guaranteed by the callers / type validation -- confirm.
 */
static void btf_bitfield_show(void *data, u8 bits_offset,
			      u8 nr_bits, struct btf_show *show)
{
	u16 left_shift_bits, right_shift_bits;
	u8 nr_copy_bytes;
	u8 nr_copy_bits;
	u64 print_num[2] = {};

	nr_copy_bits = nr_bits + bits_offset;
	nr_copy_bytes = BITS_ROUNDUP_BYTES(nr_copy_bits);

	memcpy(print_num, data, nr_copy_bytes);

#ifdef __BIG_ENDIAN_BITFIELD
	left_shift_bits = bits_offset;
#else
	left_shift_bits = BITS_PER_U128 - nr_copy_bits;
#endif
	right_shift_bits = BITS_PER_U128 - nr_bits;

	btf_int128_shift(print_num, left_shift_bits, right_shift_bits);
	btf_int128_print(show, print_num);
}
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole works around the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 255 (8 bits), which is enough for today as the
maximum bitfield in a compiler can be 128 where the int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
2020-09-28 12:31:04 +01:00
/*
 * Print an INT value that is not a whole, byte-aligned power-of-two
 * integer, by handing it to btf_bitfield_show().
 *
 * @btf:         unused here
 * @t:           the INT type; its int_data supplies the bit width and
 *               the type's own bit offset
 * @data:        start of the storage the value lives in
 * @bits_offset: extra bit offset supplied by the caller
 * @show:        output context
 *
 * The caller's offset and the type's offset are added; whole bytes of
 * that sum advance @data and the remaining bits are passed on as the
 * offset within the first byte.
 */
static void btf_int_bits_show(const struct btf *btf,
			      const struct btf_type *t,
			      void *data, u8 bits_offset,
			      struct btf_show *show)
{
	u32 int_data = btf_type_int(t);
	u8 nr_bits = BTF_INT_BITS(int_data);
	u8 total_bits_offset;

	/*
	 * bits_offset is at most 7.
	 * BTF_INT_OFFSET() cannot exceed 128 bits.
	 */
	total_bits_offset = bits_offset + BTF_INT_OFFSET(int_data);
	data += BITS_ROUNDDOWN_BYTES(total_bits_offset);
	bits_offset = BITS_PER_BYTE_MASKED(total_bits_offset);
	btf_bitfield_show(data, bits_offset, nr_bits, show);
}
2020-09-28 12:31:04 +01:00
/*
 * Print the value of an INT type.
 *
 * @btf:         BTF object the type belongs to
 * @t:           the INT type
 * @type_id:     id of @t, passed to btf_show_start_type()
 * @data:        address of the value as supplied by the caller
 * @bits_offset: bit offset of the value relative to @data
 * @show:        output context
 *
 * btf_show_start_type() yields the pointer the value is actually read
 * through (safe_data); nothing is printed when it returns NULL. Values
 * with any bit offset or a width that is not a multiple of 8 go through
 * btf_int_bits_show(). Widths of 128/64/32/16/8 bits are printed directly,
 * signed or unsigned according to BTF_INT_SIGNED; any other width falls
 * back to btf_int_bits_show().
 *
 * While an array with BTF_INT_CHAR encoding is being shown, 8-bit values
 * are treated as string characters: nothing is printed once the array is
 * marked terminated, a NUL marks it terminated, and printable characters
 * are shown as 'c'.
 */
static void btf_int_show(const struct btf *btf, const struct btf_type *t,
			 u32 type_id, void *data, u8 bits_offset,
			 struct btf_show *show)
{
	u32 int_data = btf_type_int(t);
	u8 encoding = BTF_INT_ENCODING(int_data);
	bool sign = encoding & BTF_INT_SIGNED;
	u8 nr_bits = BTF_INT_BITS(int_data);
	void *safe_data;

	safe_data = btf_show_start_type(show, t, type_id, data);
	if (!safe_data)
		return;

	if (bits_offset || BTF_INT_OFFSET(int_data) ||
	    BITS_PER_BYTE_MASKED(nr_bits)) {
		btf_int_bits_show(btf, t, safe_data, bits_offset, show);
		goto out;
	}

	switch (nr_bits) {
	case 128:
		btf_int128_print(show, safe_data);
		break;
	case 64:
		if (sign)
			btf_show_type_value(show, "%lld", *(s64 *)safe_data);
		else
			btf_show_type_value(show, "%llu", *(u64 *)safe_data);
		break;
	case 32:
		if (sign)
			btf_show_type_value(show, "%d", *(s32 *)safe_data);
		else
			btf_show_type_value(show, "%u", *(u32 *)safe_data);
		break;
	case 16:
		if (sign)
			btf_show_type_value(show, "%d", *(s16 *)safe_data);
		else
			btf_show_type_value(show, "%u", *(u16 *)safe_data);
		break;
	case 8:
		if (show->state.array_encoding == BTF_INT_CHAR) {
			/* check for null terminator */
			if (show->state.array_terminated)
				break;
			/*
			 * Read the character through safe_data, like every
			 * other access in this function, rather than through
			 * the caller-supplied data pointer.
			 */
			if (*(char *)safe_data == '\0') {
				show->state.array_terminated = 1;
				break;
			}
			if (isprint(*(char *)safe_data)) {
				btf_show_type_value(show, "'%c'",
						    *(char *)safe_data);
				break;
			}
		}
		if (sign)
			btf_show_type_value(show, "%d", *(s8 *)safe_data);
		else
			btf_show_type_value(show, "%u", *(u8 *)safe_data);
		break;
	default:
		btf_int_bits_show(btf, t, safe_data, bits_offset, show);
		break;
	}
out:
	btf_show_end_type(show);
}
2018-04-18 15:55:57 -07:00
static const struct btf_kind_operations int_ops = {
. check_meta = btf_int_check_meta ,
2018-04-18 15:55:58 -07:00
. resolve = btf_df_resolve ,
2018-04-18 15:55:59 -07:00
. check_member = btf_int_check_member ,
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole workarounds the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
. check_kflag_member = btf_int_check_kflag_member ,
2018-04-18 15:55:57 -07:00
. log_details = btf_int_log ,
2020-09-28 12:31:04 +01:00
. show = btf_int_show ,
2018-04-18 15:55:57 -07:00
} ;
2018-04-18 15:55:59 -07:00
static int btf_modifier_check_member ( struct btf_verifier_env * env ,
const struct btf_type * struct_type ,
const struct btf_member * member ,
const struct btf_type * member_type )
{
const struct btf_type * resolved_type ;
u32 resolved_type_id = member - > type ;
struct btf_member resolved_member ;
struct btf * btf = env - > btf ;
resolved_type = btf_type_id_size ( btf , & resolved_type_id , NULL ) ;
if ( ! resolved_type ) {
btf_verifier_log_member ( env , struct_type , member ,
" Invalid member " ) ;
return - EINVAL ;
}
resolved_member = * member ;
resolved_member . type = resolved_type_id ;
return btf_type_ops ( resolved_type ) - > check_member ( env , struct_type ,
& resolved_member ,
resolved_type ) ;
}
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole works around the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 255 (8 bits), which is enough for today, as the
maximum bitfield in a compiler can be 128 where the int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
static int btf_modifier_check_kflag_member ( struct btf_verifier_env * env ,
const struct btf_type * struct_type ,
const struct btf_member * member ,
const struct btf_type * member_type )
{
const struct btf_type * resolved_type ;
u32 resolved_type_id = member - > type ;
struct btf_member resolved_member ;
struct btf * btf = env - > btf ;
resolved_type = btf_type_id_size ( btf , & resolved_type_id , NULL ) ;
if ( ! resolved_type ) {
btf_verifier_log_member ( env , struct_type , member ,
" Invalid member " ) ;
return - EINVAL ;
}
resolved_member = * member ;
resolved_member . type = resolved_type_id ;
return btf_type_ops ( resolved_type ) - > check_kflag_member ( env , struct_type ,
& resolved_member ,
resolved_type ) ;
}
2018-04-18 15:55:59 -07:00
static int btf_ptr_check_member ( struct btf_verifier_env * env ,
const struct btf_type * struct_type ,
const struct btf_member * member ,
const struct btf_type * member_type )
{
u32 struct_size , struct_bits_off , bytes_offset ;
struct_size = struct_type - > size ;
struct_bits_off = member - > offset ;
bytes_offset = BITS_ROUNDDOWN_BYTES ( struct_bits_off ) ;
if ( BITS_PER_BYTE_MASKED ( struct_bits_off ) ) {
btf_verifier_log_member ( env , struct_type , member ,
" Member is not byte aligned " ) ;
return - EINVAL ;
}
if ( struct_size - bytes_offset < sizeof ( void * ) ) {
btf_verifier_log_member ( env , struct_type , member ,
" Member exceeds struct_size " ) ;
return - EINVAL ;
}
return 0 ;
}
2018-04-18 15:55:57 -07:00
/*
 * check_meta callback for the reference kinds (typedef, type_tag and
 * the other ref types such as volatile, const and restrict).
 *
 * The type is rejected with -EINVAL, after logging the reason, when:
 *   - btf_type_vlen(t) is non-zero,
 *   - btf_type_kflag(t) is set,
 *   - t->type fails BTF_TYPE_ID_VALID(),
 *   - the name does not fit the kind: a TYPEDEF needs a non-zero
 *     name_off that passes btf_name_valid_identifier(), a TYPE_TAG
 *     needs a non-empty string from btf_name_by_offset(), and every
 *     other kind must have name_off equal to 0.
 *
 * On success the type is logged and 0 is returned.  meta_left is not
 * read by this function.
 */
static int btf_ref_type_check_meta ( struct btf_verifier_env * env ,
const struct btf_type * t ,
u32 meta_left )
{
2021-11-11 17:26:09 -08:00
const char * value ;
2018-04-18 15:55:57 -07:00
/* vlen must be zero for these kinds. */
if ( btf_type_vlen ( t ) ) {
btf_verifier_log_type ( env , t , " vlen != 0 " ) ;
return - EINVAL ;
}
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole workarounds the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
if ( btf_type_kflag ( t ) ) {
btf_verifier_log_type ( env , t , " Invalid btf_info kind_flag " ) ;
return - EINVAL ;
}
2018-05-22 14:57:20 -07:00
if ( ! BTF_TYPE_ID_VALID ( t - > type ) ) {
2018-04-18 15:55:57 -07:00
btf_verifier_log_type ( env , t , " Invalid type_id " ) ;
return - EINVAL ;
}
2021-11-11 17:26:09 -08:00
/* typedef/type_tag type must have a valid name, and other ref types,
2018-11-27 13:23:28 -08:00
* volatile , const , restrict , should have a null name .
*/
if ( BTF_INFO_KIND ( t - > info ) = = BTF_KIND_TYPEDEF ) {
if ( ! t - > name_off | |
! btf_name_valid_identifier ( env - > btf , t - > name_off ) ) {
btf_verifier_log_type ( env , t , " Invalid name " ) ;
return - EINVAL ;
}
2021-11-11 17:26:09 -08:00
} else if ( BTF_INFO_KIND ( t - > info ) = = BTF_KIND_TYPE_TAG ) {
value = btf_name_by_offset ( env - > btf , t - > name_off ) ;
if ( ! value | | ! value [ 0 ] ) {
btf_verifier_log_type ( env , t , " Invalid name " ) ;
return - EINVAL ;
}
2018-11-27 13:23:28 -08:00
} else {
if ( t - > name_off ) {
btf_verifier_log_type ( env , t , " Invalid name " ) ;
return - EINVAL ;
}
}
2018-04-18 15:55:57 -07:00
btf_verifier_log_type ( env , t , NULL ) ;
return 0 ;
}
2018-04-18 15:55:58 -07:00
/*
 * resolve callback for modifier kinds.
 *
 * Looks up the type referenced by t->type and fails with -EINVAL when
 * it does not exist or is a resolve-source-only type.  If the
 * referenced type is neither a resolve sink nor already resolved, it
 * is pushed onto the env stack and the result of env_stack_push() is
 * returned.  Otherwise btf_type_id_size() is consulted; when it
 * returns NULL only void, fwd and func_proto targets are accepted and
 * anything else is rejected with -EINVAL.  On success the current
 * vertex is popped via env_stack_pop_resolved() with next_type_id and
 * 0 as arguments, and 0 is returned.
 */
static int btf_modifier_resolve ( struct btf_verifier_env * env ,
const struct resolve_vertex * v )
{
const struct btf_type * t = v - > t ;
const struct btf_type * next_type ;
u32 next_type_id = t - > type ;
struct btf * btf = env - > btf ;
next_type = btf_type_by_id ( btf , next_type_id ) ;
2019-04-09 23:20:09 +02:00
if ( ! next_type | | btf_type_is_resolve_source_only ( next_type ) ) {
2018-04-18 15:55:58 -07:00
btf_verifier_log_type ( env , v - > t , " Invalid type_id " ) ;
return - EINVAL ;
}
/* The referenced type still needs resolving: visit it first. */
if ( ! env_type_is_resolve_sink ( env , next_type ) & &
! env_type_is_resolved ( env , next_type_id ) )
return env_stack_push ( env , next_type , next_type_id ) ;
/* Figure out the resolved next_type_id with size.
* They will be stored in the current modifier ' s
* resolved_ids and resolved_sizes such that it can
* save us a few type - following when we use it later ( e . g . in
* pretty print ) .
*/
bpf: fix BTF verifier size resolution logic
BTF verifier has a size resolution bug which in some circumstances leads to
invalid size resolution for, e.g., TYPEDEF modifier. This happens if we have
[1] PTR -> [2] TYPEDEF -> [3] ARRAY, in which case due to being in pointer
context ARRAY size won't be resolved (because for pointer it doesn't matter, so
it's a sink in pointer context), but it will be permanently remembered as zero
for TYPEDEF and TYPEDEF will be marked as RESOLVED. Eventually ARRAY size will
be resolved correctly, but TYPEDEF resolved_size won't be updated anymore.
This, subsequently, will lead to erroneous map creation failure, if that
TYPEDEF is specified as either key or value, as key_size/value_size won't
correspond to resolved size of TYPEDEF (kernel will believe it's zero).
Note, that if BTF was ordered as [1] ARRAY <- [2] TYPEDEF <- [3] PTR, this
won't be a problem, as by the time we get to TYPEDEF, ARRAY's size is already
calculated and stored.
This bug manifests itself in rejecting BTF-defined maps that use array
typedef as a value type:
typedef int array_t[16];
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(value, array_t); /* i.e., array_t *value; */
} test_map SEC(".maps");
The fix consists on not relying on modifier's resolved_size and instead using
modifier's resolved_id (type ID for "concrete" type to which modifier
eventually resolves) and doing size determination for that resolved type. This
allow to preserve existing "early DFS termination" logic for PTR or
STRUCT_OR_ARRAY contexts, but still do correct size determination for modifier
types.
Fixes: eb3f595dab40 ("bpf: btf: Validate type reference")
Cc: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2019-07-12 10:25:55 -07:00
if ( ! btf_type_id_size ( btf , & next_type_id , NULL ) ) {
2018-11-19 15:29:08 -08:00
if ( env_type_is_resolved ( env , next_type_id ) )
next_type = btf_type_id_resolve ( btf , & next_type_id ) ;
/* "typedef void new_void", "const void"...etc */
if ( ! btf_type_is_void ( next_type ) & &
2019-01-29 16:38:16 -08:00
! btf_type_is_fwd ( next_type ) & &
! btf_type_is_func_proto ( next_type ) ) {
2018-11-19 15:29:08 -08:00
btf_verifier_log_type ( env , v - > t , " Invalid type_id " ) ;
return - EINVAL ;
}
2018-04-18 15:55:58 -07:00
}
bpf: fix BTF verifier size resolution logic
BTF verifier has a size resolution bug which in some circumstances leads to
invalid size resolution for, e.g., TYPEDEF modifier. This happens if we have
[1] PTR -> [2] TYPEDEF -> [3] ARRAY, in which case due to being in pointer
context ARRAY size won't be resolved (because for pointer it doesn't matter, so
it's a sink in pointer context), but it will be permanently remembered as zero
for TYPEDEF and TYPEDEF will be marked as RESOLVED. Eventually ARRAY size will
be resolved correctly, but TYPEDEF resolved_size won't be updated anymore.
This, subsequently, will lead to erroneous map creation failure, if that
TYPEDEF is specified as either key or value, as key_size/value_size won't
correspond to resolved size of TYPEDEF (kernel will believe it's zero).
Note, that if BTF was ordered as [1] ARRAY <- [2] TYPEDEF <- [3] PTR, this
won't be a problem, as by the time we get to TYPEDEF, ARRAY's size is already
calculated and stored.
This bug manifests itself in rejecting BTF-defined maps that use array
typedef as a value type:
typedef int array_t[16];
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(value, array_t); /* i.e., array_t *value; */
} test_map SEC(".maps");
The fix consists on not relying on modifier's resolved_size and instead using
modifier's resolved_id (type ID for "concrete" type to which modifier
eventually resolves) and doing size determination for that resolved type. This
allow to preserve existing "early DFS termination" logic for PTR or
STRUCT_OR_ARRAY contexts, but still do correct size determination for modifier
types.
Fixes: eb3f595dab40 ("bpf: btf: Validate type reference")
Cc: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2019-07-12 10:25:55 -07:00
env_stack_pop_resolved ( env , next_type_id , 0 ) ;
2018-04-18 15:55:58 -07:00
return 0 ;
}
2019-04-09 23:20:09 +02:00
/*
 * resolve callback for var types.
 *
 * Looks up the type referenced by t->type and fails with -EINVAL when
 * it does not exist or is a resolve-source-only type.  If the
 * referenced type is neither a resolve sink nor already resolved, it
 * is pushed onto the env stack and the result of env_stack_push() is
 * returned.  If the referenced type is a modifier, it is followed with
 * btf_type_id_resolve(); a pointer found that way which is neither a
 * sink nor resolved is pushed as well.  Finally btf_type_id_size()
 * must succeed, otherwise -EINVAL is returned.  On success the current
 * vertex is popped via env_stack_pop_resolved() with next_type_id and
 * 0 as arguments, and 0 is returned.
 */
static int btf_var_resolve ( struct btf_verifier_env * env ,
const struct resolve_vertex * v )
{
const struct btf_type * next_type ;
const struct btf_type * t = v - > t ;
u32 next_type_id = t - > type ;
struct btf * btf = env - > btf ;
next_type = btf_type_by_id ( btf , next_type_id ) ;
if ( ! next_type | | btf_type_is_resolve_source_only ( next_type ) ) {
btf_verifier_log_type ( env , v - > t , " Invalid type_id " ) ;
return - EINVAL ;
}
/* The referenced type still needs resolving: visit it first. */
if ( ! env_type_is_resolve_sink ( env , next_type ) & &
! env_type_is_resolved ( env , next_type_id ) )
return env_stack_push ( env , next_type , next_type_id ) ;
if ( btf_type_is_modifier ( next_type ) ) {
const struct btf_type * resolved_type ;
u32 resolved_type_id ;
/* Work on a copy so next_type_id keeps the referenced id. */
resolved_type_id = next_type_id ;
resolved_type = btf_type_id_resolve ( btf , & resolved_type_id ) ;
if ( btf_type_is_ptr ( resolved_type ) & &
! env_type_is_resolve_sink ( env , resolved_type ) & &
! env_type_is_resolved ( env , resolved_type_id ) )
return env_stack_push ( env , resolved_type ,
resolved_type_id ) ;
}
/* We must resolve to something concrete at this point, no
* forward types or similar that would resolve to size of
* zero is allowed .
*/
bpf: fix BTF verifier size resolution logic
BTF verifier has a size resolution bug which in some circumstances leads to
invalid size resolution for, e.g., TYPEDEF modifier. This happens if we have
[1] PTR -> [2] TYPEDEF -> [3] ARRAY, in which case due to being in pointer
context ARRAY size won't be resolved (because for pointer it doesn't matter, so
it's a sink in pointer context), but it will be permanently remembered as zero
for TYPEDEF and TYPEDEF will be marked as RESOLVED. Eventually ARRAY size will
be resolved correctly, but TYPEDEF resolved_size won't be updated anymore.
This, subsequently, will lead to erroneous map creation failure, if that
TYPEDEF is specified as either key or value, as key_size/value_size won't
correspond to resolved size of TYPEDEF (kernel will believe it's zero).
Note, that if BTF was ordered as [1] ARRAY <- [2] TYPEDEF <- [3] PTR, this
won't be a problem, as by the time we get to TYPEDEF, ARRAY's size is already
calculated and stored.
This bug manifests itself in rejecting BTF-defined maps that use array
typedef as a value type:
typedef int array_t[16];
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(value, array_t); /* i.e., array_t *value; */
} test_map SEC(".maps");
The fix consists on not relying on modifier's resolved_size and instead using
modifier's resolved_id (type ID for "concrete" type to which modifier
eventually resolves) and doing size determination for that resolved type. This
allow to preserve existing "early DFS termination" logic for PTR or
STRUCT_OR_ARRAY contexts, but still do correct size determination for modifier
types.
Fixes: eb3f595dab40 ("bpf: btf: Validate type reference")
Cc: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2019-07-12 10:25:55 -07:00
if ( ! btf_type_id_size ( btf , & next_type_id , NULL ) ) {
2019-04-09 23:20:09 +02:00
btf_verifier_log_type ( env , v - > t , " Invalid type_id " ) ;
return - EINVAL ;
}
bpf: fix BTF verifier size resolution logic
BTF verifier has a size resolution bug which in some circumstances leads to
invalid size resolution for, e.g., TYPEDEF modifier. This happens if we have
[1] PTR -> [2] TYPEDEF -> [3] ARRAY, in which case due to being in pointer
context ARRAY size won't be resolved (because for pointer it doesn't matter, so
it's a sink in pointer context), but it will be permanently remembered as zero
for TYPEDEF and TYPEDEF will be marked as RESOLVED. Eventually ARRAY size will
be resolved correctly, but TYPEDEF resolved_size won't be updated anymore.
This, subsequently, will lead to erroneous map creation failure, if that
TYPEDEF is specified as either key or value, as key_size/value_size won't
correspond to resolved size of TYPEDEF (kernel will believe it's zero).
Note, that if BTF was ordered as [1] ARRAY <- [2] TYPEDEF <- [3] PTR, this
won't be a problem, as by the time we get to TYPEDEF, ARRAY's size is already
calculated and stored.
This bug manifests itself in rejecting BTF-defined maps that use array
typedef as a value type:
typedef int array_t[16];
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__type(value, array_t); /* i.e., array_t *value; */
} test_map SEC(".maps");
The fix consists on not relying on modifier's resolved_size and instead using
modifier's resolved_id (type ID for "concrete" type to which modifier
eventually resolves) and doing size determination for that resolved type. This
allow to preserve existing "early DFS termination" logic for PTR or
STRUCT_OR_ARRAY contexts, but still do correct size determination for modifier
types.
Fixes: eb3f595dab40 ("bpf: btf: Validate type reference")
Cc: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2019-07-12 10:25:55 -07:00
env_stack_pop_resolved ( env , next_type_id , 0 ) ;
2019-04-09 23:20:09 +02:00
return 0 ;
}
2018-04-18 15:55:58 -07:00
/*
 * resolve callback for pointer types.
 *
 * Looks up the type referenced by t->type and fails with -EINVAL when
 * it does not exist or is a resolve-source-only type.  If the
 * referenced type is neither a resolve sink nor already resolved, it
 * is pushed onto the env stack and the result of env_stack_push() is
 * returned.  If the referenced type is a modifier, it is followed with
 * btf_type_id_resolve(); a pointer found that way which is neither a
 * sink nor resolved is pushed as well.  When btf_type_id_size() then
 * returns NULL, only void, fwd and func_proto targets are accepted and
 * anything else is rejected with -EINVAL.  On success the current
 * vertex is popped via env_stack_pop_resolved() with next_type_id and
 * 0 as arguments, and 0 is returned.
 */
static int btf_ptr_resolve ( struct btf_verifier_env * env ,
const struct resolve_vertex * v )
{
const struct btf_type * next_type ;
const struct btf_type * t = v - > t ;
u32 next_type_id = t - > type ;
struct btf * btf = env - > btf ;
next_type = btf_type_by_id ( btf , next_type_id ) ;
2019-04-09 23:20:09 +02:00
if ( ! next_type | | btf_type_is_resolve_source_only ( next_type ) ) {
2018-04-18 15:55:58 -07:00
btf_verifier_log_type ( env , v - > t , " Invalid type_id " ) ;
return - EINVAL ;
}
/* The referenced type still needs resolving: visit it first. */
if ( ! env_type_is_resolve_sink ( env , next_type ) & &
! env_type_is_resolved ( env , next_type_id ) )
return env_stack_push ( env , next_type , next_type_id ) ;
/* If the modifier was RESOLVED during RESOLVE_STRUCT_OR_ARRAY,
* the modifier may have stopped resolving when it was resolved
* to a ptr ( last - resolved - ptr ) .
*
* We now need to continue from the last - resolved - ptr to
* ensure the last - resolved - ptr will not referring back to
2022-02-20 10:40:55 -08:00
* the current ptr ( t ) .
2018-04-18 15:55:58 -07:00
*/
if ( btf_type_is_modifier ( next_type ) ) {
const struct btf_type * resolved_type ;
u32 resolved_type_id ;
/* Work on a copy so next_type_id keeps the referenced id. */
resolved_type_id = next_type_id ;
resolved_type = btf_type_id_resolve ( btf , & resolved_type_id ) ;
if ( btf_type_is_ptr ( resolved_type ) & &
! env_type_is_resolve_sink ( env , resolved_type ) & &
! env_type_is_resolved ( env , resolved_type_id ) )
return env_stack_push ( env , resolved_type ,
resolved_type_id ) ;
}
2018-11-19 15:29:08 -08:00
if ( ! btf_type_id_size ( btf , & next_type_id , NULL ) ) {
if ( env_type_is_resolved ( env , next_type_id ) )
next_type = btf_type_id_resolve ( btf , & next_type_id ) ;
/* Only void, fwd and func_proto targets may lack a size here. */
if ( ! btf_type_is_void ( next_type ) & &
! btf_type_is_fwd ( next_type ) & &
! btf_type_is_func_proto ( next_type ) ) {
btf_verifier_log_type ( env , v - > t , " Invalid type_id " ) ;
return - EINVAL ;
}
2018-04-18 15:55:58 -07:00
}
env_stack_pop_resolved ( env , next_type_id , 0 ) ;
return 0 ;
}
2020-09-28 12:31:04 +01:00
/*
 * show callback for modifier kinds: find the type behind the modifier
 * and forward the call to the show op of that type.
 *
 * When btf->resolved_ids is set, the lookup goes through
 * btf_type_id_resolve(), which is handed the address of type_id.
 * Otherwise btf_type_skip_modifiers() is called with type_id by value
 * and NULL as its last argument, so type_id is passed on unchanged in
 * that case.  The incoming t is overwritten by the lookup result.
 */
static void btf_modifier_show ( const struct btf * btf ,
const struct btf_type * t ,
u32 type_id , void * data ,
u8 bits_offset , struct btf_show * show )
2018-04-18 15:56:00 -07:00
{
bpf: Introduce BPF_MAP_TYPE_STRUCT_OPS
The patch introduces BPF_MAP_TYPE_STRUCT_OPS. The map value
is a kernel struct with its func ptr implemented in bpf prog.
This new map is the interface to register/unregister/introspect
a bpf implemented kernel struct.
The kernel struct is actually embedded inside another new struct
(or called the "value" struct in the code). For example,
"struct tcp_congestion_ops" is embbeded in:
struct bpf_struct_ops_tcp_congestion_ops {
refcount_t refcnt;
enum bpf_struct_ops_state state;
struct tcp_congestion_ops data; /* <-- kernel subsystem struct here */
}
The map value is "struct bpf_struct_ops_tcp_congestion_ops".
The "bpftool map dump" will then be able to show the
state ("inuse"/"tobefree") and the number of subsystem's refcnt (e.g.
number of tcp_sock in the tcp_congestion_ops case). This "value" struct
is created automatically by a macro. Having a separate "value" struct
will also make extending "struct bpf_struct_ops_XYZ" easier (e.g. adding
"void (*init)(void)" to "struct bpf_struct_ops_XYZ" to do some
initialization works before registering the struct_ops to the kernel
subsystem). The libbpf will take care of finding and populating the
"struct bpf_struct_ops_XYZ" from "struct XYZ".
Register a struct_ops to a kernel subsystem:
1. Load all needed BPF_PROG_TYPE_STRUCT_OPS prog(s)
2. Create a BPF_MAP_TYPE_STRUCT_OPS with attr->btf_vmlinux_value_type_id
set to the btf id "struct bpf_struct_ops_tcp_congestion_ops" of the
running kernel.
Instead of reusing the attr->btf_value_type_id,
btf_vmlinux_value_type_id s added such that attr->btf_fd can still be
used as the "user" btf which could store other useful sysadmin/debug
info that may be introduced in the furture,
e.g. creation-date/compiler-details/map-creator...etc.
3. Create a "struct bpf_struct_ops_tcp_congestion_ops" object as described
in the running kernel btf. Populate the value of this object.
The function ptr should be populated with the prog fds.
4. Call BPF_MAP_UPDATE with the object created in (3) as
the map value. The key is always "0".
During BPF_MAP_UPDATE, the code that saves the kernel-func-ptr's
args as an array of u64 is generated. BPF_MAP_UPDATE also allows
the specific struct_ops to do some final checks in "st_ops->init_member()"
(e.g. ensure all mandatory func ptrs are implemented).
If everything looks good, it will register this kernel struct
to the kernel subsystem. The map will not allow further update
from this point.
Unregister a struct_ops from the kernel subsystem:
BPF_MAP_DELETE with key "0".
Introspect a struct_ops:
BPF_MAP_LOOKUP_ELEM with key "0". The map value returned will
have the prog _id_ populated as the func ptr.
The map value state (enum bpf_struct_ops_state) will transit from:
INIT (map created) =>
INUSE (map updated, i.e. reg) =>
TOBEFREE (map value deleted, i.e. unreg)
The kernel subsystem needs to call bpf_struct_ops_get() and
bpf_struct_ops_put() to manage the "refcnt" in the
"struct bpf_struct_ops_XYZ". This patch uses a separate refcnt
for the purose of tracking the subsystem usage. Another approach
is to reuse the map->refcnt and then "show" (i.e. during map_lookup)
the subsystem's usage by doing map->refcnt - map->usercnt to filter out
the map-fd/pinned-map usage. However, that will also tie down the
future semantics of map->refcnt and map->usercnt.
The very first subsystem's refcnt (during reg()) holds one
count to map->refcnt. When the very last subsystem's refcnt
is gone, it will also release the map->refcnt. All bpf_prog will be
freed when the map->refcnt reaches 0 (i.e. during map_free()).
Here is how the bpftool map command will look like:
[root@arch-fb-vm1 bpf]# bpftool map show
6: struct_ops name dctcp flags 0x0
key 4B value 256B max_entries 1 memlock 4096B
btf_id 6
[root@arch-fb-vm1 bpf]# bpftool map dump id 6
[{
"value": {
"refcnt": {
"refs": {
"counter": 1
}
},
"state": 1,
"data": {
"list": {
"next": 0,
"prev": 0
},
"key": 0,
"flags": 2,
"init": 24,
"release": 0,
"ssthresh": 25,
"cong_avoid": 30,
"set_state": 27,
"cwnd_event": 28,
"in_ack_event": 26,
"undo_cwnd": 29,
"pkts_acked": 0,
"min_tso_segs": 0,
"sndbuf_expand": 0,
"cong_control": 0,
"get_info": 0,
"name": [98,112,102,95,100,99,116,99,112,0,0,0,0,0,0,0
],
"owner": 0
}
}
}
]
Misc Notes:
* bpf_struct_ops_map_sys_lookup_elem() is added for syscall lookup.
It does an inplace update on "*value" instead returning a pointer
to syscall.c. Otherwise, it needs a separate copy of "zero" value
for the BPF_STRUCT_OPS_STATE_INIT to avoid races.
* The bpf_struct_ops_map_delete_elem() is also called without
preempt_disable() from map_delete_elem(). It is because
the "->unreg()" may requires sleepable context, e.g.
the "tcp_unregister_congestion_control()".
* "const" is added to some of the existing "struct btf_func_model *"
function arg to avoid a compiler warning caused by this patch.
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200109003505.3855919-1-kafai@fb.com
2020-01-08 16:35:05 -08:00
if ( btf - > resolved_ids )
t = btf_type_id_resolve ( btf , & type_id ) ;
else
t = btf_type_skip_modifiers ( btf , type_id , NULL ) ;
2018-04-18 15:56:00 -07:00
2020-09-28 12:31:04 +01:00
btf_type_ops ( t ) - > show ( btf , t , type_id , data , bits_offset , show ) ;
2018-04-18 15:56:00 -07:00
}
2020-09-28 12:31:04 +01:00
/*
 * Show callback for var types.
 *
 * The incoming @t is not used as-is: @type_id is resolved through
 * btf_type_id_resolve() (which receives it by address and may update it),
 * and the data is then handed to the show() callback of the resolved type.
 */
static void btf_var_show(const struct btf *btf, const struct btf_type *t,
			 u32 type_id, void *data, u8 bits_offset,
			 struct btf_show *show)
{
	const struct btf_kind_operations *resolved_ops;

	t = btf_type_id_resolve(btf, &type_id);
	resolved_ops = btf_type_ops(t);

	resolved_ops->show(btf, t, type_id, data, bits_offset, show);
}
2020-09-28 12:31:04 +01:00
/*
 * Show callback for pointer types.
 *
 * btf_show_start_type() yields the location to read the pointer value from;
 * if it returns NULL nothing is shown. The value is printed with "%p"
 * (a hashed value) unless the caller set BTF_SHOW_PTR_RAW, in which case
 * "%px" is used.
 */
static void btf_ptr_show(const struct btf *btf, const struct btf_type *t,
			 u32 type_id, void *data, u8 bits_offset,
			 struct btf_show *show)
{
	void *safe_data = btf_show_start_type(show, t, type_id, data);

	if (!safe_data)
		return;

	/* Hashed output is the default; raw output is opt-in via the flag. */
	if (!(show->flags & BTF_SHOW_PTR_RAW))
		btf_show_type_value(show, " 0x%p ", *(void **)safe_data);
	else
		btf_show_type_value(show, " 0x%px ", *(void **)safe_data);

	btf_show_end_type(show);
}
2018-04-18 15:55:57 -07:00
static void btf_ref_type_log ( struct btf_verifier_env * env ,
const struct btf_type * t )
{
btf_verifier_log ( env , " type_id=%u " , t - > type ) ;
}
static struct btf_kind_operations modifier_ops = {
. check_meta = btf_ref_type_check_meta ,
2018-04-18 15:55:58 -07:00
. resolve = btf_modifier_resolve ,
2018-04-18 15:55:59 -07:00
. check_member = btf_modifier_check_member ,
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole workarounds the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
. check_kflag_member = btf_modifier_check_kflag_member ,
2018-04-18 15:55:57 -07:00
. log_details = btf_ref_type_log ,
2020-09-28 12:31:04 +01:00
. show = btf_modifier_show ,
2018-04-18 15:55:57 -07:00
} ;
static struct btf_kind_operations ptr_ops = {
. check_meta = btf_ref_type_check_meta ,
2018-04-18 15:55:58 -07:00
. resolve = btf_ptr_resolve ,
2018-04-18 15:55:59 -07:00
. check_member = btf_ptr_check_member ,
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole workarounds the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
. check_kflag_member = btf_generic_check_kflag_member ,
2018-04-18 15:55:57 -07:00
. log_details = btf_ref_type_log ,
2020-09-28 12:31:04 +01:00
. show = btf_ptr_show ,
2018-04-18 15:55:57 -07:00
} ;
2018-06-02 09:06:51 -07:00
/*
 * check_meta callback for forward declarations.
 *
 * A fwd type is accepted only when it has vlen == 0, t->type == 0 and a
 * non-empty name that passes btf_name_valid_identifier(). @meta_left is
 * not consulted here.
 *
 * Returns 0 when the type is accepted (after logging it), -EINVAL
 * otherwise (after logging the reason).
 */
static s32 btf_fwd_check_meta(struct btf_verifier_env *env,
			      const struct btf_type *t,
			      u32 meta_left)
{
	if (btf_type_vlen(t)) {
		btf_verifier_log_type(env, t, " vlen != 0 ");
	} else if (t->type) {
		btf_verifier_log_type(env, t, " type != 0 ");
	} else if (!(t->name_off &&
		     btf_name_valid_identifier(env->btf, t->name_off))) {
		/* A forward declaration is required to carry a valid name. */
		btf_verifier_log_type(env, t, " Invalid name ");
	} else {
		btf_verifier_log_type(env, t, NULL);
		return 0;
	}

	/* Every branch above other than the accepting one is a rejection. */
	return -EINVAL;
}
2018-12-18 13:43:58 -08:00
/*
 * log_details callback for forward declarations: logs whether the
 * declaration is a union (kind_flag set) or a struct (kind_flag clear).
 */
static void btf_fwd_type_log(struct btf_verifier_env *env,
			     const struct btf_type *t)
{
	const char *fwd_kind = " struct ";

	if (btf_type_kflag(t))
		fwd_kind = " union ";

	btf_verifier_log(env, " %s ", fwd_kind);
}
2018-04-18 15:55:57 -07:00
static struct btf_kind_operations fwd_ops = {
2018-06-02 09:06:51 -07:00
. check_meta = btf_fwd_check_meta ,
2018-04-18 15:55:58 -07:00
. resolve = btf_df_resolve ,
2018-04-18 15:55:59 -07:00
. check_member = btf_df_check_member ,
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole workarounds the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
. check_kflag_member = btf_df_check_kflag_member ,
2018-12-18 13:43:58 -08:00
. log_details = btf_fwd_type_log ,
2020-09-28 12:31:04 +01:00
. show = btf_df_show ,
2018-04-18 15:55:57 -07:00
} ;
2018-04-18 15:55:59 -07:00
static int btf_array_check_member ( struct btf_verifier_env * env ,
const struct btf_type * struct_type ,
const struct btf_member * member ,
const struct btf_type * member_type )
{
u32 struct_bits_off = member - > offset ;
u32 struct_size , bytes_offset ;
u32 array_type_id , array_size ;
struct btf * btf = env - > btf ;
if ( BITS_PER_BYTE_MASKED ( struct_bits_off ) ) {
btf_verifier_log_member ( env , struct_type , member ,
" Member is not byte aligned " ) ;
return - EINVAL ;
}
array_type_id = member - > type ;
btf_type_id_size ( btf , & array_type_id , & array_size ) ;
struct_size = struct_type - > size ;
bytes_offset = BITS_ROUNDDOWN_BYTES ( struct_bits_off ) ;
if ( struct_size - bytes_offset < array_size ) {
btf_verifier_log_member ( env , struct_type , member ,
" Member exceeds struct_size " ) ;
return - EINVAL ;
}
return 0 ;
}
2018-04-18 15:55:57 -07:00
/*
 * check_meta callback for array types.
 *
 * Verifies that enough metadata remains for the struct btf_array that
 * follows @t, and that the type has no name, vlen == 0, kind_flag clear,
 * size == 0, and non-zero, valid element and index type ids.
 *
 * Returns sizeof(struct btf_array) when the type is accepted (after
 * logging it), -EINVAL otherwise (after logging the reason).
 */
static s32 btf_array_check_meta(struct btf_verifier_env *env,
				const struct btf_type *t,
				u32 meta_left)
{
	const struct btf_array *arr = btf_type_array(t);
	const u32 needed = sizeof(*arr);

	/* Nothing in *arr may be read until this length check has passed. */
	if (needed > meta_left) {
		btf_verifier_log_basic(env, t,
				       " meta_left:%u meta_needed:%u ",
				       meta_left, needed);
		return -EINVAL;
	}

	/* An array type is anonymous. */
	if (t->name_off) {
		btf_verifier_log_type(env, t, " Invalid name ");
		return -EINVAL;
	}

	if (btf_type_vlen(t)) {
		btf_verifier_log_type(env, t, " vlen != 0 ");
		return -EINVAL;
	}

	if (btf_type_kflag(t)) {
		btf_verifier_log_type(env, t, " Invalid btf_info kind_flag ");
		return -EINVAL;
	}

	if (t->size) {
		btf_verifier_log_type(env, t, " size != 0 ");
		return -EINVAL;
	}

	/*
	 * Neither the element type nor the index type may be void, so a
	 * zero id is rejected for both, as is any id that fails
	 * BTF_TYPE_ID_VALID().
	 */
	if (!(arr->type && BTF_TYPE_ID_VALID(arr->type))) {
		btf_verifier_log_type(env, t, " Invalid elem ");
		return -EINVAL;
	}

	if (!(arr->index_type && BTF_TYPE_ID_VALID(arr->index_type))) {
		btf_verifier_log_type(env, t, " Invalid index ");
		return -EINVAL;
	}

	btf_verifier_log_type(env, t, NULL);

	return needed;
}
2018-04-18 15:55:58 -07:00
static int btf_array_resolve ( struct btf_verifier_env * env ,
const struct resolve_vertex * v )
{
const struct btf_array * array = btf_type_array ( v - > t ) ;
2018-05-22 14:57:19 -07:00
const struct btf_type * elem_type , * index_type ;
u32 elem_type_id , index_type_id ;
2018-04-18 15:55:58 -07:00
struct btf * btf = env - > btf ;
u32 elem_size ;
2018-05-22 14:57:19 -07:00
/* Check array->index_type */
index_type_id = array - > index_type ;
index_type = btf_type_by_id ( btf , index_type_id ) ;
2019-06-19 12:01:05 -07:00
if ( btf_type_nosize_or_null ( index_type ) | |
btf_type_is_resolve_source_only ( index_type ) ) {
2018-05-22 14:57:19 -07:00
btf_verifier_log_type ( env , v - > t , " Invalid index " ) ;
return - EINVAL ;
}
if ( ! env_type_is_resolve_sink ( env , index_type ) & &
! env_type_is_resolved ( env , index_type_id ) )
return env_stack_push ( env , index_type , index_type_id ) ;
index_type = btf_type_id_size ( btf , & index_type_id , NULL ) ;
if ( ! index_type | | ! btf_type_is_int ( index_type ) | |
! btf_type_int_is_regular ( index_type ) ) {
btf_verifier_log_type ( env , v - > t , " Invalid index " ) ;
return - EINVAL ;
}
/* Check array->type */
elem_type_id = array - > type ;
2018-04-18 15:55:58 -07:00
elem_type = btf_type_by_id ( btf , elem_type_id ) ;
2019-06-19 12:01:05 -07:00
if ( btf_type_nosize_or_null ( elem_type ) | |
btf_type_is_resolve_source_only ( elem_type ) ) {
2018-04-18 15:55:58 -07:00
btf_verifier_log_type ( env , v - > t ,
" Invalid elem " ) ;
return - EINVAL ;
}
if ( ! env_type_is_resolve_sink ( env , elem_type ) & &
! env_type_is_resolved ( env , elem_type_id ) )
return env_stack_push ( env , elem_type , elem_type_id ) ;
elem_type = btf_type_id_size ( btf , & elem_type_id , & elem_size ) ;
if ( ! elem_type ) {
btf_verifier_log_type ( env , v - > t , " Invalid elem " ) ;
return - EINVAL ;
}
2018-05-22 14:57:19 -07:00
if ( btf_type_is_int ( elem_type ) & & ! btf_type_int_is_regular ( elem_type ) ) {
btf_verifier_log_type ( env , v - > t , " Invalid array of int " ) ;
return - EINVAL ;
2018-04-18 15:55:58 -07:00
}
if ( array - > nelems & & elem_size > U32_MAX / array - > nelems ) {
btf_verifier_log_type ( env , v - > t ,
" Array size overflows U32_MAX " ) ;
return - EINVAL ;
}
env_stack_pop_resolved ( env , elem_type_id , elem_size * array - > nelems ) ;
return 0 ;
}
2018-04-18 15:55:57 -07:00
static void btf_array_log ( struct btf_verifier_env * env ,
const struct btf_type * t )
{
const struct btf_array * array = btf_type_array ( t ) ;
btf_verifier_log ( env , " type_id=%u index_type_id=%u nr_elems=%u " ,
array - > type , array - > index_type , array - > nelems ) ;
}
2020-09-28 12:31:04 +01:00
/*
 * Show every element of an array.
 *
 * The element type is found by skipping modifiers on array->type. Its
 * size is taken from elem_type->size when btf_type_has_size() says so,
 * otherwise it is left at 0. For int elements the encoding is taken from
 * the int metadata, except that one-byte elements are always treated as
 * BTF_INT_CHAR.
 *
 * Each element is shown via the element type's show() callback, bracketed
 * by btf_show_start_array_member()/btf_show_end_array_member(). Iteration
 * stops early once show->state.array_terminated is set.
 *
 * NOTE(review): when btf_type_has_size() is false for the element type,
 * elem_size stays 0 and the data cursor never advances between elements;
 * confirm this is intended.
 */
static void __btf_array_show(const struct btf *btf, const struct btf_type *t,
			     u32 type_id, void *data, u8 bits_offset,
			     struct btf_show *show)
{
	const struct btf_array *arr = btf_type_array(t);
	const u32 elem_id = arr->type;
	const struct btf_type *elem;
	u32 elem_bytes = 0;
	u16 enc = 0;

	elem = btf_type_skip_modifiers(btf, elem_id, NULL);
	if (elem) {
		if (btf_type_has_size(elem))
			elem_bytes = elem->size;

		if (btf_type_is_int(elem)) {
			u32 int_meta = btf_type_int(elem);

			enc = BTF_INT_ENCODING(int_meta);

			/*
			 * Char arrays do not appear to carry the
			 * BTF_INT_CHAR encoding, so any one-byte int
			 * element is treated as a (possibly printable)
			 * char.
			 */
			if (elem_bytes == 1)
				enc = BTF_INT_CHAR;
		}
	}

	if (!btf_show_start_array_type(show, t, type_id, enc, data))
		return;

	if (elem) {
		const struct btf_kind_operations *ops = btf_type_ops(elem);
		void *cursor = data;
		u32 idx;

		for (idx = 0; idx < arr->nelems; idx++) {
			btf_show_start_array_member(show);
			ops->show(btf, elem, elem_id, cursor,
				  bits_offset, show);
			cursor += elem_bytes;
			btf_show_end_array_member(show);

			if (show->state.array_terminated)
				break;
		}
	}

	btf_show_end_array_type(show);
}
/*
 * show callback for array types.
 *
 * At depth 0, or when BTF_SHOW_ZERO is set, the array is shown directly.
 * Otherwise a first pass over the array is made to find out whether any
 * member would be shown (i.e. is non-zero); the comments above the
 * "struct btf_show" definition describe the mechanism. Only if that pass
 * was started at this level and found something to show at a deeper level
 * is the array shown for real by a second pass.
 */
static void btf_array_show(const struct btf *btf, const struct btf_type *t,
			   u32 type_id, void *data, u8 bits_offset,
			   struct btf_show *show)
{
	const struct btf_member *saved_member = show->state.member;

	/* No zero-suppression needed: show the array straight away. */
	if (!(show->state.depth > 0) || (show->flags & BTF_SHOW_ZERO)) {
		__btf_array_show(btf, t, type_id, data, bits_offset, show);
		return;
	}

	/* Start a depth check here unless one is already in progress. */
	if (!show->state.depth_check) {
		show->state.depth_check = show->state.depth + 1;
		show->state.depth_to_show = 0;
	}

	/* First pass; restore the member it may have overwritten. */
	__btf_array_show(btf, t, type_id, data, bits_offset, show);
	show->state.member = saved_member;

	/* The depth check belongs to another level: nothing more to do. */
	if (show->state.depth_check != show->state.depth + 1)
		return;
	show->state.depth_check = 0;

	/* Nothing to show was found below this level. */
	if (show->state.depth_to_show <= show->state.depth)
		return;

	/*
	 * Getting this far means the first pass recursed and found
	 * non-zero array member(s), so show the array for real.
	 */
	__btf_array_show(btf, t, type_id, data, bits_offset, show);
}
2018-04-18 15:55:57 -07:00
static struct btf_kind_operations array_ops = {
. check_meta = btf_array_check_meta ,
2018-04-18 15:55:58 -07:00
. resolve = btf_array_resolve ,
2018-04-18 15:55:59 -07:00
. check_member = btf_array_check_member ,
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole workarounds the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
. check_kflag_member = btf_generic_check_kflag_member ,
2018-04-18 15:55:57 -07:00
. log_details = btf_array_log ,
2020-09-28 12:31:04 +01:00
. show = btf_array_show ,
2018-04-18 15:55:57 -07:00
} ;
2018-04-18 15:55:59 -07:00
/*
 * btf_struct_check_member() - verify that a member fits inside its parent.
 * @env:         verifier environment; here it is only handed to the logger.
 * @struct_type: the enclosing type; its size field bounds the member.
 * @member:      the member entry; member->offset is used as a bit offset.
 * @member_type: the type of @member; its size field is the member size.
 *
 * The member must start on a byte boundary, and member_type->size must not
 * exceed what is left of struct_type->size after the member byte offset.
 *
 * NOTE(review): presumably this is the check_member op selected when the
 * member itself is a struct or union (btf_struct_resolve dispatches through
 * btf_type_ops(member type)->check_member for non-kind_flag parents) -
 * confirm against the ops table, which is not visible in this chunk.
 *
 * Return: 0 if both checks pass, -EINVAL (after logging) otherwise.
 */
static int btf_struct_check_member ( struct btf_verifier_env * env ,
const struct btf_type * struct_type ,
const struct btf_member * member ,
const struct btf_type * member_type )
{
/* The raw offset field is treated purely as a bit offset on this path. */
u32 struct_bits_off = member - > offset ;
u32 struct_size , bytes_offset ;
/* Going by the log text, a non-zero result means the offset is not a whole number of bytes. */
if ( BITS_PER_BYTE_MASKED ( struct_bits_off ) ) {
btf_verifier_log_member ( env , struct_type , member ,
" Member is not byte aligned " ) ;
return - EINVAL ;
}
struct_size = struct_type - > size ;
/* Presumably converts the (already byte aligned) bit offset into bytes - TODO confirm macro. */
bytes_offset = BITS_ROUNDDOWN_BYTES ( struct_bits_off ) ;
/*
 * Unsigned subtraction: this assumes bytes_offset <= struct_size, which is
 * presumably guaranteed by the offset bound check in btf_struct_check_meta
 * - TODO confirm.
 */
if ( struct_size - bytes_offset < member_type - > size ) {
btf_verifier_log_member ( env , struct_type , member ,
" Member exceeds struct_size " ) ;
return - EINVAL ;
}
return 0 ;
}
2018-04-18 15:55:57 -07:00
/*
 * btf_struct_check_meta() - validate the metadata of a struct or union type.
 * @env:       verifier environment; supplies env->btf and is passed to the
 *             btf_verifier_log_*() helpers.
 * @t:         the struct/union btf_type being checked.
 * @meta_left: amount of metadata still available for the member array of @t
 *             (presumably in bytes, since it is compared with a sizeof-based
 *             count - TODO confirm with the caller).
 *
 * Checks performed, in order:
 *  - @meta_left must cover btf_type_vlen(t) * sizeof(struct btf_member);
 *  - the type name is either absent (name_off == 0) or a valid identifier;
 *  - for every member: the name offset is valid, the name is absent or a
 *    valid identifier, the type id is non-zero and passes BTF_TYPE_ID_VALID,
 *    a union member has bit offset 0, bit offsets never decrease from one
 *    member to the next, and BITS_ROUNDUP_BYTES(offset) does not exceed
 *    t->size.
 *
 * The member bit offset comes from __btf_member_bit_offset(), which is
 * defined elsewhere (presumably it drops the bitfield size bits when the
 * kind_flag of @t is set - TODO confirm).
 *
 * Every failed check is logged before returning.
 *
 * Return: the size needed for the member array (meta_needed) on success,
 * -EINVAL on any failed check.
 */
static s32 btf_struct_check_meta ( struct btf_verifier_env * env ,
const struct btf_type * t ,
u32 meta_left )
{
/* Unions get one extra rule in the member loop: every member sits at offset 0. */
bool is_union = BTF_INFO_KIND ( t - > info ) = = BTF_KIND_UNION ;
const struct btf_member * member ;
2018-07-20 17:38:37 -07:00
u32 meta_needed , last_offset ;
2018-04-18 15:55:57 -07:00
struct btf * btf = env - > btf ;
u32 struct_size = t - > size ;
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole workarounds the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
u32 offset ;
2018-04-18 15:55:57 -07:00
u16 i ;
meta_needed = btf_type_vlen ( t ) * sizeof ( * member ) ;
if ( meta_left < meta_needed ) {
btf_verifier_log_basic ( env , t ,
" meta_left:%u meta_needed:%u " ,
meta_left , meta_needed ) ;
return - EINVAL ;
}
2018-11-27 13:23:28 -08:00
/* struct type either no name or a valid one */
if ( t - > name_off & &
! btf_name_valid_identifier ( env - > btf , t - > name_off ) ) {
btf_verifier_log_type ( env , t , " Invalid name " ) ;
return - EINVAL ;
}
2018-04-18 15:55:57 -07:00
btf_verifier_log_type ( env , t , NULL ) ;
2018-07-20 17:38:37 -07:00
last_offset = 0 ;
2018-04-18 15:55:57 -07:00
for_each_member ( i , t , member ) {
2018-04-21 09:48:23 -07:00
if ( ! btf_name_offset_valid ( btf , member - > name_off ) ) {
2018-04-18 15:55:57 -07:00
btf_verifier_log_member ( env , t , member ,
" Invalid member name_offset:%u " ,
2018-04-21 09:48:23 -07:00
member - > name_off ) ;
2018-04-18 15:55:57 -07:00
return - EINVAL ;
}
2018-11-27 13:23:28 -08:00
/* struct member either no name or a valid one */
if ( member - > name_off & &
! btf_name_valid_identifier ( btf , member - > name_off ) ) {
btf_verifier_log_member ( env , t , member , " Invalid name " ) ;
return - EINVAL ;
}
2018-04-18 15:55:57 -07:00
/* A member cannot be in type void */
2018-05-22 14:57:20 -07:00
if ( ! member - > type | | ! BTF_TYPE_ID_VALID ( member - > type ) ) {
2018-04-18 15:55:57 -07:00
btf_verifier_log_member ( env , t , member ,
" Invalid type_id " ) ;
return - EINVAL ;
}
2021-12-01 10:10:25 -08:00
offset = __btf_member_bit_offset ( t , member ) ;
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole workarounds the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
/* Every member of a union must start at bit offset 0. */
if ( is_union & & offset ) {
2018-04-18 15:55:57 -07:00
btf_verifier_log_member ( env , t , member ,
" Invalid member bits_offset " ) ;
return - EINVAL ;
}
2018-07-20 17:38:37 -07:00
/*
* " > " instead of " >= " because the last member could be
* " char a[0]; "
*/
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole workarounds the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
if ( last_offset > offset ) {
2018-07-20 17:38:37 -07:00
btf_verifier_log_member ( env , t , member ,
" Invalid member bits_offset " ) ;
return - EINVAL ;
}
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole workarounds the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
/* Only the start offset is bounded here; the member size is not examined in this function. */
if ( BITS_ROUNDUP_BYTES ( offset ) > struct_size ) {
2018-04-18 15:55:57 -07:00
btf_verifier_log_member ( env , t , member ,
2018-11-25 23:32:51 +00:00
" Member bits_offset exceeds its struct size " ) ;
2018-04-18 15:55:57 -07:00
return - EINVAL ;
}
btf_verifier_log_member ( env , t , member , NULL ) ;
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole workarounds the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
/* Remember this offset for the ordering check on the next member. */
last_offset = offset ;
2018-04-18 15:55:57 -07:00
}
return meta_needed ;
}
2018-04-18 15:55:58 -07:00
static int btf_struct_resolve ( struct btf_verifier_env * env ,
const struct resolve_vertex * v )
{
const struct btf_member * member ;
2018-04-18 15:55:59 -07:00
int err ;
2018-04-18 15:55:58 -07:00
u16 i ;
/* Before continue resolving the next_member,
* ensure the last member is indeed resolved to a
* type with size info .
*/
if ( v - > next_member ) {
2018-04-18 15:55:59 -07:00
const struct btf_type * last_member_type ;
2018-04-18 15:55:58 -07:00
const struct btf_member * last_member ;
2022-09-10 11:01:20 +00:00
u32 last_member_type_id ;
2018-04-18 15:55:58 -07:00
last_member = btf_type_member ( v - > t ) + v - > next_member - 1 ;
last_member_type_id = last_member - > type ;
if ( WARN_ON_ONCE ( ! env_type_is_resolved ( env ,
last_member_type_id ) ) )
return - EINVAL ;
2018-04-18 15:55:59 -07:00
last_member_type = btf_type_by_id ( env - > btf ,
last_member_type_id ) ;
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole works around the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of UINT_MAX. The maximum
bitfield size will be 255 (8 bits), which is enough for today, as the
largest bitfield a compiler supports is 128 bits (where int128 exists).
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
if ( btf_type_kflag ( v - > t ) )
err = btf_type_ops ( last_member_type ) - > check_kflag_member ( env , v - > t ,
last_member ,
last_member_type ) ;
else
err = btf_type_ops ( last_member_type ) - > check_member ( env , v - > t ,
last_member ,
last_member_type ) ;
2018-04-18 15:55:59 -07:00
if ( err )
return err ;
2018-04-18 15:55:58 -07:00
}
for_each_member_from ( i , v - > next_member , v - > t , member ) {
u32 member_type_id = member - > type ;
const struct btf_type * member_type = btf_type_by_id ( env - > btf ,
member_type_id ) ;
2019-06-19 12:01:05 -07:00
if ( btf_type_nosize_or_null ( member_type ) | |
btf_type_is_resolve_source_only ( member_type ) ) {
2018-04-18 15:55:58 -07:00
btf_verifier_log_member ( env , v - > t , member ,
" Invalid member " ) ;
return - EINVAL ;
}
if ( ! env_type_is_resolve_sink ( env , member_type ) & &
! env_type_is_resolved ( env , member_type_id ) ) {
env_stack_set_next_member ( env , i + 1 ) ;
return env_stack_push ( env , member_type , member_type_id ) ;
}
2018-04-18 15:55:59 -07:00
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole works around the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 255 (2^8 - 1), which is enough for today as the
maximum bitfield in a compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
if ( btf_type_kflag ( v - > t ) )
err = btf_type_ops ( member_type ) - > check_kflag_member ( env , v - > t ,
member ,
member_type ) ;
else
err = btf_type_ops ( member_type ) - > check_member ( env , v - > t ,
member ,
member_type ) ;
2018-04-18 15:55:59 -07:00
if ( err )
return err ;
2018-04-18 15:55:58 -07:00
}
env_stack_pop_resolved ( env , 0 , 0 ) ;
return 0 ;
}
2018-04-18 15:55:57 -07:00
/*
 * btf_struct_log - emit the size and member count of a type to the
 * verifier log.
 *
 * @env: verifier environment handed through to btf_verifier_log()
 * @t:   type whose t->size and btf_type_vlen(t) are printed
 *
 * Prints both values with the "size=%u vlen=%u" format; returns nothing.
 */
static void btf_struct_log ( struct btf_verifier_env * env ,
const struct btf_type * t )
{
btf_verifier_log ( env , " size=%u vlen=%u " , t - > size , btf_type_vlen ( t ) ) ;
}
bpf: Refactor kptr_off_tab into btf_record
To prepare the BPF verifier to handle special fields in both map values
and program allocated types coming from program BTF, we need to refactor
the kptr_off_tab handling code into something more generic and reusable
across both cases to avoid code duplication.
Later patches also require passing this data to helpers at runtime, so
that they can work on user defined types, initialize them, destruct
them, etc.
The main observation is that both map values and such allocated types
point to a type in program BTF, hence they can be handled similarly. We
can prepare a field metadata table for both cases and store them in
struct bpf_map or struct btf depending on the use case.
Hence, refactor the code into generic btf_record and btf_field member
structs. The btf_record represents the fields of a specific btf_type in
user BTF. The cnt indicates the number of special fields we successfully
recognized, and field_mask is a bitmask of fields that were found, to
enable quick determination of availability of a certain field.
Subsequently, refactor the rest of the code to work with these generic
types, remove assumptions about kptr and kptr_off_tab, rename variables
to more meaningful names, etc.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221103191013.1236066-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-04 00:39:55 +05:30
/*
 * Kinds of fields enumerated here: spin lock, timer and kptr.
 *
 * NOTE(review): struct btf_field_info stores an enum btf_field_type rather
 * than this enum, and btf_find_struct() takes an enum btf_field_type too;
 * confirm whether this enum still has any users.
 */
enum btf_field_info_type {
2022-04-15 21:33:42 +05:30
BTF_FIELD_SPIN_LOCK ,
BTF_FIELD_TIMER ,
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may not longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would safe to allow user
to still access such invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when paramterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
BTF_FIELD_KPTR ,
} ;
/*
 * Result codes returned by field matchers such as btf_find_struct():
 *   BTF_FIELD_IGNORE (0) - the type did not match; nothing was recorded.
 *   BTF_FIELD_FOUND  (1) - the type matched and the info entry was filled in.
 */
enum {
BTF_FIELD_IGNORE = 0 ,
BTF_FIELD_FOUND = 1 ,
2022-04-15 21:33:42 +05:30
} ;
/*
 * struct btf_field_info - description of one field located in a BTF type.
 *
 * @type: kind of the field, as an enum btf_field_type value
 * @off:  offset of the field; btf_find_struct() stores its @off argument here
 *
 * The anonymous union carries per-kind extra data:
 *   kptr.type_id           - a u32 BTF type id
 *   list_head.node_name    - a string; presumably the name of the list node
 *                            member in the element type -- TODO confirm
 *   list_head.value_btf_id - a u32 BTF id; presumably the list element
 *                            type -- TODO confirm
 *
 * NOTE(review): which union arm is meaningful is presumably selected by
 * @type; verify against the code that fills this structure in.
 */
struct btf_field_info {
bpf: Refactor kptr_off_tab into btf_record
To prepare the BPF verifier to handle special fields in both map values
and program allocated types coming from program BTF, we need to refactor
the kptr_off_tab handling code into something more generic and reusable
across both cases to avoid code duplication.
Later patches also require passing this data to helpers at runtime, so
that they can work on user defined types, initialize them, destruct
them, etc.
The main observation is that both map values and such allocated types
point to a type in program BTF, hence they can be handled similarly. We
can prepare a field metadata table for both cases and store them in
struct bpf_map or struct btf depending on the use case.
Hence, refactor the code into generic btf_record and btf_field member
structs. The btf_record represents the fields of a specific btf_type in
user BTF. The cnt indicates the number of special fields we successfully
recognized, and field_mask is a bitmask of fields that were found, to
enable quick determination of availability of a certain field.
Subsequently, refactor the rest of the code to work with these generic
types, remove assumptions about kptr and kptr_off_tab, rename variables
to more meaningful names, etc.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221103191013.1236066-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-04 00:39:55 +05:30
enum btf_field_type type ;
2022-04-15 21:33:42 +05:30
u32 off ;
bpf: Support bpf_list_head in map values
Add the support on the map side to parse, recognize, verify, and build
metadata table for a new special field of the type struct bpf_list_head.
To parameterize the bpf_list_head for a certain value type and the
list_node member it will accept in that value type, we use BTF
declaration tags.
The definition of bpf_list_head in a map value will be done as follows:
struct foo {
struct bpf_list_node node;
int data;
};
struct map_value {
struct bpf_list_head head __contains(foo, node);
};
Then, the bpf_list_head only allows adding to the list 'head' using the
bpf_list_node 'node' for the type struct foo.
The 'contains' annotation is a BTF declaration tag composed of four
parts, "contains:name:node" where the name is then used to look up the
type in the map BTF, with its kind hardcoded to BTF_KIND_STRUCT during
the lookup. The node defines name of the member in this type that has
the type struct bpf_list_node, which is actually used for linking into
the linked list. For now, 'kind' part is hardcoded as struct.
This allows building intrusive linked lists in BPF, using container_of
to obtain pointer to entry, while being completely type safe from the
perspective of the verifier. The verifier knows exactly the type of the
nodes, and knows that list helpers return that type at some fixed offset
where the bpf_list_node member used for this list exists. The verifier
also uses this information to disallow adding types that are not
accepted by a certain list.
For now, no elements can be added to such lists. Support for that is
coming in future patches, hence draining and freeing items is done with
a TODO that will be resolved in a future patch.
Note that the bpf_list_head_free function moves the list out to a local
variable under the lock and releases it, doing the actual draining of
the list items outside the lock. While this helps with not holding the
lock for too long pessimizing other concurrent list operations, it is
also necessary for deadlock prevention: unless every function called in
the critical section would be notrace, a fentry/fexit program could
attach and call bpf_map_update_elem again on the map, leading to the
same lock being acquired if the key matches and lead to a deadlock.
While this requires some special effort on part of the BPF programmer to
trigger and is highly unlikely to occur in practice, it is always better
if we can avoid such a condition.
While notrace would prevent this, doing the draining outside the lock
has advantages of its own, hence it is used to also fix the deadlock
related problem.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221114191547.1694267-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-15 00:45:25 +05:30
union {
struct {
u32 type_id ;
} kptr ;
struct {
const char * node_name ;
u32 value_btf_id ;
} list_head ;
} ;
2022-04-15 21:33:42 +05:30
} ;
/*
 * btf_find_struct - check whether @t is a struct of exactly @sz bytes and,
 * if so, record it in @info.
 *
 * @btf:        not referenced by this function body
 * @t:          candidate type to test
 * @off:        offset value stored verbatim into info->off on a match
 * @sz:         size that t->size must equal for a match
 * @field_type: field kind stored verbatim into info->type on a match
 * @info:       output entry; written only on the BTF_FIELD_FOUND path
 *
 * Returns BTF_FIELD_IGNORE when __btf_type_is_struct(t) is false or when
 * t->size differs from @sz; otherwise fills in @info and returns
 * BTF_FIELD_FOUND.
 *
 * NOTE(review): t->size is compared against the signed int @sz; presumably
 * callers only pass small positive sizes -- confirm.
 */
static int btf_find_struct ( const struct btf * btf , const struct btf_type * t ,
2022-11-04 00:39:56 +05:30
u32 off , int sz , enum btf_field_type field_type ,
struct btf_field_info * info )
2022-04-15 21:33:42 +05:30
{
if ( ! __btf_type_is_struct ( t ) )
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may not longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would safe to allow user
to still access such invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when paramterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
return BTF_FIELD_IGNORE ;
2022-04-15 21:33:42 +05:30
if ( t - > size ! = sz )
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may not longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would safe to allow user
to still access such invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when paramterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
return BTF_FIELD_IGNORE ;
2022-11-04 00:39:56 +05:30
info - > type = field_type ;
2022-04-15 21:33:42 +05:30
info - > off = off ;
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may not longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would safe to allow user
to still access such invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when paramterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
return BTF_FIELD_FOUND ;
}
static int btf_find_kptr ( const struct btf * btf , const struct btf_type * t ,
u32 off , int sz , struct btf_field_info * info )
{
bpf: Refactor kptr_off_tab into btf_record
To prepare the BPF verifier to handle special fields in both map values
and program allocated types coming from program BTF, we need to refactor
the kptr_off_tab handling code into something more generic and reusable
across both cases to avoid code duplication.
Later patches also require passing this data to helpers at runtime, so
that they can work on user defined types, initialize them, destruct
them, etc.
The main observation is that both map values and such allocated types
point to a type in program BTF, hence they can be handled similarly. We
can prepare a field metadata table for both cases and store them in
struct bpf_map or struct btf depending on the use case.
Hence, refactor the code into generic btf_record and btf_field member
structs. The btf_record represents the fields of a specific btf_type in
user BTF. The cnt indicates the number of special fields we successfully
recognized, and field_mask is a bitmask of fields that were found, to
enable quick determination of availability of a certain field.
Subsequently, refactor the rest of the code to work with these generic
types, remove assumptions about kptr and kptr_off_tab, rename variables
to more meaningful names, etc.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221103191013.1236066-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-04 00:39:55 +05:30
enum btf_field_type type ;
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptrs are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow the
user to still access such an invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
u32 res_id ;
2022-11-04 00:39:51 +05:30
/* Permit modifiers on the pointer itself */
if ( btf_type_is_volatile ( t ) )
t = btf_type_by_id ( btf , t - > type ) ;
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow user
to still access such invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
/* For PTR, sz is always == 8 */
if ( ! btf_type_is_ptr ( t ) )
return BTF_FIELD_IGNORE ;
t = btf_type_by_id ( btf , t - > type ) ;
if ( ! btf_type_is_type_tag ( t ) )
return BTF_FIELD_IGNORE ;
/* Reject extra tags */
if ( btf_type_is_type_tag ( btf_type_by_id ( btf , t - > type ) ) )
return - EINVAL ;
bpf: Allow storing referenced kptr in map
Extending the code in previous commits, introduce referenced kptr
support, which needs to be tagged using 'kptr_ref' tag instead. Unlike
unreferenced kptr, referenced kptr have a lot more restrictions. In
addition to the type matching, only a newly introduced bpf_kptr_xchg
helper is allowed to modify the map value at that offset. This transfers
the referenced pointer being stored into the map, releasing the
references state for the program, and returning the old value and
creating new reference state for the returned pointer.
Similar to unreferenced pointer case, return value for this case will
also be PTR_TO_BTF_ID_OR_NULL. The reference for the returned pointer
must either be eventually released by calling the corresponding release
function, otherwise it must be transferred into another map.
It is also allowed to call bpf_kptr_xchg with a NULL pointer, to clear
the value, and obtain the old value if any.
BPF_LDX, BPF_STX, and BPF_ST cannot access referenced kptr. A future
commit will permit using BPF_LDX for such pointers, but will attempt to
make it safe, since the lifetime of the object won't be guaranteed.
There are valid reasons to enforce the restriction of permitting only
bpf_kptr_xchg to operate on referenced kptr. The pointer value must be
consistent in face of concurrent modification, and any prior values
contained in the map must also be released before a new one is moved
into the map. To ensure proper transfer of this ownership, bpf_kptr_xchg
returns the old value, which the verifier would require the user to
either free or move into another map, and releases the reference held
for the pointer being moved in.
In the future, direct BPF_XCHG instruction may also be permitted to work
like bpf_kptr_xchg helper.
Note that process_kptr_func doesn't have to call
check_helper_mem_access, since we already disallow rdonly/wronly flags
for map, which is what check_map_access_type checks, and we already
ensure the PTR_TO_MAP_VALUE refers to kptr by obtaining its off_desc,
so check_map_access is also not required.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-4-memxor@gmail.com
2022-04-25 03:18:51 +05:30
if ( ! strcmp ( " kptr " , __btf_name_by_offset ( btf , t - > name_off ) ) )
type = BPF_KPTR_UNREF ;
else if ( ! strcmp ( " kptr_ref " , __btf_name_by_offset ( btf , t - > name_off ) ) )
type = BPF_KPTR_REF ;
else
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow user
to still access such invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
return - EINVAL ;
/* Get the base type */
t = btf_type_skip_modifiers ( btf , t - > type , & res_id ) ;
/* Only pointer to struct is allowed */
if ( ! __btf_type_is_struct ( t ) )
return - EINVAL ;
bpf: Allow storing referenced kptr in map
Extending the code in previous commits, introduce referenced kptr
support, which needs to be tagged using 'kptr_ref' tag instead. Unlike
unreferenced kptr, referenced kptr have a lot more restrictions. In
addition to the type matching, only a newly introduced bpf_kptr_xchg
helper is allowed to modify the map value at that offset. This transfers
the referenced pointer being stored into the map, releasing the
references state for the program, and returning the old value and
creating new reference state for the returned pointer.
Similar to unreferenced pointer case, return value for this case will
also be PTR_TO_BTF_ID_OR_NULL. The reference for the returned pointer
must either be eventually released by calling the corresponding release
function, otherwise it must be transferred into another map.
It is also allowed to call bpf_kptr_xchg with a NULL pointer, to clear
the value, and obtain the old value if any.
BPF_LDX, BPF_STX, and BPF_ST cannot access referenced kptr. A future
commit will permit using BPF_LDX for such pointers, but will attempt to
make it safe, since the lifetime of the object won't be guaranteed.
There are valid reasons to enforce the restriction of permitting only
bpf_kptr_xchg to operate on referenced kptr. The pointer value must be
consistent in face of concurrent modification, and any prior values
contained in the map must also be released before a new one is moved
into the map. To ensure proper transfer of this ownership, bpf_kptr_xchg
returns the old value, which the verifier would require the user to
either free or move into another map, and releases the reference held
for the pointer being moved in.
In the future, direct BPF_XCHG instruction may also be permitted to work
like bpf_kptr_xchg helper.
Note that process_kptr_func doesn't have to call
check_helper_mem_access, since we already disallow rdonly/wronly flags
for map, which is what check_map_access_type checks, and we already
ensure the PTR_TO_MAP_VALUE refers to kptr by obtaining its off_desc,
so check_map_access is also not required.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-4-memxor@gmail.com
2022-04-25 03:18:51 +05:30
info - > type = type ;
2022-11-04 00:39:56 +05:30
info - > off = off ;
info - > kptr . type_id = res_id ;
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow user
to still access such invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
return BTF_FIELD_FOUND ;
2022-04-15 21:33:42 +05:30
}
bpf: Support bpf_list_head in map values
Add the support on the map side to parse, recognize, verify, and build
metadata table for a new special field of the type struct bpf_list_head.
To parameterize the bpf_list_head for a certain value type and the
list_node member it will accept in that value type, we use BTF
declaration tags.
The definition of bpf_list_head in a map value will be done as follows:
struct foo {
struct bpf_list_node node;
int data;
};
struct map_value {
struct bpf_list_head head __contains(foo, node);
};
Then, the bpf_list_head only allows adding to the list 'head' using the
bpf_list_node 'node' for the type struct foo.
The 'contains' annotation is a BTF declaration tag composed of three
parts, "contains:name:node" where the name is then used to look up the
type in the map BTF, with its kind hardcoded to BTF_KIND_STRUCT during
the lookup. The node defines name of the member in this type that has
the type struct bpf_list_node, which is actually used for linking into
the linked list. For now, 'kind' part is hardcoded as struct.
This allows building intrusive linked lists in BPF, using container_of
to obtain pointer to entry, while being completely type safe from the
perspective of the verifier. The verifier knows exactly the type of the
nodes, and knows that list helpers return that type at some fixed offset
where the bpf_list_node member used for this list exists. The verifier
also uses this information to disallow adding types that are not
accepted by a certain list.
For now, no elements can be added to such lists. Support for that is
coming in future patches, hence draining and freeing items is done with
a TODO that will be resolved in a future patch.
Note that the bpf_list_head_free function moves the list out to a local
variable under the lock and releases it, doing the actual draining of
the list items outside the lock. While this helps with not holding the
lock for too long pessimizing other concurrent list operations, it is
also necessary for deadlock prevention: unless every function called in
the critical section would be notrace, a fentry/fexit program could
attach and call bpf_map_update_elem again on the map, leading to the
same lock being acquired if the key matches and lead to a deadlock.
While this requires some special effort on part of the BPF programmer to
trigger and is highly unlikely to occur in practice, it is always better
if we can avoid such a condition.
While notrace would prevent this, doing the draining outside the lock
has advantages of its own, hence it is used to also fix the deadlock
related problem.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221114191547.1694267-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-15 00:45:25 +05:30
static const char * btf_find_decl_tag_value ( const struct btf * btf ,
const struct btf_type * pt ,
int comp_idx , const char * tag_key )
{
int i ;
for ( i = 1 ; i < btf_nr_types ( btf ) ; i + + ) {
const struct btf_type * t = btf_type_by_id ( btf , i ) ;
int len = strlen ( tag_key ) ;
if ( ! btf_type_is_decl_tag ( t ) )
continue ;
if ( pt ! = btf_type_by_id ( btf , t - > type ) | |
btf_type_decl_tag ( t ) - > component_idx ! = comp_idx )
continue ;
if ( strncmp ( __btf_name_by_offset ( btf , t - > name_off ) , tag_key , len ) )
continue ;
return __btf_name_by_offset ( btf , t - > name_off ) + len ;
}
return NULL ;
}
/*
 * Check whether struct member @t (component @comp_idx of parent @pt, located
 * at offset @off) is a list head and, if so, describe it in @info.
 *
 * The member must carry a "contains:<type>:<node>" declaration tag. <type> is
 * resolved as a BTF_KIND_STRUCT name in @btf and <node> is stored as the name
 * of the linking member.
 *
 * Returns BTF_FIELD_IGNORE when @t is not a struct of size @sz,
 * BTF_FIELD_FOUND on success, -EINVAL for a missing or malformed tag,
 * -ENOMEM if the temporary name copy cannot be allocated, or the negative
 * result of btf_find_by_name_kind() when the type lookup fails.
 */
static int btf_find_list_head(const struct btf *btf, const struct btf_type *pt,
			      const struct btf_type *t, int comp_idx,
			      u32 off, int sz, struct btf_field_info *info)
{
	const char *tag_value, *sep, *node_name;
	char *type_name;
	s32 value_id;

	if (!__btf_type_is_struct(t) || t->size != sz)
		return BTF_FIELD_IGNORE;

	tag_value = btf_find_decl_tag_value(btf, pt, comp_idx, "contains:");
	if (!tag_value)
		return -EINVAL;

	/* The tag value has the form "<type>:<node>"; locate the separator. */
	sep = strstr(tag_value, ":");
	if (!sep)
		return -EINVAL;

	/* A NUL-terminated copy of "<type>" is needed only for the lookup. */
	type_name = kstrndup(tag_value, sep - tag_value, GFP_KERNEL | __GFP_NOWARN);
	if (!type_name)
		return -ENOMEM;
	value_id = btf_find_by_name_kind(btf, type_name, BTF_KIND_STRUCT);
	kfree(type_name);
	if (value_id < 0)
		return value_id;

	/* Whatever follows the separator names the node member. */
	node_name = sep + 1;
	if (str_is_empty(node_name))
		return -EINVAL;

	info->type = BPF_LIST_HEAD;
	info->off = off;
	info->list_head.value_btf_id = value_id;
	/* Points into the tag string itself, not into the freed copy. */
	info->list_head.node_name = node_name;
	return BTF_FIELD_FOUND;
}
2022-11-04 00:39:56 +05:30
/*
 * Map a struct name to the special field type it denotes.
 *
 * Only types whose bit is set in @field_mask are considered. For
 * bpf_spin_lock and bpf_timer the corresponding bit is recorded in
 * @seen_mask, and a second occurrence is rejected with -E2BIG.
 * bpf_list_head and bpf_list_node are not tracked in @seen_mask.
 *
 * When no name matches but @field_mask includes BPF_KPTR, BPF_KPTR_REF is
 * returned as a catch-all regardless of @name.
 * NOTE(review): the caller presumably decides afterwards whether the member
 * really is a kptr and of which flavour - confirm against the call site.
 *
 * Returns 0 if nothing matched, a negative errno on a duplicate, or the
 * field type, in which case *@sz and *@align are filled in from
 * btf_field_type_size() and btf_field_type_align().
 */
static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
			      int *align, int *sz)
{
	int field;

	if ((field_mask & BPF_SPIN_LOCK) && strcmp(name, "bpf_spin_lock") == 0) {
		if (*seen_mask & BPF_SPIN_LOCK)
			return -E2BIG;
		*seen_mask |= BPF_SPIN_LOCK;
		field = BPF_SPIN_LOCK;
	} else if ((field_mask & BPF_TIMER) && strcmp(name, "bpf_timer") == 0) {
		if (*seen_mask & BPF_TIMER)
			return -E2BIG;
		*seen_mask |= BPF_TIMER;
		field = BPF_TIMER;
	} else if ((field_mask & BPF_LIST_HEAD) && strcmp(name, "bpf_list_head") == 0) {
		field = BPF_LIST_HEAD;
	} else if ((field_mask & BPF_LIST_NODE) && strcmp(name, "bpf_list_node") == 0) {
		field = BPF_LIST_NODE;
	} else if (field_mask & BPF_KPTR) {
		/* Only return BPF_KPTR when all other types with matchable names fail */
		field = BPF_KPTR_REF;
	} else {
		return 0;
	}

	*sz = btf_field_type_size(field);
	*align = btf_field_type_align(field);
	return field;
}
static int btf_find_struct_field ( const struct btf * btf ,
const struct btf_type * t , u32 field_mask ,
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow user
to still access such invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
struct btf_field_info * info , int info_cnt )
2019-01-31 15:40:04 -08:00
{
2022-11-04 00:39:56 +05:30
int ret , idx = 0 , align , sz , field_type ;
2019-01-31 15:40:04 -08:00
const struct btf_member * member ;
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow user
to still access such invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
struct btf_field_info tmp ;
2022-11-04 00:39:56 +05:30
u32 i , off , seen_mask = 0 ;
2019-01-31 15:40:04 -08:00
for_each_member ( i , t , member ) {
const struct btf_type * member_type = btf_type_by_id ( btf ,
member - > type ) ;
2022-04-15 21:33:42 +05:30
2022-11-04 00:39:56 +05:30
field_type = btf_get_field_type ( __btf_name_by_offset ( btf , member_type - > name_off ) ,
field_mask , & seen_mask , & align , & sz ) ;
if ( field_type = = 0 )
2019-01-31 15:40:04 -08:00
continue ;
2022-11-04 00:39:56 +05:30
if ( field_type < 0 )
return field_type ;
2022-04-15 21:33:42 +05:30
2021-12-01 10:10:25 -08:00
off = __btf_member_bit_offset ( t , member ) ;
2019-01-31 15:40:04 -08:00
if ( off % 8 )
/* valid C code cannot generate such BTF */
return - EINVAL ;
off / = 8 ;
2021-07-14 17:54:10 -07:00
if ( off % align )
2022-11-04 00:39:56 +05:30
continue ;
2022-04-15 21:33:42 +05:30
switch ( field_type ) {
2022-11-04 00:39:56 +05:30
case BPF_SPIN_LOCK :
case BPF_TIMER :
bpf: Recognize lock and list fields in allocated objects
Allow specifying bpf_spin_lock, bpf_list_head, bpf_list_node fields in an
allocated object.
Also update btf_struct_access to reject direct access to these special
fields.
A bpf_list_head allows implementing map-in-map style use cases, where an
allocated object with bpf_list_head is linked into a list in a map
value. This would require embedding a bpf_list_node, support for which
is also included. The bpf_spin_lock is used to protect the bpf_list_head
and other data.
While we strictly don't require to hold a bpf_spin_lock while touching
the bpf_list_head in such objects, as when we have access to it, we have
complete ownership of the object, the locking constraint is still kept
and may be conditionally lifted in the future.
Note that the specification of such types can be done just like map
values, e.g.:
struct bar {
struct bpf_list_node node;
};
struct foo {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(bar, node);
struct bpf_list_node node;
};
struct map_value {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(foo, node);
};
To recognize such types in user BTF, we build a btf_struct_metas array
of metadata items corresponding to each BTF ID. This is done once during
the btf_parse stage to avoid having to do it each time during the
verification process's requirement to inspect the metadata.
Moreover, the computed metadata needs to be passed to some helpers in
future patches which requires allocating them and storing them in the
BTF that is pinned by the program itself, so that valid access can be
assumed to such data during program runtime.
A key thing to note is that once a btf_struct_meta is available for a
type, both the btf_record and btf_field_offs should be available. It is
critical that btf_field_offs is available in case special fields are
present, as we extensively rely on special fields being zeroed out in
map values and allocated objects in later patches. The code ensures that
by bailing out in case of errors and ensuring both are available
together. If the record is not available, the special fields won't be
recognized, so not having both is also fine (in terms of being a
verification error and not a runtime bug).
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221118015614.2013203-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-18 07:25:56 +05:30
case BPF_LIST_NODE :
2022-11-04 00:39:56 +05:30
ret = btf_find_struct ( btf , member_type , off , sz , field_type ,
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow user
to still access such invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
idx < info_cnt ? & info [ idx ] : & tmp ) ;
if ( ret < 0 )
return ret ;
break ;
2022-11-04 00:39:56 +05:30
case BPF_KPTR_UNREF :
case BPF_KPTR_REF :
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow user
to still access such invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
ret = btf_find_kptr ( btf , member_type , off , sz ,
idx < info_cnt ? & info [ idx ] : & tmp ) ;
if ( ret < 0 )
return ret ;
break ;
bpf: Support bpf_list_head in map values
Add the support on the map side to parse, recognize, verify, and build
metadata table for a new special field of the type struct bpf_list_head.
To parameterize the bpf_list_head for a certain value type and the
list_node member it will accept in that value type, we use BTF
declaration tags.
The definition of bpf_list_head in a map value will be done as follows:
struct foo {
struct bpf_list_node node;
int data;
};
struct map_value {
struct bpf_list_head head __contains(foo, node);
};
Then, the bpf_list_head only allows adding to the list 'head' using the
bpf_list_node 'node' for the type struct foo.
The 'contains' annotation is a BTF declaration tag composed of three
parts, "contains:name:node" where the name is then used to look up the
type in the map BTF, with its kind hardcoded to BTF_KIND_STRUCT during
the lookup. The node defines name of the member in this type that has
the type struct bpf_list_node, which is actually used for linking into
the linked list. For now, 'kind' part is hardcoded as struct.
This allows building intrusive linked lists in BPF, using container_of
to obtain pointer to entry, while being completely type safe from the
perspective of the verifier. The verifier knows exactly the type of the
nodes, and knows that list helpers return that type at some fixed offset
where the bpf_list_node member used for this list exists. The verifier
also uses this information to disallow adding types that are not
accepted by a certain list.
For now, no elements can be added to such lists. Support for that is
coming in future patches, hence draining and freeing items is done with
a TODO that will be resolved in a future patch.
Note that the bpf_list_head_free function moves the list out to a local
variable under the lock and releases it, doing the actual draining of
the list items outside the lock. While this helps with not holding the
lock for too long pessimizing other concurrent list operations, it is
also necessary for deadlock prevention: unless every function called in
the critical section would be notrace, a fentry/fexit program could
attach and call bpf_map_update_elem again on the map, leading to the
same lock being acquired if the key matches, resulting in a deadlock.
While this requires some special effort on part of the BPF programmer to
trigger and is highly unlikely to occur in practice, it is always better
if we can avoid such a condition.
While notrace would prevent this, doing the draining outside the lock
has advantages of its own, hence it is used to also fix the deadlock
related problem.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221114191547.1694267-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-15 00:45:25 +05:30
case BPF_LIST_HEAD :
ret = btf_find_list_head ( btf , t , member_type , i , off , sz ,
idx < info_cnt ? & info [ idx ] : & tmp ) ;
if ( ret < 0 )
return ret ;
break ;
2022-04-15 21:33:42 +05:30
default :
return - EFAULT ;
}
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow user
to still access such invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
if ( ret = = BTF_FIELD_IGNORE )
continue ;
if ( idx > = info_cnt )
return - E2BIG ;
+ + idx ;
2021-07-14 17:54:10 -07:00
}
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow user
to still access such invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
return idx ;
2021-07-14 17:54:10 -07:00
}
static int btf_find_datasec_var ( const struct btf * btf , const struct btf_type * t ,
2022-11-04 00:39:56 +05:30
u32 field_mask , struct btf_field_info * info ,
int info_cnt )
2021-07-14 17:54:10 -07:00
{
2022-11-04 00:39:56 +05:30
int ret , idx = 0 , align , sz , field_type ;
2021-07-14 17:54:10 -07:00
const struct btf_var_secinfo * vsi ;
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow user
to still access such invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
struct btf_field_info tmp ;
2022-11-04 00:39:56 +05:30
u32 i , off , seen_mask = 0 ;
2021-07-14 17:54:10 -07:00
for_each_vsi ( i , t , vsi ) {
const struct btf_type * var = btf_type_by_id ( btf , vsi - > type ) ;
const struct btf_type * var_type = btf_type_by_id ( btf , var - > type ) ;
2022-11-04 00:39:56 +05:30
field_type = btf_get_field_type ( __btf_name_by_offset ( btf , var_type - > name_off ) ,
field_mask , & seen_mask , & align , & sz ) ;
if ( field_type = = 0 )
2021-07-14 17:54:10 -07:00
continue ;
2022-11-04 00:39:56 +05:30
if ( field_type < 0 )
return field_type ;
off = vsi - > offset ;
2021-07-14 17:54:10 -07:00
if ( vsi - > size ! = sz )
continue ;
if ( off % align )
2022-11-04 00:39:56 +05:30
continue ;
2022-04-15 21:33:42 +05:30
switch ( field_type ) {
2022-11-04 00:39:56 +05:30
case BPF_SPIN_LOCK :
case BPF_TIMER :
bpf: Recognize lock and list fields in allocated objects
Allow specifying bpf_spin_lock, bpf_list_head, bpf_list_node fields in an
allocated object.
Also update btf_struct_access to reject direct access to these special
fields.
A bpf_list_head allows implementing map-in-map style use cases, where an
allocated object with bpf_list_head is linked into a list in a map
value. This would require embedding a bpf_list_node, support for which
is also included. The bpf_spin_lock is used to protect the bpf_list_head
and other data.
While we strictly don't require to hold a bpf_spin_lock while touching
the bpf_list_head in such objects, as when we have access to it, we have
complete ownership of the object, the locking constraint is still kept
and may be conditionally lifted in the future.
Note that the specification of such types can be done just like map
values, e.g.:
struct bar {
struct bpf_list_node node;
};
struct foo {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(bar, node);
struct bpf_list_node node;
};
struct map_value {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(foo, node);
};
To recognize such types in user BTF, we build a btf_struct_metas array
of metadata items corresponding to each BTF ID. This is done once during
the btf_parse stage to avoid having to do it each time during the
verification process's requirement to inspect the metadata.
Moreover, the computed metadata needs to be passed to some helpers in
future patches which requires allocating them and storing them in the
BTF that is pinned by the program itself, so that valid access can be
assumed to such data during program runtime.
A key thing to note is that once a btf_struct_meta is available for a
type, both the btf_record and btf_field_offs should be available. It is
critical that btf_field_offs is available in case special fields are
present, as we extensively rely on special fields being zeroed out in
map values and allocated objects in later patches. The code ensures that
by bailing out in case of errors and ensuring both are available
together. If the record is not available, the special fields won't be
recognized, so not having both is also fine (in terms of being a
verification error and not a runtime bug).
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221118015614.2013203-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-18 07:25:56 +05:30
case BPF_LIST_NODE :
2022-11-04 00:39:56 +05:30
ret = btf_find_struct ( btf , var_type , off , sz , field_type ,
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow the
user to still access such an invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
idx < info_cnt ? & info [ idx ] : & tmp ) ;
if ( ret < 0 )
return ret ;
break ;
2022-11-04 00:39:56 +05:30
case BPF_KPTR_UNREF :
case BPF_KPTR_REF :
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow the
user to still access such an invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
ret = btf_find_kptr ( btf , var_type , off , sz ,
idx < info_cnt ? & info [ idx ] : & tmp ) ;
if ( ret < 0 )
return ret ;
break ;
bpf: Support bpf_list_head in map values
Add the support on the map side to parse, recognize, verify, and build
metadata table for a new special field of the type struct bpf_list_head.
To parameterize the bpf_list_head for a certain value type and the
list_node member it will accept in that value type, we use BTF
declaration tags.
The definition of bpf_list_head in a map value will be done as follows:
struct foo {
struct bpf_list_node node;
int data;
};
struct map_value {
struct bpf_list_head head __contains(foo, node);
};
Then, the bpf_list_head only allows adding to the list 'head' using the
bpf_list_node 'node' for the type struct foo.
The 'contains' annotation is a BTF declaration tag composed of three
parts, "contains:name:node", where the name is then used to look up the
type in the map BTF, with its kind hardcoded to BTF_KIND_STRUCT during
the lookup. The node defines name of the member in this type that has
the type struct bpf_list_node, which is actually used for linking into
the linked list. For now, 'kind' part is hardcoded as struct.
This allows building intrusive linked lists in BPF, using container_of
to obtain pointer to entry, while being completely type safe from the
perspective of the verifier. The verifier knows exactly the type of the
nodes, and knows that list helpers return that type at some fixed offset
where the bpf_list_node member used for this list exists. The verifier
also uses this information to disallow adding types that are not
accepted by a certain list.
For now, no elements can be added to such lists. Support for that is
coming in future patches, hence draining and freeing items is done with
a TODO that will be resolved in a future patch.
Note that the bpf_list_head_free function moves the list out to a local
variable under the lock and releases it, doing the actual draining of
the list items outside the lock. While this helps with not holding the
lock for too long pessimizing other concurrent list operations, it is
also necessary for deadlock prevention: unless every function called in
the critical section would be notrace, a fentry/fexit program could
attach and call bpf_map_update_elem again on the map, leading to the
same lock being acquired if the key matches and lead to a deadlock.
While this requires some special effort on part of the BPF programmer to
trigger and is highly unlikely to occur in practice, it is always better
if we can avoid such a condition.
While notrace would prevent this, doing the draining outside the lock
has advantages of its own, hence it is used to also fix the deadlock
related problem.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221114191547.1694267-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-15 00:45:25 +05:30
case BPF_LIST_HEAD :
ret = btf_find_list_head ( btf , var , var_type , - 1 , off , sz ,
idx < info_cnt ? & info [ idx ] : & tmp ) ;
if ( ret < 0 )
return ret ;
break ;
2022-04-15 21:33:42 +05:30
default :
return - EFAULT ;
}
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow the
user to still access such an invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
if ( ret = = BTF_FIELD_IGNORE )
continue ;
if ( idx > = info_cnt )
return - E2BIG ;
+ + idx ;
2019-01-31 15:40:04 -08:00
}
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow the
user to still access such an invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
return idx ;
2019-01-31 15:40:04 -08:00
}
2021-07-14 17:54:10 -07:00
static int btf_find_field ( const struct btf * btf , const struct btf_type * t ,
2022-11-04 00:39:56 +05:30
u32 field_mask , struct btf_field_info * info ,
int info_cnt )
2021-07-14 17:54:10 -07:00
{
if ( __btf_type_is_struct ( t ) )
2022-11-04 00:39:56 +05:30
return btf_find_struct_field ( btf , t , field_mask , info , info_cnt ) ;
2021-07-14 17:54:10 -07:00
else if ( btf_type_is_datasec ( t ) )
2022-11-04 00:39:56 +05:30
return btf_find_datasec_var ( btf , t , field_mask , info , info_cnt ) ;
2021-07-14 17:54:10 -07:00
return - EINVAL ;
}
2022-11-04 00:39:56 +05:30
static int btf_parse_kptr ( const struct btf * btf , struct btf_field * field ,
struct btf_field_info * info )
2021-07-14 17:54:10 -07:00
{
2022-11-04 00:39:56 +05:30
struct module * mod = NULL ;
const struct btf_type * t ;
struct btf * kernel_btf ;
2022-04-15 21:33:42 +05:30
int ret ;
2022-11-04 00:39:56 +05:30
s32 id ;
2022-04-15 21:33:42 +05:30
2022-11-04 00:39:56 +05:30
/* Find type in map BTF, and use it to look up the matching type
* in vmlinux or module BTFs , by name and kind .
*/
t = btf_type_by_id ( btf , info - > kptr . type_id ) ;
id = bpf_find_btf_id ( __btf_name_by_offset ( btf , t - > name_off ) , BTF_INFO_KIND ( t - > info ) ,
& kernel_btf ) ;
if ( id < 0 )
return id ;
/* Find and stash the function pointer for the destruction function that
* needs to be eventually invoked from the map free path .
*/
if ( info - > type = = BPF_KPTR_REF ) {
const struct btf_type * dtor_func ;
const char * dtor_func_name ;
unsigned long addr ;
s32 dtor_btf_id ;
/* This call also serves as a whitelist of allowed objects that
* can be used as a referenced pointer and be stored in a map at
* the same time .
*/
dtor_btf_id = btf_find_dtor_kfunc ( kernel_btf , id ) ;
if ( dtor_btf_id < 0 ) {
ret = dtor_btf_id ;
goto end_btf ;
}
2021-07-14 17:54:10 -07:00
2022-11-04 00:39:56 +05:30
dtor_func = btf_type_by_id ( kernel_btf , dtor_btf_id ) ;
if ( ! dtor_func ) {
ret = - ENOENT ;
goto end_btf ;
}
2022-04-15 21:33:42 +05:30
2022-11-04 00:39:56 +05:30
if ( btf_is_module ( kernel_btf ) ) {
mod = btf_try_get_module ( kernel_btf ) ;
if ( ! mod ) {
ret = - ENXIO ;
goto end_btf ;
}
}
/* We already verified dtor_func to be btf_type_is_func
* in register_btf_id_dtor_kfuncs .
*/
dtor_func_name = __btf_name_by_offset ( kernel_btf , dtor_func - > name_off ) ;
addr = kallsyms_lookup_name ( dtor_func_name ) ;
if ( ! addr ) {
ret = - EINVAL ;
goto end_mod ;
}
field - > kptr . dtor = ( void * ) addr ;
}
field - > kptr . btf_id = id ;
field - > kptr . btf = kernel_btf ;
field - > kptr . module = mod ;
return 0 ;
end_mod :
module_put ( mod ) ;
end_btf :
btf_put ( kernel_btf ) ;
return ret ;
2021-07-14 17:54:10 -07:00
}
bpf: Support bpf_list_head in map values
Add the support on the map side to parse, recognize, verify, and build
metadata table for a new special field of the type struct bpf_list_head.
To parameterize the bpf_list_head for a certain value type and the
list_node member it will accept in that value type, we use BTF
declaration tags.
The definition of bpf_list_head in a map value will be done as follows:
struct foo {
struct bpf_list_node node;
int data;
};
struct map_value {
struct bpf_list_head head __contains(foo, node);
};
Then, the bpf_list_head only allows adding to the list 'head' using the
bpf_list_node 'node' for the type struct foo.
The 'contains' annotation is a BTF declaration tag composed of three
parts, "contains:name:node", where the name is then used to look up the
type in the map BTF, with its kind hardcoded to BTF_KIND_STRUCT during
the lookup. The node defines name of the member in this type that has
the type struct bpf_list_node, which is actually used for linking into
the linked list. For now, 'kind' part is hardcoded as struct.
This allows building intrusive linked lists in BPF, using container_of
to obtain pointer to entry, while being completely type safe from the
perspective of the verifier. The verifier knows exactly the type of the
nodes, and knows that list helpers return that type at some fixed offset
where the bpf_list_node member used for this list exists. The verifier
also uses this information to disallow adding types that are not
accepted by a certain list.
For now, no elements can be added to such lists. Support for that is
coming in future patches, hence draining and freeing items is done with
a TODO that will be resolved in a future patch.
Note that the bpf_list_head_free function moves the list out to a local
variable under the lock and releases it, doing the actual draining of
the list items outside the lock. While this helps with not holding the
lock for too long pessimizing other concurrent list operations, it is
also necessary for deadlock prevention: unless every function called in
the critical section would be notrace, a fentry/fexit program could
attach and call bpf_map_update_elem again on the map, leading to the
same lock being acquired if the key matches and lead to a deadlock.
While this requires some special effort on part of the BPF programmer to
trigger and is highly unlikely to occur in practice, it is always better
if we can avoid such a condition.
While notrace would prevent this, doing the draining outside the lock
has advantages of its own, hence it is used to also fix the deadlock
related problem.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221114191547.1694267-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-15 00:45:25 +05:30
static int btf_parse_list_head ( const struct btf * btf , struct btf_field * field ,
struct btf_field_info * info )
{
const struct btf_type * t , * n = NULL ;
const struct btf_member * member ;
u32 offset ;
int i ;
t = btf_type_by_id ( btf , info - > list_head . value_btf_id ) ;
/* We've already checked that value_btf_id is a struct type. We
* just need to figure out the offset of the list_node , and
* verify its type .
*/
for_each_member ( i , t , member ) {
if ( strcmp ( info - > list_head . node_name , __btf_name_by_offset ( btf , member - > name_off ) ) )
continue ;
/* Invalid BTF, two members with same name */
if ( n )
return - EINVAL ;
n = btf_type_by_id ( btf , member - > type ) ;
if ( ! __btf_type_is_struct ( n ) )
return - EINVAL ;
if ( strcmp ( " bpf_list_node " , __btf_name_by_offset ( btf , n - > name_off ) ) )
return - EINVAL ;
offset = __btf_member_bit_offset ( n , member ) ;
if ( offset % 8 )
return - EINVAL ;
offset / = 8 ;
if ( offset % __alignof__ ( struct bpf_list_node ) )
return - EINVAL ;
field - > list_head . btf = ( struct btf * ) btf ;
field - > list_head . value_btf_id = info - > list_head . value_btf_id ;
field - > list_head . node_offset = offset ;
}
if ( ! n )
return - ENOENT ;
return 0 ;
}
2022-11-04 00:39:56 +05:30
struct btf_record * btf_parse_fields ( const struct btf * btf , const struct btf_type * t ,
u32 field_mask , u32 value_size )
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow the
user to still access such an invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
{
bpf: Refactor kptr_off_tab into btf_record
To prepare the BPF verifier to handle special fields in both map values
and program allocated types coming from program BTF, we need to refactor
the kptr_off_tab handling code into something more generic and reusable
across both cases to avoid code duplication.
Later patches also require passing this data to helpers at runtime, so
that they can work on user defined types, initialize them, destruct
them, etc.
The main observation is that both map values and such allocated types
point to a type in program BTF, hence they can be handled similarly. We
can prepare a field metadata table for both cases and store them in
struct bpf_map or struct btf depending on the use case.
Hence, refactor the code into generic btf_record and btf_field member
structs. The btf_record represents the fields of a specific btf_type in
user BTF. The cnt indicates the number of special fields we successfully
recognized, and field_mask is a bitmask of fields that were found, to
enable quick determination of availability of a certain field.
Subsequently, refactor the rest of the code to work with these generic
types, remove assumptions about kptr and kptr_off_tab, rename variables
to more meaningful names, etc.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221103191013.1236066-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-04 00:39:55 +05:30
struct btf_field_info info_arr [ BTF_FIELDS_MAX ] ;
struct btf_record * rec ;
bpf: Support bpf_list_head in map values
Add the support on the map side to parse, recognize, verify, and build
metadata table for a new special field of the type struct bpf_list_head.
To parameterize the bpf_list_head for a certain value type and the
list_node member it will accept in that value type, we use BTF
declaration tags.
The definition of bpf_list_head in a map value will be done as follows:
struct foo {
struct bpf_list_node node;
int data;
};
struct map_value {
struct bpf_list_head head __contains(foo, node);
};
Then, the bpf_list_head only allows adding to the list 'head' using the
bpf_list_node 'node' for the type struct foo.
The 'contains' annotation is a BTF declaration tag composed of three
parts, "contains:name:node", where the name is then used to look up the
type in the map BTF, with its kind hardcoded to BTF_KIND_STRUCT during
the lookup. The node defines the name of the member in this type that has
the type struct bpf_list_node, which is actually used for linking into
the linked list. For now, the 'kind' part is hardcoded as struct.
This allows building intrusive linked lists in BPF, using container_of
to obtain pointer to entry, while being completely type safe from the
perspective of the verifier. The verifier knows exactly the type of the
nodes, and knows that list helpers return that type at some fixed offset
where the bpf_list_node member used for this list exists. The verifier
also uses this information to disallow adding types that are not
accepted by a certain list.
For now, no elements can be added to such lists. Support for that is
coming in future patches, hence draining and freeing items is done with
a TODO that will be resolved in a future patch.
Note that the bpf_list_head_free function moves the list out to a local
variable under the lock and releases it, doing the actual draining of
the list items outside the lock. While this helps with not holding the
lock for too long and pessimizing other concurrent list operations, it is
also necessary for deadlock prevention: unless every function called in
the critical section were notrace, a fentry/fexit program could
attach and call bpf_map_update_elem again on the map, leading to the
same lock being acquired if the key matches and causing a deadlock.
While this requires some special effort on part of the BPF programmer to
trigger and is highly unlikely to occur in practice, it is always better
if we can avoid such a condition.
While notrace would prevent this, doing the draining outside the lock
has advantages of its own, hence it is used to also fix the deadlock
related problem.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221114191547.1694267-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-15 00:45:25 +05:30
u32 next_off = 0 ;
bpf: Refactor kptr_off_tab into btf_record
To prepare the BPF verifier to handle special fields in both map values
and program allocated types coming from program BTF, we need to refactor
the kptr_off_tab handling code into something more generic and reusable
across both cases to avoid code duplication.
Later patches also require passing this data to helpers at runtime, so
that they can work on user defined types, initialize them, destruct
them, etc.
The main observation is that both map values and such allocated types
point to a type in program BTF, hence they can be handled similarly. We
can prepare a field metadata table for both cases and store them in
struct bpf_map or struct btf depending on the use case.
Hence, refactor the code into generic btf_record and btf_field member
structs. The btf_record represents the fields of a specific btf_type in
user BTF. The cnt indicates the number of special fields we successfully
recognized, and field_mask is a bitmask of fields that were found, to
enable quick determination of availability of a certain field.
Subsequently, refactor the rest of the code to work with these generic
types, remove assumptions about kptr and kptr_off_tab, rename variables
to more meaningful names, etc.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221103191013.1236066-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-04 00:39:55 +05:30
int ret , i , cnt ;
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptrs are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow the
user to still access such an invalid pointer, but passing such pointers
into BPF helpers and kfuncs should not be permitted. A future patch in
this series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
2022-11-04 00:39:56 +05:30
ret = btf_find_field ( btf , t , field_mask , info_arr , ARRAY_SIZE ( info_arr ) ) ;
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptrs are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow the
user to still access such an invalid pointer, but passing such pointers
into BPF helpers and kfuncs should not be permitted. A future patch in
this series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
if ( ret < 0 )
return ERR_PTR ( ret ) ;
if ( ! ret )
return NULL ;
bpf: Refactor kptr_off_tab into btf_record
To prepare the BPF verifier to handle special fields in both map values
and program allocated types coming from program BTF, we need to refactor
the kptr_off_tab handling code into something more generic and reusable
across both cases to avoid code duplication.
Later patches also require passing this data to helpers at runtime, so
that they can work on user defined types, initialize them, destruct
them, etc.
The main observation is that both map values and such allocated types
point to a type in program BTF, hence they can be handled similarly. We
can prepare a field metadata table for both cases and store them in
struct bpf_map or struct btf depending on the use case.
Hence, refactor the code into generic btf_record and btf_field member
structs. The btf_record represents the fields of a specific btf_type in
user BTF. The cnt indicates the number of special fields we successfully
recognized, and field_mask is a bitmask of fields that were found, to
enable quick determination of availability of a certain field.
Subsequently, refactor the rest of the code to work with these generic
types, remove assumptions about kptr and kptr_off_tab, rename variables
to more meaningful names, etc.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221103191013.1236066-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-04 00:39:55 +05:30
cnt = ret ;
rec = kzalloc ( offsetof ( struct btf_record , fields [ cnt ] ) , GFP_KERNEL | __GFP_NOWARN ) ;
if ( ! rec )
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptrs are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow the
user to still access such an invalid pointer, but passing such pointers
into BPF helpers and kfuncs should not be permitted. A future patch in
this series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
return ERR_PTR ( - ENOMEM ) ;
2022-11-04 00:39:56 +05:30
rec - > spin_lock_off = - EINVAL ;
rec - > timer_off = - EINVAL ;
for ( i = 0 ; i < cnt ; i + + ) {
if ( info_arr [ i ] . off + btf_field_type_size ( info_arr [ i ] . type ) > value_size ) {
WARN_ONCE ( 1 , " verifier bug off %d size %d " , info_arr [ i ] . off , value_size ) ;
ret = - EFAULT ;
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptrs are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow the
user to still access such an invalid pointer, but passing such pointers
into BPF helpers and kfuncs should not be permitted. A future patch in
this series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
goto end ;
}
bpf: Support bpf_list_head in map values
Add the support on the map side to parse, recognize, verify, and build
metadata table for a new special field of the type struct bpf_list_head.
To parameterize the bpf_list_head for a certain value type and the
list_node member it will accept in that value type, we use BTF
declaration tags.
The definition of bpf_list_head in a map value will be done as follows:
struct foo {
struct bpf_list_node node;
int data;
};
struct map_value {
struct bpf_list_head head __contains(foo, node);
};
Then, the bpf_list_head only allows adding to the list 'head' using the
bpf_list_node 'node' for the type struct foo.
The 'contains' annotation is a BTF declaration tag composed of three
parts, "contains:name:node", where the name is then used to look up the
type in the map BTF, with its kind hardcoded to BTF_KIND_STRUCT during
the lookup. The node defines the name of the member in this type that has
the type struct bpf_list_node, which is actually used for linking into
the linked list. For now, the 'kind' part is hardcoded as struct.
This allows building intrusive linked lists in BPF, using container_of
to obtain pointer to entry, while being completely type safe from the
perspective of the verifier. The verifier knows exactly the type of the
nodes, and knows that list helpers return that type at some fixed offset
where the bpf_list_node member used for this list exists. The verifier
also uses this information to disallow adding types that are not
accepted by a certain list.
For now, no elements can be added to such lists. Support for that is
coming in future patches, hence draining and freeing items is done with
a TODO that will be resolved in a future patch.
Note that the bpf_list_head_free function moves the list out to a local
variable under the lock and releases it, doing the actual draining of
the list items outside the lock. While this helps with not holding the
lock for too long and pessimizing other concurrent list operations, it is
also necessary for deadlock prevention: unless every function called in
the critical section were notrace, a fentry/fexit program could
attach and call bpf_map_update_elem again on the map, leading to the
same lock being acquired if the key matches and causing a deadlock.
While this requires some special effort on part of the BPF programmer to
trigger and is highly unlikely to occur in practice, it is always better
if we can avoid such a condition.
While notrace would prevent this, doing the draining outside the lock
has advantages of its own, hence it is used to also fix the deadlock
related problem.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221114191547.1694267-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-15 00:45:25 +05:30
if ( info_arr [ i ] . off < next_off ) {
ret = - EEXIST ;
goto end ;
}
next_off = info_arr [ i ] . off + btf_field_type_size ( info_arr [ i ] . type ) ;
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptrs are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow the
user to still access such an invalid pointer, but passing such pointers
into BPF helpers and kfuncs should not be permitted. A future patch in
this series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
bpf: Refactor kptr_off_tab into btf_record
To prepare the BPF verifier to handle special fields in both map values
and program allocated types coming from program BTF, we need to refactor
the kptr_off_tab handling code into something more generic and reusable
across both cases to avoid code duplication.
Later patches also require passing this data to helpers at runtime, so
that they can work on user defined types, initialize them, destruct
them, etc.
The main observation is that both map values and such allocated types
point to a type in program BTF, hence they can be handled similarly. We
can prepare a field metadata table for both cases and store them in
struct bpf_map or struct btf depending on the use case.
Hence, refactor the code into generic btf_record and btf_field member
structs. The btf_record represents the fields of a specific btf_type in
user BTF. The cnt indicates the number of special fields we successfully
recognized, and field_mask is a bitmask of fields that were found, to
enable quick determination of availability of a certain field.
Subsequently, refactor the rest of the code to work with these generic
types, remove assumptions about kptr and kptr_off_tab, rename variables
to more meaningful names, etc.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221103191013.1236066-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-04 00:39:55 +05:30
rec - > field_mask | = info_arr [ i ] . type ;
rec - > fields [ i ] . offset = info_arr [ i ] . off ;
rec - > fields [ i ] . type = info_arr [ i ] . type ;
2022-11-04 00:39:56 +05:30
switch ( info_arr [ i ] . type ) {
case BPF_SPIN_LOCK :
WARN_ON_ONCE ( rec - > spin_lock_off > = 0 ) ;
/* Cache offset for faster lookup at runtime */
rec - > spin_lock_off = rec - > fields [ i ] . offset ;
break ;
case BPF_TIMER :
WARN_ON_ONCE ( rec - > timer_off > = 0 ) ;
/* Cache offset for faster lookup at runtime */
rec - > timer_off = rec - > fields [ i ] . offset ;
break ;
case BPF_KPTR_UNREF :
case BPF_KPTR_REF :
ret = btf_parse_kptr ( btf , & rec - > fields [ i ] , & info_arr [ i ] ) ;
if ( ret < 0 )
goto end ;
break ;
bpf: Support bpf_list_head in map values
Add the support on the map side to parse, recognize, verify, and build
metadata table for a new special field of the type struct bpf_list_head.
To parameterize the bpf_list_head for a certain value type and the
list_node member it will accept in that value type, we use BTF
declaration tags.
The definition of bpf_list_head in a map value will be done as follows:
struct foo {
struct bpf_list_node node;
int data;
};
struct map_value {
struct bpf_list_head head __contains(foo, node);
};
Then, the bpf_list_head only allows adding to the list 'head' using the
bpf_list_node 'node' for the type struct foo.
The 'contains' annotation is a BTF declaration tag composed of three
parts, "contains:name:node", where the name is then used to look up the
type in the map BTF, with its kind hardcoded to BTF_KIND_STRUCT during
the lookup. The node defines the name of the member in this type that has
the type struct bpf_list_node, which is actually used for linking into
the linked list. For now, the 'kind' part is hardcoded as struct.
This allows building intrusive linked lists in BPF, using container_of
to obtain pointer to entry, while being completely type safe from the
perspective of the verifier. The verifier knows exactly the type of the
nodes, and knows that list helpers return that type at some fixed offset
where the bpf_list_node member used for this list exists. The verifier
also uses this information to disallow adding types that are not
accepted by a certain list.
For now, no elements can be added to such lists. Support for that is
coming in future patches, hence draining and freeing items is done with
a TODO that will be resolved in a future patch.
Note that the bpf_list_head_free function moves the list out to a local
variable under the lock and releases it, doing the actual draining of
the list items outside the lock. While this helps with not holding the
lock for too long pessimizing other concurrent list operations, it is
also necessary for deadlock prevention: unless every function called in
the critical section would be notrace, a fentry/fexit program could
attach and call bpf_map_update_elem again on the map, leading to the
same lock being acquired if the key matches and lead to a deadlock.
While this requires some special effort on part of the BPF programmer to
trigger and is highly unlikely to occur in practice, it is always better
if we can avoid such a condition.
While notrace would prevent this, doing the draining outside the lock
has advantages of its own, hence it is used to also fix the deadlock
related problem.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221114191547.1694267-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-15 00:45:25 +05:30
case BPF_LIST_HEAD :
ret = btf_parse_list_head ( btf , & rec - > fields [ i ] , & info_arr [ i ] ) ;
if ( ret < 0 )
goto end ;
break ;
bpf: Recognize lock and list fields in allocated objects
Allow specifying bpf_spin_lock, bpf_list_head, bpf_list_node fields in an
allocated object.
Also update btf_struct_access to reject direct access to these special
fields.
A bpf_list_head allows implementing map-in-map style use cases, where an
allocated object with bpf_list_head is linked into a list in a map
value. This would require embedding a bpf_list_node, support for which
is also included. The bpf_spin_lock is used to protect the bpf_list_head
and other data.
While we strictly don't require to hold a bpf_spin_lock while touching
the bpf_list_head in such objects, as when we have access to it, we have
complete ownership of the object, the locking constraint is still kept
and may be conditionally lifted in the future.
Note that the specification of such types can be done just like map
values, e.g.:
struct bar {
struct bpf_list_node node;
};
struct foo {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(bar, node);
struct bpf_list_node node;
};
struct map_value {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(foo, node);
};
To recognize such types in user BTF, we build a btf_struct_metas array
of metadata items corresponding to each BTF ID. This is done once during
the btf_parse stage to avoid having to do it each time during the
verification process's requirement to inspect the metadata.
Moreover, the computed metadata needs to be passed to some helpers in
future patches which requires allocating them and storing them in the
BTF that is pinned by the program itself, so that valid access can be
assumed to such data during program runtime.
A key thing to note is that once a btf_struct_meta is available for a
type, both the btf_record and btf_field_offs should be available. It is
critical that btf_field_offs is available in case special fields are
present, as we extensively rely on special fields being zeroed out in
map values and allocated objects in later patches. The code ensures that
by bailing out in case of errors and ensuring both are available
together. If the record is not available, the special fields won't be
recognized, so not having both is also fine (in terms of being a
verification error and not a runtime bug).
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221118015614.2013203-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-18 07:25:56 +05:30
case BPF_LIST_NODE :
break ;
2022-11-04 00:39:56 +05:30
default :
ret = - EFAULT ;
goto end ;
}
bpf: Refactor kptr_off_tab into btf_record
To prepare the BPF verifier to handle special fields in both map values
and program allocated types coming from program BTF, we need to refactor
the kptr_off_tab handling code into something more generic and reusable
across both cases to avoid code duplication.
Later patches also require passing this data to helpers at runtime, so
that they can work on user defined types, initialize them, destruct
them, etc.
The main observation is that both map values and such allocated types
point to a type in program BTF, hence they can be handled similarly. We
can prepare a field metadata table for both cases and store them in
struct bpf_map or struct btf depending on the use case.
Hence, refactor the code into generic btf_record and btf_field member
structs. The btf_record represents the fields of a specific btf_type in
user BTF. The cnt indicates the number of special fields we successfully
recognized, and field_mask is a bitmask of fields that were found, to
enable quick determination of availability of a certain field.
Subsequently, refactor the rest of the code to work with these generic
types, remove assumptions about kptr and kptr_off_tab, rename variables
to more meaningful names, etc.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221103191013.1236066-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-04 00:39:55 +05:30
rec - > cnt + + ;
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow user
to still access such invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
}
bpf: Support bpf_list_head in map values
Add the support on the map side to parse, recognize, verify, and build
metadata table for a new special field of the type struct bpf_list_head.
To parameterize the bpf_list_head for a certain value type and the
list_node member it will accept in that value type, we use BTF
declaration tags.
The definition of bpf_list_head in a map value will be done as follows:
struct foo {
struct bpf_list_node node;
int data;
};
struct map_value {
struct bpf_list_head head __contains(foo, node);
};
Then, the bpf_list_head only allows adding to the list 'head' using the
bpf_list_node 'node' for the type struct foo.
The 'contains' annotation is a BTF declaration tag composed of four
parts, "contains:name:node" where the name is then used to look up the
type in the map BTF, with its kind hardcoded to BTF_KIND_STRUCT during
the lookup. The node defines name of the member in this type that has
the type struct bpf_list_node, which is actually used for linking into
the linked list. For now, 'kind' part is hardcoded as struct.
This allows building intrusive linked lists in BPF, using container_of
to obtain pointer to entry, while being completely type safe from the
perspective of the verifier. The verifier knows exactly the type of the
nodes, and knows that list helpers return that type at some fixed offset
where the bpf_list_node member used for this list exists. The verifier
also uses this information to disallow adding types that are not
accepted by a certain list.
For now, no elements can be added to such lists. Support for that is
coming in future patches, hence draining and freeing items is done with
a TODO that will be resolved in a future patch.
Note that the bpf_list_head_free function moves the list out to a local
variable under the lock and releases it, doing the actual draining of
the list items outside the lock. While this helps with not holding the
lock for too long pessimizing other concurrent list operations, it is
also necessary for deadlock prevention: unless every function called in
the critical section would be notrace, a fentry/fexit program could
attach and call bpf_map_update_elem again on the map, leading to the
same lock being acquired if the key matches and lead to a deadlock.
While this requires some special effort on part of the BPF programmer to
trigger and is highly unlikely to occur in practice, it is always better
if we can avoid such a condition.
While notrace would prevent this, doing the draining outside the lock
has advantages of its own, hence it is used to also fix the deadlock
related problem.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221114191547.1694267-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-15 00:45:25 +05:30
/* bpf_list_head requires bpf_spin_lock */
if ( btf_record_has_field ( rec , BPF_LIST_HEAD ) & & rec - > spin_lock_off < 0 ) {
ret = - EINVAL ;
goto end ;
}
bpf: Refactor kptr_off_tab into btf_record
To prepare the BPF verifier to handle special fields in both map values
and program allocated types coming from program BTF, we need to refactor
the kptr_off_tab handling code into something more generic and reusable
across both cases to avoid code duplication.
Later patches also require passing this data to helpers at runtime, so
that they can work on user defined types, initialize them, destruct
them, etc.
The main observation is that both map values and such allocated types
point to a type in program BTF, hence they can be handled similarly. We
can prepare a field metadata table for both cases and store them in
struct bpf_map or struct btf depending on the use case.
Hence, refactor the code into generic btf_record and btf_field member
structs. The btf_record represents the fields of a specific btf_type in
user BTF. The cnt indicates the number of special fields we successfully
recognized, and field_mask is a bitmask of fields that were found, to
enable quick determination of availability of a certain field.
Subsequently, refactor the rest of the code to work with these generic
types, remove assumptions about kptr and kptr_off_tab, rename variables
to more meaningful names, etc.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221103191013.1236066-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-04 00:39:55 +05:30
return rec ;
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow user
to still access such invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
end :
bpf: Refactor kptr_off_tab into btf_record
To prepare the BPF verifier to handle special fields in both map values
and program allocated types coming from program BTF, we need to refactor
the kptr_off_tab handling code into something more generic and reusable
across both cases to avoid code duplication.
Later patches also require passing this data to helpers at runtime, so
that they can work on user defined types, initialize them, destruct
them, etc.
The main observation is that both map values and such allocated types
point to a type in program BTF, hence they can be handled similarly. We
can prepare a field metadata table for both cases and store them in
struct bpf_map or struct btf depending on the use case.
Hence, refactor the code into generic btf_record and btf_field member
structs. The btf_record represents the fields of a specific btf_type in
user BTF. The cnt indicates the number of special fields we successfully
recognized, and field_mask is a bitmask of fields that were found, to
enable quick determination of availability of a certain field.
Subsequently, refactor the rest of the code to work with these generic
types, remove assumptions about kptr and kptr_off_tab, rename variables
to more meaningful names, etc.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221103191013.1236066-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-04 00:39:55 +05:30
btf_record_free ( rec ) ;
bpf: Allow storing unreferenced kptr in map
This commit introduces a new pointer type 'kptr' which can be embedded
in a map value to hold a PTR_TO_BTF_ID stored by a BPF program during
its invocation. When storing such a kptr, BPF program's PTR_TO_BTF_ID
register must have the same type as in the map value's BTF, and loading
a kptr marks the destination register as PTR_TO_BTF_ID with the correct
kernel BTF and BTF ID.
Such kptr are unreferenced, i.e. by the time another invocation of the
BPF program loads this pointer, the object which the pointer points to
may no longer exist. Since PTR_TO_BTF_ID loads (using BPF_LDX) are
patched to PROBE_MEM loads by the verifier, it would be safe to allow user
to still access such invalid pointer, but passing such pointers into
BPF helpers and kfuncs should not be permitted. A future patch in this
series will close this gap.
The flexibility offered by allowing programs to dereference such invalid
pointers while being safe at runtime frees the verifier from doing
complex lifetime tracking. As long as the user may ensure that the
object remains valid, it can ensure data read by it from the kernel
object is valid.
The user indicates that a certain pointer must be treated as kptr
capable of accepting stores of PTR_TO_BTF_ID of a certain type, by using
a BTF type tag 'kptr' on the pointed to type of the pointer. Then, this
information is recorded in the object BTF which will be passed into the
kernel by way of map's BTF information. The name and kind from the map
value BTF is used to look up the in-kernel type, and the actual BTF and
BTF ID is recorded in the map struct in a new kptr_off_tab member. For
now, only storing pointers to structs is permitted.
An example of this specification is shown below:
#define __kptr __attribute__((btf_type_tag("kptr")))
struct map_value {
...
struct task_struct __kptr *task;
...
};
Then, in a BPF program, user may store PTR_TO_BTF_ID with the type
task_struct into the map, and then load it later.
Note that the destination register is marked PTR_TO_BTF_ID_OR_NULL, as
the verifier cannot know whether the value is NULL or not statically, it
must treat all potential loads at that map value offset as loading a
possibly NULL pointer.
Only BPF_LDX, BPF_STX, and BPF_ST (with insn->imm = 0 to denote NULL)
are allowed instructions that can access such a pointer. On BPF_LDX, the
destination register is updated to be a PTR_TO_BTF_ID, and on BPF_STX,
it is checked whether the source register type is a PTR_TO_BTF_ID with
same BTF type as specified in the map BTF. The access size must always
be BPF_DW.
For the map in map support, the kptr_off_tab for outer map is copied
from the inner map's kptr_off_tab. It was chosen to do a deep copy
instead of introducing a refcount to kptr_off_tab, because the copy only
needs to be done when parameterizing using inner_map_fd in the map in map
case, hence would be unnecessary for all other users.
It is not permitted to use MAP_FREEZE command and mmap for BPF map
having kptrs, similar to the bpf_timer case. A kptr also requires that
BPF program has both read and write access to the map (hence both
BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG are disallowed).
Note that check_map_access must be called from both
check_helper_mem_access and for the BPF instructions, hence the kptr
check must distinguish between ACCESS_DIRECT and ACCESS_HELPER, and
reject ACCESS_HELPER cases. We rename stack_access_src to bpf_access_src
and reuse it for this purpose.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-2-memxor@gmail.com
2022-04-25 03:18:49 +05:30
return ERR_PTR ( ret ) ;
}
2022-11-18 07:25:57 +05:30
int btf_check_and_fixup_fields ( const struct btf * btf , struct btf_record * rec )
{
int i ;
/* There are two owning types, kptr_ref and bpf_list_head. The former
* only supports storing kernel types , which can never store references
* to program allocated local types , atleast not yet . Hence we only need
* to ensure that bpf_list_head ownership does not form cycles .
*/
if ( IS_ERR_OR_NULL ( rec ) | | ! ( rec - > field_mask & BPF_LIST_HEAD ) )
return 0 ;
for ( i = 0 ; i < rec - > cnt ; i + + ) {
struct btf_struct_meta * meta ;
u32 btf_id ;
if ( ! ( rec - > fields [ i ] . type & BPF_LIST_HEAD ) )
continue ;
btf_id = rec - > fields [ i ] . list_head . value_btf_id ;
meta = btf_find_struct_meta ( btf , btf_id ) ;
if ( ! meta )
return - EFAULT ;
rec - > fields [ i ] . list_head . value_rec = meta - > record ;
if ( ! ( rec - > field_mask & BPF_LIST_NODE ) )
continue ;
/* We need to ensure ownership acyclicity among all types. The
* proper way to do it would be to topologically sort all BTF
* IDs based on the ownership edges , since there can be multiple
* bpf_list_head in a type . Instead , we use the following
* reasoning :
*
* - A type can only be owned by another type in user BTF if it
* has a bpf_list_node .
* - A type can only _own_ another type in user BTF if it has a
* bpf_list_head .
*
* We ensure that if a type has both bpf_list_head and
* bpf_list_node , its element types cannot be owning types .
*
* To ensure acyclicity :
*
* When A only has bpf_list_head , ownership chain can be :
* A - > B - > C
* Where :
* - B has both bpf_list_head and bpf_list_node .
* - C only has bpf_list_node .
*
* When A has both bpf_list_head and bpf_list_node , some other
* type already owns it in the BTF domain , hence it can not own
* another owning type through any of the bpf_list_head edges .
* A - > B
* Where :
* - B only has bpf_list_node .
*/
if ( meta - > record - > field_mask & BPF_LIST_HEAD )
return - ELOOP ;
}
return 0 ;
}
2022-11-04 00:39:57 +05:30
/* Comparator handed to sort_r(): orders two u32 field offsets ascending.
 * Returns -1, 0 or 1; @priv is not used.
 */
static int btf_field_offs_cmp(const void *_a, const void *_b, const void *priv)
{
	const u32 lhs = *(const u32 *)_a;
	const u32 rhs = *(const u32 *)_b;

	/* Each comparison yields 0 or 1, so the difference is -1, 0 or 1. */
	return (lhs > rhs) - (lhs < rhs);
}
static void btf_field_offs_swap ( void * _a , void * _b , int size , const void * priv )
{
struct btf_field_offs * foffs = ( void * ) priv ;
u32 * off_base = foffs - > field_off ;
u32 * a = _a , * b = _b ;
u8 * sz_a , * sz_b ;
sz_a = foffs - > field_sz + ( a - off_base ) ;
sz_b = foffs - > field_sz + ( b - off_base ) ;
swap ( * a , * b ) ;
swap ( * sz_a , * sz_b ) ;
}
struct btf_field_offs * btf_parse_field_offs ( struct btf_record * rec )
{
struct btf_field_offs * foffs ;
u32 i , * off ;
u8 * sz ;
BUILD_BUG_ON ( ARRAY_SIZE ( foffs - > field_off ) ! = ARRAY_SIZE ( foffs - > field_sz ) ) ;
2022-11-15 00:45:23 +05:30
if ( IS_ERR_OR_NULL ( rec ) )
2022-11-04 00:39:57 +05:30
return NULL ;
foffs = kzalloc ( sizeof ( * foffs ) , GFP_KERNEL | __GFP_NOWARN ) ;
if ( ! foffs )
return ERR_PTR ( - ENOMEM ) ;
off = foffs - > field_off ;
sz = foffs - > field_sz ;
for ( i = 0 ; i < rec - > cnt ; i + + ) {
off [ i ] = rec - > fields [ i ] . offset ;
sz [ i ] = btf_field_type_size ( rec - > fields [ i ] . type ) ;
}
foffs - > cnt = rec - > cnt ;
if ( foffs - > cnt = = 1 )
return foffs ;
sort_r ( foffs - > field_off , foffs - > cnt , sizeof ( foffs - > field_off [ 0 ] ) ,
btf_field_offs_cmp , btf_field_offs_swap , foffs ) ;
return foffs ;
}
2020-09-28 12:31:04 +01:00
static void __btf_struct_show ( const struct btf * btf , const struct btf_type * t ,
u32 type_id , void * data , u8 bits_offset ,
struct btf_show * show )
2018-04-18 15:56:00 -07:00
{
const struct btf_member * member ;
2020-09-28 12:31:04 +01:00
void * safe_data ;
2018-04-18 15:56:00 -07:00
u32 i ;
2020-09-28 12:31:04 +01:00
safe_data = btf_show_start_struct_type ( show , t , type_id , data ) ;
if ( ! safe_data )
return ;
2018-04-18 15:56:00 -07:00
for_each_member ( i , t , member ) {
const struct btf_type * member_type = btf_type_by_id ( btf ,
member - > type ) ;
const struct btf_kind_operations * ops ;
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole works around the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
u32 member_offset , bitfield_size ;
u32 bytes_offset ;
u8 bits8_offset ;
2018-04-18 15:56:00 -07:00
2020-09-28 12:31:04 +01:00
btf_show_start_member ( show , member ) ;
2018-04-18 15:56:00 -07:00
2021-12-01 10:10:25 -08:00
member_offset = __btf_member_bit_offset ( t , member ) ;
bitfield_size = __btf_member_bitfield_size ( t , member ) ;
2019-01-10 11:14:00 -08:00
bytes_offset = BITS_ROUNDDOWN_BYTES ( member_offset ) ;
bits8_offset = BITS_PER_BYTE_MASKED ( member_offset ) ;
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole works around the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 255 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
if ( bitfield_size ) {
2020-09-28 12:31:04 +01:00
safe_data = btf_show_start_type ( show , member_type ,
member - > type ,
data + bytes_offset ) ;
if ( safe_data )
btf_bitfield_show ( safe_data ,
bits8_offset ,
bitfield_size , show ) ;
btf_show_end_type ( show ) ;
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole works around the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 255 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
} else {
ops = btf_type_ops ( member_type ) ;
2020-09-28 12:31:04 +01:00
ops - > show ( btf , member_type , member - > type ,
data + bytes_offset , bits8_offset , show ) ;
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole works around the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 255 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
}
2020-09-28 12:31:04 +01:00
btf_show_end_member ( show ) ;
2018-04-18 15:56:00 -07:00
}
2020-09-28 12:31:04 +01:00
btf_show_end_struct_type ( show ) ;
}
/*
 * Show a struct-kind value, optionally suppressing it when every member
 * turns out to be zero.
 *
 * @btf:         BTF object the type belongs to
 * @t:           type being shown
 * @type_id:     id of @t
 * @data:        pointer to the value's storage
 * @bits_offset: bit offset of the value within @data
 * @show:        show context; its state.member, state.depth_check and
 *               state.depth_to_show fields are read and updated here
 *
 * When the value is nested (state.depth > 0) and BTF_SHOW_ZERO is not
 * requested, __btf_struct_show() is called twice: a first pass whose only
 * purpose is to update state.depth_to_show, and, if that pass found
 * something deeper than the current depth, a second pass.
 * NOTE(review): presumably the first pass emits no output while
 * state.depth_check is set - confirm against the "struct btf_show" comments.
 */
static void btf_struct_show(const struct btf *btf, const struct btf_type *t,
			    u32 type_id, void *data, u8 bits_offset,
			    struct btf_show *show)
{
	/* Saved so it can be restored after the first pass below. */
	const struct btf_member *m = show->state.member;

	/*
	 * First check if any members would be shown (are non-zero).
	 * See comments above "struct btf_show" definition for more
	 * details on how this works at a high-level.
	 */
	if (show->state.depth > 0 && !(show->flags & BTF_SHOW_ZERO)) {
		/* Only the outermost checker records its level. */
		if (!show->state.depth_check) {
			show->state.depth_check = show->state.depth + 1;
			show->state.depth_to_show = 0;
		}
		__btf_struct_show(btf, t, type_id, data, bits_offset, show);
		/* Restore saved member data here */
		show->state.member = m;
		/* A check started at another level is still in progress. */
		if (show->state.depth_check != show->state.depth + 1)
			return;
		show->state.depth_check = 0;

		/* Nothing deeper than this level asked to be shown. */
		if (show->state.depth_to_show <= show->state.depth)
			return;
		/*
		 * Reaching here indicates we have recursed and found
		 * non-zero child values.
		 */
	}

	__btf_struct_show(btf, t, type_id, data, bits_offset, show);
}
2018-04-18 15:55:57 -07:00
static struct btf_kind_operations struct_ops = {
. check_meta = btf_struct_check_meta ,
2018-04-18 15:55:58 -07:00
. resolve = btf_struct_resolve ,
2018-04-18 15:55:59 -07:00
. check_member = btf_struct_check_member ,
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole workarounds the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
. check_kflag_member = btf_generic_check_kflag_member ,
2018-04-18 15:55:57 -07:00
. log_details = btf_struct_log ,
2020-09-28 12:31:04 +01:00
. show = btf_struct_show ,
2018-04-18 15:55:57 -07:00
} ;
2018-04-18 15:55:59 -07:00
static int btf_enum_check_member ( struct btf_verifier_env * env ,
const struct btf_type * struct_type ,
const struct btf_member * member ,
const struct btf_type * member_type )
{
u32 struct_bits_off = member - > offset ;
u32 struct_size , bytes_offset ;
if ( BITS_PER_BYTE_MASKED ( struct_bits_off ) ) {
btf_verifier_log_member ( env , struct_type , member ,
" Member is not byte aligned " ) ;
return - EINVAL ;
}
struct_size = struct_type - > size ;
bytes_offset = BITS_ROUNDDOWN_BYTES ( struct_bits_off ) ;
2020-03-10 16:32:29 +09:00
if ( struct_size - bytes_offset < member_type - > size ) {
2018-04-18 15:55:59 -07:00
btf_verifier_log_member ( env , struct_type , member ,
" Member exceeds struct_size " ) ;
return - EINVAL ;
}
return 0 ;
}
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole works around the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 255 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
static int btf_enum_check_kflag_member ( struct btf_verifier_env * env ,
const struct btf_type * struct_type ,
const struct btf_member * member ,
const struct btf_type * member_type )
{
u32 struct_bits_off , nr_bits , bytes_end , struct_size ;
u32 int_bitsize = sizeof ( int ) * BITS_PER_BYTE ;
struct_bits_off = BTF_MEMBER_BIT_OFFSET ( member - > offset ) ;
nr_bits = BTF_MEMBER_BITFIELD_SIZE ( member - > offset ) ;
if ( ! nr_bits ) {
if ( BITS_PER_BYTE_MASKED ( struct_bits_off ) ) {
btf_verifier_log_member ( env , struct_type , member ,
" Member is not byte aligned " ) ;
2019-09-25 10:38:35 +01:00
return - EINVAL ;
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole workarounds the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
}
nr_bits = int_bitsize ;
} else if ( nr_bits > int_bitsize ) {
btf_verifier_log_member ( env , struct_type , member ,
" Invalid member bitfield_size " ) ;
return - EINVAL ;
}
struct_size = struct_type - > size ;
bytes_end = BITS_ROUNDUP_BYTES ( struct_bits_off + nr_bits ) ;
if ( struct_size < bytes_end ) {
btf_verifier_log_member ( env , struct_type , member ,
" Member exceeds struct_size " ) ;
return - EINVAL ;
}
return 0 ;
}
2018-04-18 15:55:57 -07:00
static s32 btf_enum_check_meta ( struct btf_verifier_env * env ,
const struct btf_type * t ,
u32 meta_left )
{
const struct btf_enum * enums = btf_type_enum ( t ) ;
struct btf * btf = env - > btf ;
bpf: Add btf enum64 support
Currently, BTF only supports up to 32bit enum values with BTF_KIND_ENUM.
But in kernel, some enum indeed has 64bit values, e.g.,
in uapi bpf.h, we have
enum {
BPF_F_INDEX_MASK = 0xffffffffULL,
BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
};
In this case, BTF_KIND_ENUM will encode the value of BPF_F_CTXLEN_MASK
as 0, which certainly is incorrect.
This patch added a new btf kind, BTF_KIND_ENUM64, which permits
64bit value to cover the above use case. The BTF_KIND_ENUM64 has
the following three fields followed by the common type:
struct btf_enum64 {
__u32 name_off;
__u32 val_lo32;
__u32 val_hi32;
};
Currently, btf type section has an alignment of 4 as all element types
are u32. Representing the value with __u64 will introduce a pad
for bpf_enum64 and may also introduce misalignment for the 64bit value.
Hence, two members of val_hi32 and val_lo32 are chosen to avoid these issues.
The kflag is also introduced for BTF_KIND_ENUM and BTF_KIND_ENUM64
to indicate whether the value is signed or unsigned. The kflag intends
to provide consistent output of BTF C format with the original
source code. For example, the original BTF_KIND_ENUM bit value is 0xffffffff.
The format C has two choices, printing out 0xffffffff or -1 and current libbpf
prints out as unsigned value. But if the signedness is preserved in btf,
the value can be printed the same as the original source code.
The kflag value 0 means unsigned values, which is consistent to the default
by libbpf and should also cover most cases as well.
The new BTF_KIND_ENUM64 is intended to support the enum value represented as
64bit value. But it can represent all BTF_KIND_ENUM values as well.
The compiler ([1]) and pahole will generate BTF_KIND_ENUM64 only if the value has
to be represented with 64 bits.
In addition, a static inline function btf_kind_core_compat() is introduced which
will be used later when libbpf relo_core.c changed. Here the kernel shares the
same relo_core.c with libbpf.
[1] https://reviews.llvm.org/D124641
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220607062600.3716578-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-06-06 23:26:00 -07:00
const char * fmt_str ;
2018-04-18 15:55:57 -07:00
u16 i , nr_enums ;
u32 meta_needed ;
nr_enums = btf_type_vlen ( t ) ;
meta_needed = nr_enums * sizeof ( * enums ) ;
if ( meta_left < meta_needed ) {
btf_verifier_log_basic ( env , t ,
" meta_left:%u meta_needed:%u " ,
meta_left , meta_needed ) ;
return - EINVAL ;
}
2019-09-17 10:45:37 -07:00
if ( t - > size > 8 | | ! is_power_of_2 ( t - > size ) ) {
btf_verifier_log_type ( env , t , " Unexpected size " ) ;
2018-04-18 15:55:57 -07:00
return - EINVAL ;
}
2018-11-27 13:23:28 -08:00
/* enum type either no name or a valid one */
if ( t - > name_off & &
! btf_name_valid_identifier ( env - > btf , t - > name_off ) ) {
btf_verifier_log_type ( env , t , " Invalid name " ) ;
return - EINVAL ;
}
2018-04-18 15:55:57 -07:00
btf_verifier_log_type ( env , t , NULL ) ;
for ( i = 0 ; i < nr_enums ; i + + ) {
2018-04-21 09:48:23 -07:00
if ( ! btf_name_offset_valid ( btf , enums [ i ] . name_off ) ) {
2018-04-18 15:55:57 -07:00
btf_verifier_log ( env , " \t Invalid name_offset:%u " ,
2018-04-21 09:48:23 -07:00
enums [ i ] . name_off ) ;
2018-04-18 15:55:57 -07:00
return - EINVAL ;
}
2018-11-27 13:23:28 -08:00
/* enum member must have a valid name */
if ( ! enums [ i ] . name_off | |
! btf_name_valid_identifier ( btf , enums [ i ] . name_off ) ) {
btf_verifier_log_type ( env , t , " Invalid name " ) ;
return - EINVAL ;
}
2019-10-15 20:24:57 -07:00
if ( env - > log . level = = BPF_LOG_KERNEL )
continue ;
bpf: Add btf enum64 support
Currently, BTF only supports up to 32bit enum value with BTF_KIND_ENUM.
But in kernel, some enum indeed has 64bit values, e.g.,
in uapi bpf.h, we have
enum {
BPF_F_INDEX_MASK = 0xffffffffULL,
BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
};
In this case, BTF_KIND_ENUM will encode the value of BPF_F_CTXLEN_MASK
as 0, which certainly is incorrect.
This patch added a new btf kind, BTF_KIND_ENUM64, which permits
64bit value to cover the above use case. The BTF_KIND_ENUM64 has
the following three fields followed by the common type:
struct btf_enum64 {
__u32 name_off;
__u32 val_lo32;
__u32 val_hi32;
};
Currently, btf type section has an alignment of 4 as all element types
are u32. Representing the value with __u64 will introduce a pad
for bpf_enum64 and may also introduce misalignment for the 64bit value.
Hence, two members of val_hi32 and val_lo32 are chosen to avoid these issues.
The kflag is also introduced for BTF_KIND_ENUM and BTF_KIND_ENUM64
to indicate whether the value is signed or unsigned. The kflag intends
to provide consistent output of BTF C format with the original
source code. For example, the original BTF_KIND_ENUM bit value is 0xffffffff.
The format C has two choices, printing out 0xffffffff or -1 and current libbpf
prints out as unsigned value. But if the signedness is preserved in btf,
the value can be printed the same as the original source code.
The kflag value 0 means unsigned values, which is consistent to the default
by libbpf and should also cover most cases as well.
The new BTF_KIND_ENUM64 is intended to support the enum value represented as
64bit value. But it can represent all BTF_KIND_ENUM values as well.
The compiler ([1]) and pahole will generate BTF_KIND_ENUM64 only if the value has
to be represented with 64 bits.
In addition, a static inline function btf_kind_core_compat() is introduced which
will be used later when libbpf relo_core.c changed. Here the kernel shares the
same relo_core.c with libbpf.
[1] https://reviews.llvm.org/D124641
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220607062600.3716578-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-06-06 23:26:00 -07:00
fmt_str = btf_type_kflag ( t ) ? " \t %s val=%d \n " : " \t %s val=%u \n " ;
btf_verifier_log ( env , fmt_str ,
2018-12-13 10:41:46 -08:00
__btf_name_by_offset ( btf , enums [ i ] . name_off ) ,
2018-04-18 15:55:57 -07:00
enums [ i ] . val ) ;
}
return meta_needed ;
}
static void btf_enum_log ( struct btf_verifier_env * env ,
const struct btf_type * t )
{
btf_verifier_log ( env , " size=%u vlen=%u " , t - > size , btf_type_vlen ( t ) ) ;
}
2020-09-28 12:31:04 +01:00
/*
 * Show an enum value.
 *
 * The data is read as an int and compared against each member in
 * declaration order.  The name of the first member with an equal value
 * is shown; when no member matches, the number itself is shown, using
 * a signed format if the type's kind_flag is set and an unsigned one
 * otherwise.
 *
 * Nothing is shown when btf_show_start_type() returns NULL.
 * bits_offset is not used here.
 */
static void btf_enum_show(const struct btf *btf, const struct btf_type *t,
			  u32 type_id, void *data, u8 bits_offset,
			  struct btf_show *show)
{
	const struct btf_enum *member = btf_type_enum(t);
	const struct btf_enum *end = member + btf_type_vlen(t);
	void *safe_data;
	int val;

	safe_data = btf_show_start_type(show, t, type_id, data);
	if (!safe_data)
		return;

	val = *(int *)safe_data;

	for (; member < end; member++) {
		if (member->val == val) {
			/* first match wins: show the member's name */
			btf_show_type_value(show, " %s ",
					    __btf_name_by_offset(btf,
								 member->name_off));
			goto out;
		}
	}

	/* no member carries this value: fall back to the raw number */
	if (btf_type_kflag(t))
		btf_show_type_value(show, " %d ", val);
	else
		btf_show_type_value(show, " %u ", val);

out:
	btf_show_end_type(show);
}
2018-04-18 15:55:57 -07:00
static struct btf_kind_operations enum_ops = {
. check_meta = btf_enum_check_meta ,
2018-04-18 15:55:58 -07:00
. resolve = btf_df_resolve ,
2018-04-18 15:55:59 -07:00
. check_member = btf_enum_check_member ,
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole workarounds the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
. check_kflag_member = btf_enum_check_kflag_member ,
2018-04-18 15:55:57 -07:00
. log_details = btf_enum_log ,
2020-09-28 12:31:04 +01:00
. show = btf_enum_show ,
2018-04-18 15:55:57 -07:00
} ;
bpf: Add btf enum64 support
Currently, BTF only supports up to 32bit enum value with BTF_KIND_ENUM.
But in kernel, some enum indeed has 64bit values, e.g.,
in uapi bpf.h, we have
enum {
BPF_F_INDEX_MASK = 0xffffffffULL,
BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
};
In this case, BTF_KIND_ENUM will encode the value of BPF_F_CTXLEN_MASK
as 0, which certainly is incorrect.
This patch added a new btf kind, BTF_KIND_ENUM64, which permits
64bit value to cover the above use case. The BTF_KIND_ENUM64 has
the following three fields followed by the common type:
struct btf_enum64 {
__u32 name_off;
__u32 val_lo32;
__u32 val_hi32;
};
Currently, btf type section has an alignment of 4 as all element types
are u32. Representing the value with __u64 will introduce a pad
for bpf_enum64 and may also introduce misalignment for the 64bit value.
Hence, two members of val_hi32 and val_lo32 are chosen to avoid these issues.
The kflag is also introduced for BTF_KIND_ENUM and BTF_KIND_ENUM64
to indicate whether the value is signed or unsigned. The kflag intends
to provide consistent output of BTF C format with the original
source code. For example, the original BTF_KIND_ENUM bit value is 0xffffffff.
The format C has two choices, printing out 0xffffffff or -1 and current libbpf
prints out as unsigned value. But if the signedness is preserved in btf,
the value can be printed the same as the original source code.
The kflag value 0 means unsigned values, which is consistent to the default
by libbpf and should also cover most cases as well.
The new BTF_KIND_ENUM64 is intended to support the enum value represented as
64bit value. But it can represent all BTF_KIND_ENUM values as well.
The compiler ([1]) and pahole will generate BTF_KIND_ENUM64 only if the value has
to be represented with 64 bits.
In addition, a static inline function btf_kind_core_compat() is introduced which
will be used later when libbpf relo_core.c changed. Here the kernel shares the
same relo_core.c with libbpf.
[1] https://reviews.llvm.org/D124641
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220607062600.3716578-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-06-06 23:26:00 -07:00
/*
 * Validate the meta data of an enum64 type and log its members.
 *
 * The checks run in this order:
 *  - meta_left must be large enough to hold one struct btf_enum64 per
 *    member;
 *  - t->size must be a power of two that is no larger than 8;
 *  - the type name, when there is one, must be a valid identifier;
 *  - every member must have a valid, non-zero name offset that names a
 *    valid identifier.
 *
 * Each member is logged together with its 64-bit value, formatted as
 * signed when the type's kind_flag is set and as unsigned otherwise.
 * The per-member line is skipped when the log level is BPF_LOG_KERNEL.
 *
 * Return: the number of bytes occupied by the members, or -EINVAL when
 * any check fails.
 */
static s32 btf_enum64_check_meta(struct btf_verifier_env *env,
				 const struct btf_type *t,
				 u32 meta_left)
{
	const struct btf_enum64 *members = btf_type_enum64(t);
	const u16 nr_members = btf_type_vlen(t);
	const u32 meta_needed = nr_members * sizeof(*members);
	struct btf *btf = env->btf;
	u16 idx;

	if (meta_needed > meta_left) {
		btf_verifier_log_basic(env, t,
				       " meta_left:%u meta_needed:%u ",
				       meta_left, meta_needed);
		return -EINVAL;
	}

	if (!(t->size <= 8 && is_power_of_2(t->size))) {
		btf_verifier_log_type(env, t, " Unexpected size ");
		return -EINVAL;
	}

	/* the enum itself may be anonymous, but a given name must be valid */
	if (t->name_off && !btf_name_valid_identifier(btf, t->name_off)) {
		btf_verifier_log_type(env, t, " Invalid name ");
		return -EINVAL;
	}

	btf_verifier_log_type(env, t, NULL);

	for (idx = 0; idx < nr_members; idx++) {
		const struct btf_enum64 *member = &members[idx];
		const char *fmt;

		if (!btf_name_offset_valid(btf, member->name_off)) {
			btf_verifier_log(env, " \t Invalid name_offset:%u ",
					 member->name_off);
			return -EINVAL;
		}

		/* unlike the enum itself, a member may not be anonymous */
		if (!member->name_off ||
		    !btf_name_valid_identifier(btf, member->name_off)) {
			btf_verifier_log_type(env, t, " Invalid name ");
			return -EINVAL;
		}

		if (env->log.level == BPF_LOG_KERNEL)
			continue;

		/* kind_flag selects signed vs. unsigned formatting */
		if (btf_type_kflag(t))
			fmt = " \t %s val=%lld \n ";
		else
			fmt = " \t %s val=%llu \n ";

		btf_verifier_log(env, fmt,
				 __btf_name_by_offset(btf, member->name_off),
				 btf_enum64_value(member));
	}

	return meta_needed;
}
/*
 * Show an enum64 value.
 *
 * The data is read as a 64-bit quantity and compared against each
 * member's value in declaration order.  The name of the first equal
 * member is shown; when no member matches, the number itself is shown,
 * using a signed format if the type's kind_flag is set and an unsigned
 * one otherwise.
 *
 * Nothing is shown when btf_show_start_type() returns NULL.
 * bits_offset is not used here.
 */
static void btf_enum64_show(const struct btf *btf, const struct btf_type *t,
			    u32 type_id, void *data, u8 bits_offset,
			    struct btf_show *show)
{
	const struct btf_enum64 *member = btf_type_enum64(t);
	const struct btf_enum64 *end = member + btf_type_vlen(t);
	void *safe_data;
	s64 val;

	safe_data = btf_show_start_type(show, t, type_id, data);
	if (!safe_data)
		return;

	val = *(u64 *)safe_data;

	for (; member < end; member++) {
		if (val == btf_enum64_value(member)) {
			/* first match wins: show the member's name */
			btf_show_type_value(show, " %s ",
					    __btf_name_by_offset(btf,
								 member->name_off));
			goto out;
		}
	}

	/* no member carries this value: fall back to the raw number */
	if (btf_type_kflag(t))
		btf_show_type_value(show, " %lld ", val);
	else
		btf_show_type_value(show, " %llu ", val);

out:
	btf_show_end_type(show);
}
/*
 * Kind operations for enum64 types.  Meta checking and showing have
 * enum64-specific handlers; the member checks and the detail logging
 * are the same btf_enum_* handlers that enum_ops uses.
 */
static struct btf_kind_operations enum64_ops = {
	/* enum64-specific handlers */
	.check_meta		= btf_enum64_check_meta,
	.show			= btf_enum64_show,

	/* handlers shared with enum_ops */
	.check_member		= btf_enum_check_member,
	.check_kflag_member	= btf_enum_check_kflag_member,
	.log_details		= btf_enum_log,

	.resolve		= btf_df_resolve,
};
2018-11-19 15:29:08 -08:00
/*
 * Validate the meta data of a function prototype type.
 *
 * meta_left must be large enough to hold one struct btf_param per
 * parameter (the parameter count is the type's vlen).  The type must
 * have no name and its kind_flag must be clear.
 *
 * Return: the number of bytes occupied by the parameters, or -EINVAL
 * when any check fails.
 */
static s32 btf_func_proto_check_meta(struct btf_verifier_env *env,
				     const struct btf_type *t,
				     u32 meta_left)
{
	const u32 meta_needed = btf_type_vlen(t) * sizeof(struct btf_param);

	if (meta_needed > meta_left) {
		btf_verifier_log_basic(env, t,
				       " meta_left:%u meta_needed:%u ",
				       meta_left, meta_needed);
		goto invalid;
	}

	/* any name at all is rejected for this kind */
	if (t->name_off) {
		btf_verifier_log_type(env, t, " Invalid name ");
		goto invalid;
	}

	if (btf_type_kflag(t)) {
		btf_verifier_log_type(env, t, " Invalid btf_info kind_flag ");
		goto invalid;
	}

	btf_verifier_log_type(env, t, NULL);
	return meta_needed;

invalid:
	return -EINVAL;
}
static void btf_func_proto_log ( struct btf_verifier_env * env ,
const struct btf_type * t )
{
const struct btf_param * args = ( const struct btf_param * ) ( t + 1 ) ;
u16 nr_args = btf_type_vlen ( t ) , i ;
btf_verifier_log ( env , " return=%u args=( " , t - > type ) ;
if ( ! nr_args ) {
btf_verifier_log ( env , " void " ) ;
goto done ;
}
if ( nr_args = = 1 & & ! args [ 0 ] . type ) {
/* Only one vararg */
btf_verifier_log ( env , " vararg " ) ;
goto done ;
}
btf_verifier_log ( env , " %u %s " , args [ 0 ] . type ,
2018-12-13 10:41:46 -08:00
__btf_name_by_offset ( env - > btf ,
args [ 0 ] . name_off ) ) ;
2018-11-19 15:29:08 -08:00
for ( i = 1 ; i < nr_args - 1 ; i + + )
btf_verifier_log ( env , " , %u %s " , args [ i ] . type ,
2018-12-13 10:41:46 -08:00
__btf_name_by_offset ( env - > btf ,
args [ i ] . name_off ) ) ;
2018-11-19 15:29:08 -08:00
if ( nr_args > 1 ) {
const struct btf_param * last_arg = & args [ nr_args - 1 ] ;
if ( last_arg - > type )
btf_verifier_log ( env , " , %u %s " , last_arg - > type ,
2018-12-13 10:41:46 -08:00
__btf_name_by_offset ( env - > btf ,
last_arg - > name_off ) ) ;
2018-11-19 15:29:08 -08:00
else
btf_verifier_log ( env , " , vararg " ) ;
}
done :
btf_verifier_log ( env , " ) " ) ;
}
static struct btf_kind_operations func_proto_ops = {
. check_meta = btf_func_proto_check_meta ,
. resolve = btf_df_resolve ,
/*
* BTF_KIND_FUNC_PROTO cannot be directly referred by
* a struct ' s member .
*
2021-05-25 10:56:59 +08:00
* It should be a function pointer instead .
2018-11-19 15:29:08 -08:00
* ( i . e . struct ' s member - > BTF_KIND_PTR - > BTF_KIND_FUNC_PROTO )
*
* Hence , there is no btf_func_check_member ( ) .
*/
. check_member = btf_df_check_member ,
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole workarounds the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 256 which is enough for today as maximum
bitfield in compiler can be 128 where int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
. check_kflag_member = btf_df_check_kflag_member ,
2018-11-19 15:29:08 -08:00
. log_details = btf_func_proto_log ,
2020-09-28 12:31:04 +01:00
. show = btf_df_show ,
2018-11-19 15:29:08 -08:00
} ;
/*
 * Validate the meta data of a function type.
 *
 * The type must have a name that is a valid identifier, its vlen
 * (checked here as the func linkage) must not exceed BTF_FUNC_GLOBAL,
 * and its kind_flag must be clear.
 *
 * meta_left is not consulted.
 *
 * Return: 0 on success, or -EINVAL when any check fails.
 */
static s32 btf_func_check_meta(struct btf_verifier_env *env,
			       const struct btf_type *t,
			       u32 meta_left)
{
	/* a function must be named, and the name must be an identifier */
	if (!(t->name_off &&
	      btf_name_valid_identifier(env->btf, t->name_off))) {
		btf_verifier_log_type(env, t, " Invalid name ");
		goto invalid;
	}

	if (BTF_FUNC_GLOBAL < btf_type_vlen(t)) {
		btf_verifier_log_type(env, t, " Invalid func linkage ");
		goto invalid;
	}

	if (btf_type_kflag(t)) {
		btf_verifier_log_type(env, t, " Invalid btf_info kind_flag ");
		goto invalid;
	}

	btf_verifier_log_type(env, t, NULL);
	return 0;

invalid:
	return -EINVAL;
}
2022-02-03 11:17:27 -08:00
static int btf_func_resolve ( struct btf_verifier_env * env ,
const struct resolve_vertex * v )
{
const struct btf_type * t = v - > t ;
u32 next_type_id = t - > type ;
int err ;
err = btf_func_check ( env , t ) ;
if ( err )
return err ;
env_stack_pop_resolved ( env , next_type_id , 0 ) ;
return 0 ;
}
2018-11-19 15:29:08 -08:00
static struct btf_kind_operations func_ops = {
. check_meta = btf_func_check_meta ,
2022-02-03 11:17:27 -08:00
. resolve = btf_func_resolve ,
2018-11-19 15:29:08 -08:00
. check_member = btf_df_check_member ,
bpf: btf: fix struct/union/fwd types with kind_flag
This patch fixed two issues with BTF. One is related to
struct/union bitfield encoding and the other is related to
forward type.
Issue #1 and solution:
======================
Current btf encoding of bitfield follows what pahole generates.
For each bitfield, pahole will duplicate the type chain and
put the bitfield size at the final int or enum type.
Since the BTF enum type cannot encode bit size,
pahole works around the issue by generating
an int type whenever the enum bit size is not 32.
For example,
-bash-4.4$ cat t.c
typedef int ___int;
enum A { A1, A2, A3 };
struct t {
int a[5];
___int b:4;
volatile enum A c:4;
} g;
-bash-4.4$ gcc -c -O2 -g t.c
The current kernel supports the following BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t size=24 vlen=3
a type_id=5 bits_offset=0
b type_id=9 bits_offset=160
c type_id=11 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
[8] INT int size=1 bit_offset=0 nr_bits=4 encoding=(none)
[9] TYPEDEF ___int type_id=8
[10] INT (anon) size=1 bit_offset=0 nr_bits=4 encoding=SIGNED
[11] VOLATILE (anon) type_id=10
Two issues are in the above:
. by changing enum type to int, we lost the original
type information and this will not be ideal later
when we try to convert BTF to a header file.
. the type duplication for bitfields will cause
BTF bloat. Duplicated types cannot be deduplicated
later if the bitfield size is different.
To fix this issue, this patch implemented a compatible
change for BTF struct type encoding:
. the bit 31 of struct_type->info, previously reserved,
now is used to indicate whether bitfield_size is
encoded in btf_member or not.
. if bit 31 of struct_type->info is set,
btf_member->offset will encode like:
bit 0 - 23: bit offset
bit 24 - 31: bitfield size
if bit 31 is not set, the old behavior is preserved:
bit 0 - 31: bit offset
So if the struct contains a bit field, the maximum bit offset
will be reduced to (2^24 - 1) instead of MAX_UINT. The maximum
bitfield size will be 255 (8 bits), which is enough for today as the
maximum bitfield in a compiler can be 128 where the int128 type is supported.
This kernel patch intends to support the new BTF encoding:
$ pahole -JV t.o
[1] TYPEDEF ___int type_id=2
[2] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[3] ENUM A size=4 vlen=3
A1 val=0
A2 val=1
A3 val=2
[4] STRUCT t kind_flag=1 size=24 vlen=3
a type_id=5 bitfield_size=0 bits_offset=0
b type_id=1 bitfield_size=4 bits_offset=160
c type_id=7 bitfield_size=4 bits_offset=164
[5] ARRAY (anon) type_id=2 index_type_id=2 nr_elems=5
[6] INT sizetype size=8 bit_offset=0 nr_bits=64 encoding=(none)
[7] VOLATILE (anon) type_id=3
Issue #2 and solution:
======================
Current forward type in BTF does not specify whether the original
type is struct or union. This will not work for type pretty print
and BTF-to-header-file conversion as struct/union must be specified.
$ cat tt.c
struct t;
union u;
int foo(struct t *t, union u *u) { return 0; }
$ gcc -c -g -O2 tt.c
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t type_id=0
[3] PTR (anon) type_id=2
[4] FWD u type_id=0
[5] PTR (anon) type_id=4
To fix this issue, similar to issue #1, type->info bit 31
is used. If the bit is set, it is union type. Otherwise, it is
a struct type.
$ pahole -JV tt.o
[1] INT int size=4 bit_offset=0 nr_bits=32 encoding=SIGNED
[2] FWD t kind_flag=0 type_id=0
[3] PTR (anon) kind_flag=0 type_id=2
[4] FWD u kind_flag=1 type_id=0
[5] PTR (anon) kind_flag=0 type_id=4
Pahole/LLVM change:
===================
The new kind_flag functionality has been implemented in pahole
and llvm:
https://github.com/yonghong-song/pahole/tree/bitfield
https://github.com/yonghong-song/llvm/tree/bitfield
Note that pahole hasn't implemented func/func_proto kind
and .BTF.ext. So to print function signature with bpftool,
the llvm compiler should be used.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-12-15 22:13:51 -08:00
. check_kflag_member = btf_df_check_kflag_member ,
2018-11-19 15:29:08 -08:00
. log_details = btf_ref_type_log ,
2020-09-28 12:31:04 +01:00
. show = btf_df_show ,
2018-11-19 15:29:08 -08:00
} ;
2019-04-09 23:20:09 +02:00
/* Validate the metadata of a BTF_KIND_VAR type.
 *
 * A VAR must be followed by a struct btf_var, have vlen == 0 and no
 * kind_flag, carry a valid non-empty name, refer to a non-void type id
 * and use either BTF_VAR_STATIC or BTF_VAR_GLOBAL_ALLOCATED linkage.
 *
 * Returns sizeof(struct btf_var) on success, -EINVAL otherwise.
 */
static s32 btf_var_check_meta(struct btf_verifier_env *env,
			      const struct btf_type *t,
			      u32 meta_left)
{
	const struct btf_var *var_info;
	u32 meta_needed = sizeof(*var_info);

	if (meta_left < meta_needed) {
		btf_verifier_log_basic(env, t,
				       " meta_left:%u meta_needed:%u ",
				       meta_left, meta_needed);
		return -EINVAL;
	}

	if (btf_type_vlen(t)) {
		btf_verifier_log_type(env, t, " vlen != 0 ");
		return -EINVAL;
	}

	if (btf_type_kflag(t)) {
		btf_verifier_log_type(env, t, " Invalid btf_info kind_flag ");
		return -EINVAL;
	}

	if (!t->name_off ||
	    !__btf_name_valid(env->btf, t->name_off, true)) {
		btf_verifier_log_type(env, t, " Invalid name ");
		return -EINVAL;
	}

	/* A var cannot be in type void */
	if (!t->type || !BTF_TYPE_ID_VALID(t->type)) {
		btf_verifier_log_type(env, t, " Invalid type_id ");
		return -EINVAL;
	}

	var_info = btf_type_var(t);
	switch (var_info->linkage) {
	case BTF_VAR_STATIC:
	case BTF_VAR_GLOBAL_ALLOCATED:
		break;
	default:
		btf_verifier_log_type(env, t, " Linkage not supported ");
		return -EINVAL;
	}

	btf_verifier_log_type(env, t, NULL);

	return meta_needed;
}
static void btf_var_log ( struct btf_verifier_env * env , const struct btf_type * t )
{
const struct btf_var * var = btf_type_var ( t ) ;
btf_verifier_log ( env , " type_id=%u linkage=%u " , t - > type , var - > linkage ) ;
}
/* Kind operations installed for BTF_KIND_VAR in kind_ops[]. */
static const struct btf_kind_operations var_ops = {
. check_meta = btf_var_check_meta ,
. resolve = btf_var_resolve ,
. check_member = btf_df_check_member ,
. check_kflag_member = btf_df_check_kflag_member ,
. log_details = btf_var_log ,
2020-09-28 12:31:04 +01:00
. show = btf_var_show ,
2019-04-09 23:20:09 +02:00
} ;
/* Validate the metadata of a BTF_KIND_DATASEC type.
 *
 * Checks that the trailing array of struct btf_var_secinfo fits into
 * meta_left, that the section has a non-zero size, no kind_flag and a
 * valid section name, and that every entry:
 *   - refers to a non-void, valid type id,
 *   - starts inside the section and not before the end of the
 *     previous entry,
 *   - has a non-zero size that does not run past the section end.
 * Finally the sum of all entry sizes must not exceed the section size.
 *
 * Returns the number of bytes occupied by the btf_var_secinfo array on
 * success, -EINVAL otherwise.
 */
static s32 btf_datasec_check_meta(struct btf_verifier_env *env,
				  const struct btf_type *t,
				  u32 meta_left)
{
	const struct btf_var_secinfo *vsi;
	u64 last_vsi_end_off = 0, sum = 0;
	u32 i, meta_needed;

	meta_needed = btf_type_vlen(t) * sizeof(*vsi);
	if (meta_left < meta_needed) {
		btf_verifier_log_basic(env, t,
				       " meta_left:%u meta_needed:%u ",
				       meta_left, meta_needed);
		return -EINVAL;
	}

	if (!t->size) {
		btf_verifier_log_type(env, t, " size == 0 ");
		return -EINVAL;
	}

	if (btf_type_kflag(t)) {
		btf_verifier_log_type(env, t, " Invalid btf_info kind_flag ");
		return -EINVAL;
	}

	if (!t->name_off ||
	    !btf_name_valid_section(env->btf, t->name_off)) {
		btf_verifier_log_type(env, t, " Invalid name ");
		return -EINVAL;
	}

	btf_verifier_log_type(env, t, NULL);

	for_each_vsi(i, t, vsi) {
		/* A var cannot be in type void */
		if (!vsi->type || !BTF_TYPE_ID_VALID(vsi->type)) {
			btf_verifier_log_vsi(env, t, vsi,
					     " Invalid type_id ");
			return -EINVAL;
		}

		if (vsi->offset < last_vsi_end_off || vsi->offset >= t->size) {
			btf_verifier_log_vsi(env, t, vsi,
					     " Invalid offset ");
			return -EINVAL;
		}

		if (!vsi->size || vsi->size > t->size) {
			btf_verifier_log_vsi(env, t, vsi,
					     " Invalid size ");
			return -EINVAL;
		}

		/* Widen before adding: offset and size are 32-bit, so their
		 * sum could otherwise wrap around and slip past the bound
		 * check below when t->size is large.
		 */
		last_vsi_end_off = (u64)vsi->offset + vsi->size;
		if (last_vsi_end_off > t->size) {
			btf_verifier_log_vsi(env, t, vsi,
					     " Invalid offset+size ");
			return -EINVAL;
		}

		btf_verifier_log_vsi(env, t, vsi, NULL);
		sum += vsi->size;
	}

	if (t->size < sum) {
		btf_verifier_log_type(env, t, " Invalid btf_info size ");
		return -EINVAL;
	}

	return meta_needed;
}
static int btf_datasec_resolve ( struct btf_verifier_env * env ,
const struct resolve_vertex * v )
{
const struct btf_var_secinfo * vsi ;
struct btf * btf = env - > btf ;
u16 i ;
for_each_vsi_from ( i , v - > next_member , v - > t , vsi ) {
u32 var_type_id = vsi - > type , type_id , type_size = 0 ;
const struct btf_type * var_type = btf_type_by_id ( env - > btf ,
var_type_id ) ;
if ( ! var_type | | ! btf_type_is_var ( var_type ) ) {
btf_verifier_log_vsi ( env , v - > t , vsi ,
" Not a VAR kind member " ) ;
return - EINVAL ;
}
if ( ! env_type_is_resolve_sink ( env , var_type ) & &
! env_type_is_resolved ( env , var_type_id ) ) {
env_stack_set_next_member ( env , i + 1 ) ;
return env_stack_push ( env , var_type , var_type_id ) ;
}
type_id = var_type - > type ;
if ( ! btf_type_id_size ( btf , & type_id , & type_size ) ) {
btf_verifier_log_vsi ( env , v - > t , vsi , " Invalid type " ) ;
return - EINVAL ;
}
if ( vsi - > size < type_size ) {
btf_verifier_log_vsi ( env , v - > t , vsi , " Invalid size " ) ;
return - EINVAL ;
}
}
env_stack_pop_resolved ( env , 0 , 0 ) ;
return 0 ;
}
static void btf_datasec_log ( struct btf_verifier_env * env ,
const struct btf_type * t )
{
btf_verifier_log ( env , " size=%u vlen=%u " , t - > size , btf_type_vlen ( t ) ) ;
}
2020-09-28 12:31:04 +01:00
/* Show a DATASEC value: a section header followed by every entry of the
 * section, each rendered through the ->show() callback of the entry's
 * type at data + vsi->offset.  Entries after the first are preceded by a
 * separator.  Nothing is shown if btf_show_start_type() declines.
 */
static void btf_datasec_show(const struct btf *btf,
			     const struct btf_type *t, u32 type_id,
			     void *data, u8 bits_offset,
			     struct btf_show *show)
{
	const struct btf_var_secinfo *vsi;
	u32 i;

	if (!btf_show_start_type(show, t, type_id, data))
		return;

	btf_show_type_value(show, " section ( \" %s \" ) = { ",
			    __btf_name_by_offset(btf, t->name_off));

	for_each_vsi(i, t, vsi) {
		const struct btf_type *member = btf_type_by_id(btf, vsi->type);

		if (i != 0)
			btf_show(show, " , ");
		btf_type_ops(member)->show(btf, member, vsi->type,
					   data + vsi->offset, bits_offset,
					   show);
	}

	btf_show_end_type(show);
}
/* Kind operations installed for BTF_KIND_DATASEC in kind_ops[]. */
static const struct btf_kind_operations datasec_ops = {
. check_meta = btf_datasec_check_meta ,
. resolve = btf_datasec_resolve ,
. check_member = btf_df_check_member ,
. check_kflag_member = btf_df_check_kflag_member ,
. log_details = btf_datasec_log ,
2020-09-28 12:31:04 +01:00
. show = btf_datasec_show ,
2019-04-09 23:20:09 +02:00
} ;
2021-02-26 21:22:52 +01:00
/* Validate the metadata of a BTF_KIND_FLOAT type.
 *
 * A FLOAT has vlen == 0, no kind_flag, and a size of 2, 4, 8, 12 or 16
 * bytes.  No extra metadata follows the btf_type, hence 0 is returned on
 * success; -EINVAL otherwise.
 */
static s32 btf_float_check_meta(struct btf_verifier_env *env,
				const struct btf_type *t,
				u32 meta_left)
{
	if (btf_type_vlen(t)) {
		btf_verifier_log_type(env, t, " vlen != 0 ");
		return -EINVAL;
	}

	if (btf_type_kflag(t)) {
		btf_verifier_log_type(env, t, " Invalid btf_info kind_flag ");
		return -EINVAL;
	}

	switch (t->size) {
	case 2:
	case 4:
	case 8:
	case 12:
	case 16:
		break;
	default:
		btf_verifier_log_type(env, t, " Invalid type_size ");
		return -EINVAL;
	}

	btf_verifier_log_type(env, t, NULL);

	return 0;
}
static int btf_float_check_member ( struct btf_verifier_env * env ,
const struct btf_type * struct_type ,
const struct btf_member * member ,
const struct btf_type * member_type )
{
u64 start_offset_bytes ;
u64 end_offset_bytes ;
u64 misalign_bits ;
u64 align_bytes ;
u64 align_bits ;
/* Different architectures have different alignment requirements, so
* here we check only for the reasonable minimum . This way we ensure
* that types after CO - RE can pass the kernel BTF verifier .
*/
align_bytes = min_t ( u64 , sizeof ( void * ) , member_type - > size ) ;
align_bits = align_bytes * BITS_PER_BYTE ;
div64_u64_rem ( member - > offset , align_bits , & misalign_bits ) ;
if ( misalign_bits ) {
btf_verifier_log_member ( env , struct_type , member ,
" Member is not properly aligned " ) ;
return - EINVAL ;
}
start_offset_bytes = member - > offset / BITS_PER_BYTE ;
end_offset_bytes = start_offset_bytes + member_type - > size ;
if ( end_offset_bytes > struct_type - > size ) {
btf_verifier_log_member ( env , struct_type , member ,
" Member exceeds struct_size " ) ;
return - EINVAL ;
}
return 0 ;
}
static void btf_float_log ( struct btf_verifier_env * env ,
const struct btf_type * t )
{
btf_verifier_log ( env , " size=%u " , t - > size ) ;
}
/* Kind operations installed for BTF_KIND_FLOAT in kind_ops[]. */
static const struct btf_kind_operations float_ops = {
. check_meta = btf_float_check_meta ,
. resolve = btf_df_resolve ,
. check_member = btf_float_check_member ,
. check_kflag_member = btf_generic_check_kflag_member ,
. log_details = btf_float_log ,
. show = btf_df_show ,
} ;
2021-10-12 09:48:38 -07:00
/* Validate the metadata of a BTF_KIND_DECL_TAG type.
 *
 * A DECL_TAG must be followed by a struct btf_decl_tag, carry a non-empty
 * tag value as its name, have vlen == 0 and no kind_flag, and use a
 * component_idx that is not below -1.
 *
 * Returns sizeof(struct btf_decl_tag) on success, -EINVAL otherwise.
 */
static s32 btf_decl_tag_check_meta(struct btf_verifier_env *env,
				   const struct btf_type *t,
				   u32 meta_left)
{
	const struct btf_decl_tag *tag;
	u32 meta_needed = sizeof(*tag);
	const char *tag_value;
	s32 idx;

	if (meta_left < meta_needed) {
		btf_verifier_log_basic(env, t,
				       " meta_left:%u meta_needed:%u ",
				       meta_left, meta_needed);
		return -EINVAL;
	}

	tag_value = btf_name_by_offset(env->btf, t->name_off);
	if (!tag_value || !tag_value[0]) {
		btf_verifier_log_type(env, t, " Invalid value ");
		return -EINVAL;
	}

	if (btf_type_vlen(t)) {
		btf_verifier_log_type(env, t, " vlen != 0 ");
		return -EINVAL;
	}

	if (btf_type_kflag(t)) {
		btf_verifier_log_type(env, t, " Invalid btf_info kind_flag ");
		return -EINVAL;
	}

	idx = btf_type_decl_tag(t)->component_idx;
	if (idx < -1) {
		btf_verifier_log_type(env, t, " Invalid component_idx ");
		return -EINVAL;
	}

	btf_verifier_log_type(env, t, NULL);

	return meta_needed;
}
2021-10-12 09:48:38 -07:00
static int btf_decl_tag_resolve ( struct btf_verifier_env * env ,
2021-09-14 15:30:15 -07:00
const struct resolve_vertex * v )
{
const struct btf_type * next_type ;
const struct btf_type * t = v - > t ;
u32 next_type_id = t - > type ;
struct btf * btf = env - > btf ;
s32 component_idx ;
u32 vlen ;
next_type = btf_type_by_id ( btf , next_type_id ) ;
2021-10-12 09:48:38 -07:00
if ( ! next_type | | ! btf_type_is_decl_tag_target ( next_type ) ) {
2021-09-14 15:30:15 -07:00
btf_verifier_log_type ( env , v - > t , " Invalid type_id " ) ;
return - EINVAL ;
}
if ( ! env_type_is_resolve_sink ( env , next_type ) & &
! env_type_is_resolved ( env , next_type_id ) )
return env_stack_push ( env , next_type , next_type_id ) ;
2021-10-12 09:48:38 -07:00
component_idx = btf_type_decl_tag ( t ) - > component_idx ;
2021-09-14 15:30:15 -07:00
if ( component_idx ! = - 1 ) {
2021-10-21 12:56:28 -07:00
if ( btf_type_is_var ( next_type ) | | btf_type_is_typedef ( next_type ) ) {
2021-09-14 15:30:15 -07:00
btf_verifier_log_type ( env , v - > t , " Invalid component_idx " ) ;
return - EINVAL ;
}
if ( btf_type_is_struct ( next_type ) ) {
vlen = btf_type_vlen ( next_type ) ;
} else {
/* next_type should be a function */
next_type = btf_type_by_id ( btf , next_type - > type ) ;
vlen = btf_type_vlen ( next_type ) ;
}
if ( ( u32 ) component_idx > = vlen ) {
btf_verifier_log_type ( env , v - > t , " Invalid component_idx " ) ;
return - EINVAL ;
}
}
env_stack_pop_resolved ( env , next_type_id , 0 ) ;
return 0 ;
}
2021-10-12 09:48:38 -07:00
static void btf_decl_tag_log ( struct btf_verifier_env * env , const struct btf_type * t )
2021-09-14 15:30:15 -07:00
{
btf_verifier_log ( env , " type=%u component_idx=%d " , t - > type ,
2021-10-12 09:48:38 -07:00
btf_type_decl_tag ( t ) - > component_idx ) ;
2021-09-14 15:30:15 -07:00
}
2021-10-12 09:48:38 -07:00
/* Kind operations installed for BTF_KIND_DECL_TAG in kind_ops[]. */
static const struct btf_kind_operations decl_tag_ops = {
. check_meta = btf_decl_tag_check_meta ,
. resolve = btf_decl_tag_resolve ,
2021-09-14 15:30:15 -07:00
. check_member = btf_df_check_member ,
. check_kflag_member = btf_df_check_kflag_member ,
2021-10-12 09:48:38 -07:00
. log_details = btf_decl_tag_log ,
2021-09-14 15:30:15 -07:00
. show = btf_df_show ,
} ;
2018-11-19 15:29:08 -08:00
static int btf_func_proto_check ( struct btf_verifier_env * env ,
const struct btf_type * t )
{
const struct btf_type * ret_type ;
const struct btf_param * args ;
const struct btf * btf ;
u16 nr_args , i ;
int err ;
btf = env - > btf ;
args = ( const struct btf_param * ) ( t + 1 ) ;
nr_args = btf_type_vlen ( t ) ;
/* Check func return type which could be "void" (t->type == 0) */
if ( t - > type ) {
u32 ret_type_id = t - > type ;
ret_type = btf_type_by_id ( btf , ret_type_id ) ;
if ( ! ret_type ) {
btf_verifier_log_type ( env , t , " Invalid return type " ) ;
return - EINVAL ;
}
2022-10-14 17:24:44 -07:00
if ( btf_type_is_resolve_source_only ( ret_type ) ) {
btf_verifier_log_type ( env , t , " Invalid return type " ) ;
return - EINVAL ;
}
2018-11-19 15:29:08 -08:00
if ( btf_type_needs_resolve ( ret_type ) & &
! env_type_is_resolved ( env , ret_type_id ) ) {
err = btf_resolve ( env , ret_type , ret_type_id ) ;
if ( err )
return err ;
}
/* Ensure the return type is a type that has a size */
if ( ! btf_type_id_size ( btf , & ret_type_id , NULL ) ) {
btf_verifier_log_type ( env , t , " Invalid return type " ) ;
return - EINVAL ;
}
}
if ( ! nr_args )
return 0 ;
/* Last func arg type_id could be 0 if it is a vararg */
if ( ! args [ nr_args - 1 ] . type ) {
if ( args [ nr_args - 1 ] . name_off ) {
btf_verifier_log_type ( env , t , " Invalid arg#%u " ,
nr_args ) ;
return - EINVAL ;
}
nr_args - - ;
}
err = 0 ;
for ( i = 0 ; i < nr_args ; i + + ) {
const struct btf_type * arg_type ;
u32 arg_type_id ;
arg_type_id = args [ i ] . type ;
arg_type = btf_type_by_id ( btf , arg_type_id ) ;
if ( ! arg_type ) {
btf_verifier_log_type ( env , t , " Invalid arg#%u " , i + 1 ) ;
err = - EINVAL ;
break ;
}
if ( args [ i ] . name_off & &
( ! btf_name_offset_valid ( btf , args [ i ] . name_off ) | |
! btf_name_valid_identifier ( btf , args [ i ] . name_off ) ) ) {
btf_verifier_log_type ( env , t ,
" Invalid arg#%u " , i + 1 ) ;
err = - EINVAL ;
break ;
}
if ( btf_type_needs_resolve ( arg_type ) & &
! env_type_is_resolved ( env , arg_type_id ) ) {
err = btf_resolve ( env , arg_type , arg_type_id ) ;
if ( err )
break ;
}
if ( ! btf_type_id_size ( btf , & arg_type_id , NULL ) ) {
btf_verifier_log_type ( env , t , " Invalid arg#%u " , i + 1 ) ;
err = - EINVAL ;
break ;
}
}
return err ;
}
static int btf_func_check ( struct btf_verifier_env * env ,
const struct btf_type * t )
{
const struct btf_type * proto_type ;
const struct btf_param * args ;
const struct btf * btf ;
u16 nr_args , i ;
btf = env - > btf ;
proto_type = btf_type_by_id ( btf , t - > type ) ;
if ( ! proto_type | | ! btf_type_is_func_proto ( proto_type ) ) {
btf_verifier_log_type ( env , t , " Invalid type_id " ) ;
return - EINVAL ;
}
args = ( const struct btf_param * ) ( proto_type + 1 ) ;
nr_args = btf_type_vlen ( proto_type ) ;
for ( i = 0 ; i < nr_args ; i + + ) {
if ( ! args [ i ] . name_off & & args [ i ] . type ) {
btf_verifier_log_type ( env , t , " Invalid arg#%u " , i + 1 ) ;
return - EINVAL ;
}
}
return 0 ;
}
2018-04-18 15:55:57 -07:00
static const struct btf_kind_operations * const kind_ops [ NR_BTF_KINDS ] = {
[ BTF_KIND_INT ] = & int_ops ,
[ BTF_KIND_PTR ] = & ptr_ops ,
[ BTF_KIND_ARRAY ] = & array_ops ,
[ BTF_KIND_STRUCT ] = & struct_ops ,
[ BTF_KIND_UNION ] = & struct_ops ,
[ BTF_KIND_ENUM ] = & enum_ops ,
[ BTF_KIND_FWD ] = & fwd_ops ,
[ BTF_KIND_TYPEDEF ] = & modifier_ops ,
[ BTF_KIND_VOLATILE ] = & modifier_ops ,
[ BTF_KIND_CONST ] = & modifier_ops ,
[ BTF_KIND_RESTRICT ] = & modifier_ops ,
2018-11-19 15:29:08 -08:00
[ BTF_KIND_FUNC ] = & func_ops ,
[ BTF_KIND_FUNC_PROTO ] = & func_proto_ops ,
2019-04-09 23:20:09 +02:00
[ BTF_KIND_VAR ] = & var_ops ,
[ BTF_KIND_DATASEC ] = & datasec_ops ,
2021-02-26 21:22:52 +01:00
[ BTF_KIND_FLOAT ] = & float_ops ,
2021-10-12 09:48:38 -07:00
[ BTF_KIND_DECL_TAG ] = & decl_tag_ops ,
2021-11-11 17:26:09 -08:00
[ BTF_KIND_TYPE_TAG ] = & modifier_ops ,
bpf: Add btf enum64 support
Currently, BTF only supports up to 32bit enum values with BTF_KIND_ENUM.
But in the kernel, some enums indeed have 64bit values, e.g.,
in uapi bpf.h, we have
enum {
BPF_F_INDEX_MASK = 0xffffffffULL,
BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
};
In this case, BTF_KIND_ENUM will encode the value of BPF_F_CTXLEN_MASK
as 0, which certainly is incorrect.
This patch added a new btf kind, BTF_KIND_ENUM64, which permits
64bit value to cover the above use case. The BTF_KIND_ENUM64 has
the following three fields followed by the common type:
struct btf_enum64 {
__u32 name_off;
__u32 val_lo32;
__u32 val_hi32;
};
Currently, btf type section has an alignment of 4 as all element types
are u32. Representing the value with __u64 will introduce a pad
for bpf_enum64 and may also introduce misalignment for the 64bit value.
Hence, two members of val_hi32 and val_lo32 are chosen to avoid these issues.
The kflag is also introduced for BTF_KIND_ENUM and BTF_KIND_ENUM64
to indicate whether the value is signed or unsigned. The kflag intends
to provide consistent output of BTF C format with the original
source code. For example, the original BTF_KIND_ENUM bit value is 0xffffffff.
The format C has two choices, printing out 0xffffffff or -1 and current libbpf
prints out as unsigned value. But if the signedness is preserved in btf,
the value can be printed the same as the original source code.
The kflag value 0 means unsigned values, which is consistent to the default
by libbpf and should also cover most cases as well.
The new BTF_KIND_ENUM64 is intended to support the enum value represented as
64bit value. But it can represent all BTF_KIND_ENUM values as well.
The compiler ([1]) and pahole will generate BTF_KIND_ENUM64 only if the value has
to be represented with 64 bits.
In addition, a static inline function btf_kind_core_compat() is introduced which
will be used later when libbpf relo_core.c changed. Here the kernel shares the
same relo_core.c with libbpf.
[1] https://reviews.llvm.org/D124641
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220607062600.3716578-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-06-06 23:26:00 -07:00
[ BTF_KIND_ENUM64 ] = & enum64_ops ,
2018-04-18 15:55:57 -07:00
} ;
/* Validate the metadata of a single type located at t.
 *
 * Performs the kind-independent checks (room for a struct btf_type, no
 * bits set outside BTF_INFO_MASK, a known kind, a valid name offset) and
 * then delegates to the ->check_meta() callback of the type's kind.
 *
 * Returns the total number of bytes consumed by this type (the common
 * header plus whatever the kind callback reported), or a negative error.
 */
static s32 btf_check_meta(struct btf_verifier_env *env,
			  const struct btf_type *t,
			  u32 meta_left)
{
	const u32 meta_avail = meta_left;
	s32 kind_meta_size;
	u32 kind;

	if (meta_left < sizeof(*t)) {
		btf_verifier_log(env, " [%u] meta_left:%u meta_needed:%zu ",
				 env->log_type_id, meta_left, sizeof(*t));
		return -EINVAL;
	}
	meta_left -= sizeof(*t);

	if (t->info & ~BTF_INFO_MASK) {
		btf_verifier_log(env, " [%u] Invalid btf_info:%x ",
				 env->log_type_id, t->info);
		return -EINVAL;
	}

	kind = BTF_INFO_KIND(t->info);
	if (kind > BTF_KIND_MAX || kind == BTF_KIND_UNKN) {
		btf_verifier_log(env, " [%u] Invalid kind:%u ",
				 env->log_type_id, kind);
		return -EINVAL;
	}

	if (!btf_name_offset_valid(env->btf, t->name_off)) {
		btf_verifier_log(env, " [%u] Invalid name_offset:%u ",
				 env->log_type_id, t->name_off);
		return -EINVAL;
	}

	kind_meta_size = btf_type_ops(t)->check_meta(env, t, meta_left);
	if (kind_meta_size < 0)
		return kind_meta_size;

	meta_left -= kind_meta_size;

	return meta_avail - meta_left;
}
static int btf_check_all_metas ( struct btf_verifier_env * env )
{
struct btf * btf = env - > btf ;
struct btf_header * hdr ;
void * cur , * end ;
2018-05-22 14:57:18 -07:00
hdr = & btf - > hdr ;
2018-04-18 15:55:57 -07:00
cur = btf - > nohdr_data + hdr - > type_off ;
2018-09-12 10:29:11 -07:00
end = cur + hdr - > type_len ;
2018-04-18 15:55:57 -07:00
2020-11-09 17:19:28 -08:00
env - > log_type_id = btf - > base_btf ? btf - > start_id : 1 ;
2018-04-18 15:55:57 -07:00
while ( cur < end ) {
struct btf_type * t = cur ;
s32 meta_size ;
meta_size = btf_check_meta ( env , t , end - cur ) ;
if ( meta_size < 0 )
return meta_size ;
btf_add_type ( env , t ) ;
cur + = meta_size ;
env - > log_type_id + + ;
}
return 0 ;
}
2018-04-18 15:55:58 -07:00
/* Sanity-check the recorded resolve state of type_id after resolution.
 *
 * Returns true when the type is marked resolved and the recorded
 * resolved type id / size match what is expected for its kind (see the
 * per-kind notes below); false otherwise, including for any kind not
 * handled here.
 */
static bool btf_resolve_valid ( struct btf_verifier_env * env ,
const struct btf_type * t ,
u32 type_id )
{
struct btf * btf = env - > btf ;
if ( ! env_type_is_resolved ( env , type_id ) )
return false ;
2019-04-09 23:20:09 +02:00
/* struct/union and datasec: both resolved type id and size must be 0 */
if ( btf_type_is_struct ( t ) | | btf_type_is_datasec ( t ) )
2020-11-09 17:19:28 -08:00
return ! btf_resolved_type_id ( btf , type_id ) & &
! btf_resolved_type_size ( btf , type_id ) ;
2018-04-18 15:55:58 -07:00
2022-02-03 11:17:27 -08:00
/* decl_tag and func: non-zero resolved type id, resolved size of 0 */
if ( btf_type_is_decl_tag ( t ) | | btf_type_is_func ( t ) )
2021-09-14 15:30:15 -07:00
return btf_resolved_type_id ( btf , type_id ) & &
! btf_resolved_type_size ( btf , type_id ) ;
2019-04-09 23:20:09 +02:00
/* modifier, ptr and var: btf_type_id_resolve() must yield a type that
 * is not itself a modifier, var or datasec
 */
if ( btf_type_is_modifier ( t ) | | btf_type_is_ptr ( t ) | |
btf_type_is_var ( t ) ) {
2018-04-18 15:55:58 -07:00
t = btf_type_id_resolve ( btf , & type_id ) ;
2019-04-09 23:20:09 +02:00
return t & &
! btf_type_is_modifier ( t ) & &
! btf_type_is_var ( t ) & &
! btf_type_is_datasec ( t ) ;
2018-04-18 15:55:58 -07:00
}
/* array: the element type must have a size and not be a modifier, and
 * nelems * element size must equal the recorded resolved size
 */
if ( btf_type_is_array ( t ) ) {
const struct btf_array * array = btf_type_array ( t ) ;
const struct btf_type * elem_type ;
u32 elem_type_id = array - > type ;
u32 elem_size ;
elem_type = btf_type_id_size ( btf , & elem_type_id , & elem_size ) ;
return elem_type & & ! btf_type_is_modifier ( elem_type ) & &
( array - > nelems * elem_size = =
2020-11-09 17:19:28 -08:00
btf_resolved_type_size ( btf , type_id ) ) ;
2018-04-18 15:55:58 -07:00
}
return false ;
}
2018-11-19 15:29:08 -08:00
/* Resolve type t (with id type_id) and everything it depends on.
 *
 * t is pushed onto the resolve stack and the ->resolve() callback of the
 * vertex on top of the stack is invoked repeatedly until the stack is
 * empty or a callback fails.  -E2BIG and -EEXIST are logged as "max
 * depth exceeded" and "loop detected" respectively.  On success the
 * final state is sanity-checked with btf_resolve_valid().
 *
 * env->log_type_id is restored to its value on entry before returning.
 *
 * Returns 0 on success or a negative error.
 */
static int btf_resolve(struct btf_verifier_env *env,
		       const struct btf_type *t, u32 type_id)
{
	const u32 caller_log_type_id = env->log_type_id;
	const struct resolve_vertex *top;
	int err = 0;

	env->resolve_mode = RESOLVE_TBD;
	env_stack_push(env, t, type_id);

	for (;;) {
		top = env_stack_peak(env);
		if (!top)
			break;

		env->log_type_id = top->type_id;
		err = btf_type_ops(top->t)->resolve(env, top);
		if (err)
			break;
	}

	env->log_type_id = type_id;
	switch (err) {
	case -E2BIG:
		btf_verifier_log_type(env, t,
				      " Exceeded max resolving depth:%u ",
				      MAX_RESOLVE_DEPTH);
		break;
	case -EEXIST:
		btf_verifier_log_type(env, t, " Loop detected ");
		break;
	default:
		break;
	}

	/* Final sanity check */
	if (!err && !btf_resolve_valid(env, t, type_id)) {
		btf_verifier_log_type(env, t, " Invalid resolve state ");
		err = -EINVAL;
	}

	env->log_type_id = caller_log_type_id;

	return err;
}
2018-04-18 15:55:58 -07:00
static int btf_check_all_types ( struct btf_verifier_env * env )
{
struct btf * btf = env - > btf ;
2020-11-09 17:19:28 -08:00
const struct btf_type * t ;
u32 type_id , i ;
2018-04-18 15:55:58 -07:00
int err ;
err = env_resolve_init ( env ) ;
if ( err )
return err ;
env - > phase + + ;
2020-11-09 17:19:28 -08:00
for ( i = btf - > base_btf ? 0 : 1 ; i < btf - > nr_types ; i + + ) {
type_id = btf - > start_id + i ;
t = btf_type_by_id ( btf , type_id ) ;
2018-04-18 15:55:58 -07:00
env - > log_type_id = type_id ;
if ( btf_type_needs_resolve ( t ) & &
! env_type_is_resolved ( env , type_id ) ) {
err = btf_resolve ( env , t , type_id ) ;
if ( err )
return err ;
}
2018-11-19 15:29:08 -08:00
if ( btf_type_is_func_proto ( t ) ) {
err = btf_func_proto_check ( env , t ) ;
if ( err )
return err ;
}
2018-04-18 15:55:58 -07:00
}
return 0 ;
}
2018-04-18 15:55:57 -07:00
static int btf_parse_type_sec ( struct btf_verifier_env * env )
{
2018-05-22 14:57:18 -07:00
const struct btf_header * hdr = & env - > btf - > hdr ;
2018-04-18 15:55:58 -07:00
int err ;
2018-05-22 14:57:18 -07:00
/* Type section must align to 4 bytes */
if ( hdr - > type_off & ( sizeof ( u32 ) - 1 ) ) {
btf_verifier_log ( env , " Unaligned type_off " ) ;
return - EINVAL ;
}
2020-11-09 17:19:28 -08:00
if ( ! env - > btf - > base_btf & & ! hdr - > type_len ) {
2018-05-22 14:57:18 -07:00
btf_verifier_log ( env , " No type found " ) ;
return - EINVAL ;
}
2018-04-18 15:55:58 -07:00
err = btf_check_all_metas ( env ) ;
if ( err )
return err ;
return btf_check_all_types ( env ) ;
2018-04-18 15:55:57 -07:00
}
static int btf_parse_str_sec ( struct btf_verifier_env * env )
{
const struct btf_header * hdr ;
struct btf * btf = env - > btf ;
const char * start , * end ;
2018-05-22 14:57:18 -07:00
hdr = & btf - > hdr ;
2018-04-18 15:55:57 -07:00
start = btf - > nohdr_data + hdr - > str_off ;
end = start + hdr - > str_len ;
2018-05-22 14:57:18 -07:00
if ( end ! = btf - > data + btf - > data_size ) {
btf_verifier_log ( env , " String section is not at the end " ) ;
return - EINVAL ;
}
2020-11-09 17:19:28 -08:00
btf - > strings = start ;
if ( btf - > base_btf & & ! hdr - > str_len )
return 0 ;
if ( ! hdr - > str_len | | hdr - > str_len - 1 > BTF_MAX_NAME_OFFSET | | end [ - 1 ] ) {
btf_verifier_log ( env , " Invalid string section " ) ;
return - EINVAL ;
}
if ( ! btf - > base_btf & & start [ 0 ] ) {
2018-04-18 15:55:57 -07:00
btf_verifier_log ( env , " Invalid string section " ) ;
return - EINVAL ;
}
return 0 ;
}
2018-05-22 14:57:18 -07:00
/* Byte offsets, within struct btf_header, of the type and string section
 * descriptors.  btf_check_sec_info() reads a struct btf_sec_info from the
 * header at each of these offsets.
 */
static const size_t btf_sec_info_offset [ ] = {
offsetof ( struct btf_header , type_off ) ,
offsetof ( struct btf_header , str_off ) ,
} ;
static int btf_sec_info_cmp ( const void * a , const void * b )
2018-04-18 15:55:57 -07:00
{
2018-05-22 14:57:18 -07:00
const struct btf_sec_info * x = a ;
const struct btf_sec_info * y = b ;
return ( int ) ( x - > off - y - > off ) ? : ( int ) ( x - > len - y - > len ) ;
}
/* Verify the layout of the sections described by the BTF header.
 *
 * The section descriptors are copied out of the header, sorted with
 * btf_sec_info_cmp, and then walked in order to make sure that they
 * start within the data following the header, leave no gap, do not
 * overlap, and together cover exactly btf_data_size - hdr->hdr_len bytes.
 *
 * Returns 0 when the layout is acceptable, -EINVAL otherwise.
 */
static int btf_check_sec_info ( struct btf_verifier_env * env ,
u32 btf_data_size )
{
2018-05-23 11:32:36 -07:00
struct btf_sec_info secs [ ARRAY_SIZE ( btf_sec_info_offset ) ] ;
2018-05-22 14:57:18 -07:00
u32 total , expected_total , i ;
2018-04-18 15:55:57 -07:00
const struct btf_header * hdr ;
2018-05-22 14:57:18 -07:00
const struct btf * btf ;
btf = env - > btf ;
hdr = & btf - > hdr ;
/* Populate the secs from hdr */
2018-05-23 11:32:36 -07:00
for ( i = 0 ; i < ARRAY_SIZE ( btf_sec_info_offset ) ; i + + )
2018-05-22 14:57:18 -07:00
secs [ i ] = * ( struct btf_sec_info * ) ( ( void * ) hdr +
btf_sec_info_offset [ i ] ) ;
2018-05-23 11:32:36 -07:00
sort ( secs , ARRAY_SIZE ( btf_sec_info_offset ) ,
sizeof ( struct btf_sec_info ) , btf_sec_info_cmp , NULL ) ;
2018-05-22 14:57:18 -07:00
/* Check for gaps and overlap among sections */
/* total is the running end of the sections accepted so far;
 * expected_total is the number of data bytes after the header.
 */
total = 0 ;
expected_total = btf_data_size - hdr - > hdr_len ;
2018-05-23 11:32:36 -07:00
for ( i = 0 ; i < ARRAY_SIZE ( btf_sec_info_offset ) ; i + + ) {
2018-05-22 14:57:18 -07:00
/* section starts beyond the data that follows the header */
if ( expected_total < secs [ i ] . off ) {
btf_verifier_log ( env , " Invalid section offset " ) ;
return - EINVAL ;
}
if ( total < secs [ i ] . off ) {
/* gap */
btf_verifier_log ( env , " Unsupported section found " ) ;
return - EINVAL ;
}
/* section starts before the end of the previous one */
if ( total > secs [ i ] . off ) {
btf_verifier_log ( env , " Section overlap found " ) ;
return - EINVAL ;
}
/* section would extend past the end of the data */
if ( expected_total - total < secs [ i ] . len ) {
btf_verifier_log ( env ,
" Total section length too long " ) ;
return - EINVAL ;
}
total + = secs [ i ] . len ;
}
/* There is data other than hdr and known sections */
if ( expected_total ! = total ) {
btf_verifier_log ( env , " Unsupported section found " ) ;
return - EINVAL ;
}
return 0 ;
}
bpf, btf: fix a missing check bug in btf_parse
Wenwen Wang reported:
In btf_parse(), the header of the user-space btf data 'btf_data'
is firstly parsed and verified through btf_parse_hdr().
In btf_parse_hdr(), the header is copied from user-space 'btf_data'
to kernel-space 'btf->hdr' and then verified. If no error happens
during the verification process, the whole data of 'btf_data',
including the header, is then copied to 'data' in btf_parse(). It
is obvious that the header is copied twice here. More importantly,
no check is enforced after the second copy to make sure the headers
obtained in these two copies are the same. Given that 'btf_data' resides
in the user space, a malicious user can race to modify the header
between these two copies. By doing so, the user can inject
inconsistent data, which can cause undefined behavior of the
kernel and introduce potential security risk.
This issue is similar to the one fixed in commit 8af03d1ae2e1 ("bpf:
btf: Fix a missing check bug"). To fix it, this patch copies the user
'btf_data' *before* parsing / verifying the BTF header.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Co-developed-by: Wenwen Wang <wang6495@umn.edu>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-10-24 20:42:25 +00:00
static int btf_parse_hdr ( struct btf_verifier_env * env )
2018-05-22 14:57:18 -07:00
{
bpf, btf: fix a missing check bug in btf_parse
Wenwen Wang reported:
In btf_parse(), the header of the user-space btf data 'btf_data'
is firstly parsed and verified through btf_parse_hdr().
In btf_parse_hdr(), the header is copied from user-space 'btf_data'
to kernel-space 'btf->hdr' and then verified. If no error happens
during the verification process, the whole data of 'btf_data',
including the header, is then copied to 'data' in btf_parse(). It
is obvious that the header is copied twice here. More importantly,
no check is enforced after the second copy to make sure the headers
obtained in these two copies are same. Given that 'btf_data' resides
in the user space, a malicious user can race to modify the header
between these two copies. By doing so, the user can inject
inconsistent data, which can cause undefined behavior of the
kernel and introduce potential security risk.
This issue is similar to the one fixed in commit 8af03d1ae2e1 ("bpf:
btf: Fix a missing check bug"). To fix it, this patch copies the user
'btf_data' *before* parsing / verifying the BTF header.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Co-developed-by: Wenwen Wang <wang6495@umn.edu>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-10-24 20:42:25 +00:00
u32 hdr_len , hdr_copy , btf_data_size ;
2018-05-22 14:57:18 -07:00
const struct btf_header * hdr ;
struct btf * btf ;
2018-04-18 15:55:57 -07:00
2018-05-22 14:57:18 -07:00
btf = env - > btf ;
bpf, btf: fix a missing check bug in btf_parse
Wenwen Wang reported:
In btf_parse(), the header of the user-space btf data 'btf_data'
is firstly parsed and verified through btf_parse_hdr().
In btf_parse_hdr(), the header is copied from user-space 'btf_data'
to kernel-space 'btf->hdr' and then verified. If no error happens
during the verification process, the whole data of 'btf_data',
including the header, is then copied to 'data' in btf_parse(). It
is obvious that the header is copied twice here. More importantly,
no check is enforced after the second copy to make sure the headers
obtained in these two copies are same. Given that 'btf_data' resides
in the user space, a malicious user can race to modify the header
between these two copies. By doing so, the user can inject
inconsistent data, which can cause undefined behavior of the
kernel and introduce potential security risk.
This issue is similar to the one fixed in commit 8af03d1ae2e1 ("bpf:
btf: Fix a missing check bug"). To fix it, this patch copies the user
'btf_data' *before* parsing / verifying the BTF header.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Co-developed-by: Wenwen Wang <wang6495@umn.edu>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-10-24 20:42:25 +00:00
btf_data_size = btf - > data_size ;
2018-05-22 14:57:18 -07:00
2022-03-20 15:52:40 +08:00
if ( btf_data_size < offsetofend ( struct btf_header , hdr_len ) ) {
2018-05-22 14:57:18 -07:00
btf_verifier_log ( env , " hdr_len not found " ) ;
return - EINVAL ;
}
bpf, btf: fix a missing check bug in btf_parse
Wenwen Wang reported:
In btf_parse(), the header of the user-space btf data 'btf_data'
is firstly parsed and verified through btf_parse_hdr().
In btf_parse_hdr(), the header is copied from user-space 'btf_data'
to kernel-space 'btf->hdr' and then verified. If no error happens
during the verification process, the whole data of 'btf_data',
including the header, is then copied to 'data' in btf_parse(). It
is obvious that the header is copied twice here. More importantly,
no check is enforced after the second copy to make sure the headers
obtained in these two copies are same. Given that 'btf_data' resides
in the user space, a malicious user can race to modify the header
between these two copies. By doing so, the user can inject
inconsistent data, which can cause undefined behavior of the
kernel and introduce potential security risk.
This issue is similar to the one fixed in commit 8af03d1ae2e1 ("bpf:
btf: Fix a missing check bug"). To fix it, this patch copies the user
'btf_data' *before* parsing / verifying the BTF header.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Co-developed-by: Wenwen Wang <wang6495@umn.edu>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-10-24 20:42:25 +00:00
hdr = btf - > data ;
hdr_len = hdr - > hdr_len ;
2018-05-22 14:57:18 -07:00
if ( btf_data_size < hdr_len ) {
2018-04-18 15:55:57 -07:00
btf_verifier_log ( env , " btf_header not found " ) ;
return - EINVAL ;
}
bpf, btf: fix a missing check bug in btf_parse
Wenwen Wang reported:
In btf_parse(), the header of the user-space btf data 'btf_data'
is firstly parsed and verified through btf_parse_hdr().
In btf_parse_hdr(), the header is copied from user-space 'btf_data'
to kernel-space 'btf->hdr' and then verified. If no error happens
during the verification process, the whole data of 'btf_data',
including the header, is then copied to 'data' in btf_parse(). It
is obvious that the header is copied twice here. More importantly,
no check is enforced after the second copy to make sure the headers
obtained in these two copies are same. Given that 'btf_data' resides
in the user space, a malicious user can race to modify the header
between these two copies. By doing so, the user can inject
inconsistent data, which can cause undefined behavior of the
kernel and introduce potential security risk.
This issue is similar to the one fixed in commit 8af03d1ae2e1 ("bpf:
btf: Fix a missing check bug"). To fix it, this patch copies the user
'btf_data' *before* parsing / verifying the BTF header.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Co-developed-by: Wenwen Wang <wang6495@umn.edu>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-10-24 20:42:25 +00:00
/* Ensure the unsupported header fields are zero */
if ( hdr_len > sizeof ( btf - > hdr ) ) {
u8 * expected_zero = btf - > data + sizeof ( btf - > hdr ) ;
u8 * end = btf - > data + hdr_len ;
for ( ; expected_zero < end ; expected_zero + + ) {
if ( * expected_zero ) {
btf_verifier_log ( env , " Unsupported btf_header " ) ;
return - E2BIG ;
}
}
2018-05-22 14:57:18 -07:00
}
hdr_copy = min_t ( u32 , hdr_len , sizeof ( btf - > hdr ) ) ;
bpf, btf: fix a missing check bug in btf_parse
Wenwen Wang reported:
In btf_parse(), the header of the user-space btf data 'btf_data'
is firstly parsed and verified through btf_parse_hdr().
In btf_parse_hdr(), the header is copied from user-space 'btf_data'
to kernel-space 'btf->hdr' and then verified. If no error happens
during the verification process, the whole data of 'btf_data',
including the header, is then copied to 'data' in btf_parse(). It
is obvious that the header is copied twice here. More importantly,
no check is enforced after the second copy to make sure the headers
obtained in these two copies are same. Given that 'btf_data' resides
in the user space, a malicious user can race to modify the header
between these two copies. By doing so, the user can inject
inconsistent data, which can cause undefined behavior of the
kernel and introduce potential security risk.
This issue is similar to the one fixed in commit 8af03d1ae2e1 ("bpf:
btf: Fix a missing check bug"). To fix it, this patch copies the user
'btf_data' *before* parsing / verifying the BTF header.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Co-developed-by: Wenwen Wang <wang6495@umn.edu>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-10-24 20:42:25 +00:00
memcpy ( & btf - > hdr , btf - > data , hdr_copy ) ;
2018-05-22 14:57:18 -07:00
hdr = & btf - > hdr ;
btf_verifier_log_hdr ( env , btf_data_size ) ;
2018-04-18 15:55:57 -07:00
if ( hdr - > magic ! = BTF_MAGIC ) {
btf_verifier_log ( env , " Invalid magic " ) ;
return - EINVAL ;
}
if ( hdr - > version ! = BTF_VERSION ) {
btf_verifier_log ( env , " Unsupported version " ) ;
return - ENOTSUPP ;
}
if ( hdr - > flags ) {
btf_verifier_log ( env , " Unsupported flags " ) ;
return - ENOTSUPP ;
}
2021-01-09 23:03:40 -08:00
if ( ! btf - > base_btf & & btf_data_size = = hdr - > hdr_len ) {
2018-04-18 15:55:57 -07:00
btf_verifier_log ( env , " No data " ) ;
return - EINVAL ;
}
2022-09-17 16:42:48 +08:00
return btf_check_sec_info ( env , btf_data_size ) ;
2018-04-18 15:55:57 -07:00
}
bpf: Recognize lock and list fields in allocated objects
Allow specifying bpf_spin_lock, bpf_list_head, bpf_list_node fields in an
allocated object.
Also update btf_struct_access to reject direct access to these special
fields.
A bpf_list_head allows implementing map-in-map style use cases, where an
allocated object with bpf_list_head is linked into a list in a map
value. This would require embedding a bpf_list_node, support for which
is also included. The bpf_spin_lock is used to protect the bpf_list_head
and other data.
While holding a bpf_spin_lock is not strictly required when touching
the bpf_list_head in such objects (once we have access to the object, we
have complete ownership of it), the locking constraint is still kept
and may be conditionally lifted in the future.
Note that the specification of such types can be done just like map
values, e.g.:
struct bar {
struct bpf_list_node node;
};
struct foo {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(bar, node);
struct bpf_list_node node;
};
struct map_value {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(foo, node);
};
To recognize such types in user BTF, we build a btf_struct_metas array
of metadata items corresponding to each BTF ID. This is done once during
the btf_parse stage to avoid having to do it each time during the
verification process's requirement to inspect the metadata.
Moreover, the computed metadata needs to be passed to some helpers in
future patches which requires allocating them and storing them in the
BTF that is pinned by the program itself, so that valid access can be
assumed to such data during program runtime.
A key thing to note is that once a btf_struct_meta is available for a
type, both the btf_record and btf_field_offs should be available. It is
critical that btf_field_offs is available in case special fields are
present, as we extensively rely on special fields being zeroed out in
map values and allocated objects in later patches. The code ensures that
by bailing out in case of errors and ensuring both are available
together. If the record is not available, the special fields won't be
recognized, so not having both is also fine (in terms of being a
verification error and not a runtime bug).
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221118015614.2013203-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-18 07:25:56 +05:30
/*
 * Names of the struct types that btf_parse_struct_metas() looks up (as
 * BTF_KIND_STRUCT) in the BTF being parsed.  A struct that has a member
 * of one of the types found this way gets an entry in the struct-meta
 * table.
 */
static const char * alloc_obj_fields [ ] = {
" bpf_spin_lock " ,
" bpf_list_head " ,
" bpf_list_node " ,
} ;
static struct btf_struct_metas *
btf_parse_struct_metas ( struct bpf_verifier_log * log , struct btf * btf )
{
union {
struct btf_id_set set ;
struct {
u32 _cnt ;
u32 _ids [ ARRAY_SIZE ( alloc_obj_fields ) ] ;
} _arr ;
} aof ;
struct btf_struct_metas * tab = NULL ;
int i , n , id , ret ;
BUILD_BUG_ON ( offsetof ( struct btf_id_set , cnt ) ! = 0 ) ;
BUILD_BUG_ON ( sizeof ( struct btf_id_set ) ! = sizeof ( u32 ) ) ;
memset ( & aof , 0 , sizeof ( aof ) ) ;
for ( i = 0 ; i < ARRAY_SIZE ( alloc_obj_fields ) ; i + + ) {
/* Try to find whether this special type exists in user BTF, and
* if so remember its ID so we can easily find it among members
* of structs that we iterate in the next loop .
*/
id = btf_find_by_name_kind ( btf , alloc_obj_fields [ i ] , BTF_KIND_STRUCT ) ;
if ( id < 0 )
continue ;
aof . set . ids [ aof . set . cnt + + ] = id ;
}
if ( ! aof . set . cnt )
return NULL ;
sort ( & aof . set . ids , aof . set . cnt , sizeof ( aof . set . ids [ 0 ] ) , btf_id_cmp_func , NULL ) ;
n = btf_nr_types ( btf ) ;
for ( i = 1 ; i < n ; i + + ) {
struct btf_struct_metas * new_tab ;
const struct btf_member * member ;
struct btf_field_offs * foffs ;
struct btf_struct_meta * type ;
struct btf_record * record ;
const struct btf_type * t ;
int j , tab_cnt ;
t = btf_type_by_id ( btf , i ) ;
if ( ! t ) {
ret = - EINVAL ;
goto free ;
}
if ( ! __btf_type_is_struct ( t ) )
continue ;
cond_resched ( ) ;
for_each_member ( j , t , member ) {
if ( btf_id_set_contains ( & aof . set , member - > type ) )
goto parse ;
}
continue ;
parse :
tab_cnt = tab ? tab - > cnt : 0 ;
new_tab = krealloc ( tab , offsetof ( struct btf_struct_metas , types [ tab_cnt + 1 ] ) ,
GFP_KERNEL | __GFP_NOWARN ) ;
if ( ! new_tab ) {
ret = - ENOMEM ;
goto free ;
}
if ( ! tab )
new_tab - > cnt = 0 ;
tab = new_tab ;
type = & tab - > types [ tab - > cnt ] ;
type - > btf_id = i ;
record = btf_parse_fields ( btf , t , BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE , t - > size ) ;
/* The record cannot be unset, treat it as an error if so */
if ( IS_ERR_OR_NULL ( record ) ) {
ret = PTR_ERR_OR_ZERO ( record ) ? : - EFAULT ;
goto free ;
}
foffs = btf_parse_field_offs ( record ) ;
/* We need the field_offs to be valid for a valid record,
* either both should be set or both should be unset .
*/
if ( IS_ERR_OR_NULL ( foffs ) ) {
btf_record_free ( record ) ;
ret = - EFAULT ;
goto free ;
}
type - > record = record ;
type - > field_offs = foffs ;
tab - > cnt + + ;
}
return tab ;
free :
btf_struct_metas_free ( tab ) ;
return ERR_PTR ( ret ) ;
}
/*
 * Look up the struct metadata entry for @btf_id in @btf's struct-meta
 * table using bsearch() with btf_id_cmp_func.
 *
 * The BUILD_BUG_ON() pins btf_id as the first member of
 * struct btf_struct_meta, presumably so the comparator can treat a
 * pointer to an entry as a pointer to its ID.
 *
 * Return: the matching entry, or NULL when @btf has no table or the ID is
 * not in it.
 */
struct btf_struct_meta *btf_find_struct_meta(const struct btf *btf, u32 btf_id)
{
	struct btf_struct_metas *metas = btf->struct_meta_tab;

	BUILD_BUG_ON(offsetof(struct btf_struct_meta, btf_id) != 0);

	if (metas)
		return bsearch(&btf_id, metas->types, metas->cnt,
			       sizeof(metas->types[0]), btf_id_cmp_func);

	return NULL;
}
bpf: Ensure type tags precede modifiers in BTF
It is guaranteed that for modifiers, clang always places type tags
before other modifiers, and then the base type. We would like to rely on
this guarantee inside the kernel to make it simple to parse type tags
from BTF.
However, a user would be allowed to construct a BTF without such
guarantees. Hence, add a pass to check that in modifier chains, type
tags only occur at the head of the chain, and then don't occur later in
the chain.
If we see a type tag, we can have one or more type tags preceding other
modifiers that then never have another type tag. If we see other
modifiers, all modifiers following them should never be a type tag.
Instead of having to walk chains we verified previously, we can remember
the last good modifier type ID which headed a good chain. At that point,
we must have verified all other chains headed by type IDs less than it.
This makes the verification process less costly, and it becomes a simple
O(n) pass.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220419164608.1990559-2-memxor@gmail.com
2022-04-19 22:16:07 +05:30
static int btf_check_type_tags ( struct btf_verifier_env * env ,
struct btf * btf , int start_id )
{
int i , n , good_id = start_id - 1 ;
bool in_tags ;
n = btf_nr_types ( btf ) ;
for ( i = start_id ; i < n ; i + + ) {
const struct btf_type * t ;
2022-06-15 09:51:51 +05:30
int chain_limit = 32 ;
bpf: Ensure type tags precede modifiers in BTF
It is guaranteed that for modifiers, clang always places type tags
before other modifiers, and then the base type. We would like to rely on
this guarantee inside the kernel to make it simple to parse type tags
from BTF.
However, a user would be allowed to construct a BTF without such
guarantees. Hence, add a pass to check that in modifier chains, type
tags only occur at the head of the chain, and then don't occur later in
the chain.
If we see a type tag, we can have one or more type tags preceding other
modifiers that then never have another type tag. If we see other
modifiers, all modifiers following them should never be a type tag.
Instead of having to walk chains we verified previously, we can remember
the last good modifier type ID which headed a good chain. At that point,
we must have verified all other chains headed by type IDs less than it.
This makes the verification process less costly, and it becomes a simple
O(n) pass.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220419164608.1990559-2-memxor@gmail.com
2022-04-19 22:16:07 +05:30
u32 cur_id = i ;
t = btf_type_by_id ( btf , i ) ;
if ( ! t )
return - EINVAL ;
if ( ! btf_type_is_modifier ( t ) )
continue ;
cond_resched ( ) ;
in_tags = btf_type_is_type_tag ( t ) ;
while ( btf_type_is_modifier ( t ) ) {
2022-06-15 09:51:51 +05:30
if ( ! chain_limit - - ) {
btf_verifier_log ( env , " Max chain length or cycle detected " ) ;
return - ELOOP ;
}
bpf: Ensure type tags precede modifiers in BTF
It is guaranteed that for modifiers, clang always places type tags
before other modifiers, and then the base type. We would like to rely on
this guarantee inside the kernel to make it simple to parse type tags
from BTF.
However, a user would be allowed to construct a BTF without such
guarantees. Hence, add a pass to check that in modifier chains, type
tags only occur at the head of the chain, and then don't occur later in
the chain.
If we see a type tag, we can have one or more type tags preceding other
modifiers that then never have another type tag. If we see other
modifiers, all modifiers following them should never be a type tag.
Instead of having to walk chains we verified previously, we can remember
the last good modifier type ID which headed a good chain. At that point,
we must have verified all other chains headed by type IDs less than it.
This makes the verification process less costly, and it becomes a simple
O(n) pass.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220419164608.1990559-2-memxor@gmail.com
2022-04-19 22:16:07 +05:30
if ( btf_type_is_type_tag ( t ) ) {
if ( ! in_tags ) {
btf_verifier_log ( env , " Type tags don't precede modifiers " ) ;
return - EINVAL ;
}
} else if ( in_tags ) {
in_tags = false ;
}
if ( cur_id < = good_id )
break ;
/* Move to next type */
cur_id = t - > type ;
t = btf_type_by_id ( btf , cur_id ) ;
if ( ! t )
return - EINVAL ;
}
good_id = i ;
}
return 0 ;
}
2021-05-13 17:36:08 -07:00
static struct btf * btf_parse ( bpfptr_t btf_data , u32 btf_data_size ,
2018-04-18 15:55:57 -07:00
u32 log_level , char __user * log_ubuf , u32 log_size )
{
bpf: Recognize lock and list fields in allocated objects
Allow specifying bpf_spin_lock, bpf_list_head, bpf_list_node fields in a
allocated object.
Also update btf_struct_access to reject direct access to these special
fields.
A bpf_list_head allows implementing map-in-map style use cases, where an
allocated object with bpf_list_head is linked into a list in a map
value. This would require embedding a bpf_list_node, support for which
is also included. The bpf_spin_lock is used to protect the bpf_list_head
and other data.
While we strictly don't require to hold a bpf_spin_lock while touching
the bpf_list_head in such objects, as when have access to it, we have
complete ownership of the object, the locking constraint is still kept
and may be conditionally lifted in the future.
Note that the specification of such types can be done just like map
values, e.g.:
struct bar {
struct bpf_list_node node;
};
struct foo {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(bar, node);
struct bpf_list_node node;
};
struct map_value {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(foo, node);
};
To recognize such types in user BTF, we build a btf_struct_metas array
of metadata items corresponding to each BTF ID. This is done once during
the btf_parse stage to avoid having to do it each time during the
verification process's requirement to inspect the metadata.
Moreover, the computed metadata needs to be passed to some helpers in
future patches which requires allocating them and storing them in the
BTF that is pinned by the program itself, so that valid access can be
assumed to such data during program runtime.
A key thing to note is that once a btf_struct_meta is available for a
type, both the btf_record and btf_field_offs should be available. It is
critical that btf_field_offs is available in case special fields are
present, as we extensively rely on special fields being zeroed out in
map values and allocated objects in later patches. The code ensures that
by bailing out in case of errors and ensuring both are available
together. If the record is not available, the special fields won't be
recognized, so not having both is also fine (in terms of being a
verification error and not a runtime bug).
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221118015614.2013203-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-18 07:25:56 +05:30
struct btf_struct_metas * struct_meta_tab ;
2018-04-18 15:55:57 -07:00
struct btf_verifier_env * env = NULL ;
struct bpf_verifier_log * log ;
struct btf * btf = NULL ;
u8 * data ;
int err ;
if ( btf_data_size > BTF_MAX_SIZE )
return ERR_PTR ( - E2BIG ) ;
env = kzalloc ( sizeof ( * env ) , GFP_KERNEL | __GFP_NOWARN ) ;
if ( ! env )
return ERR_PTR ( - ENOMEM ) ;
log = & env - > log ;
if ( log_level | | log_ubuf | | log_size ) {
/* user requested verbose verifier output
* and supplied buffer to store the verification trace
*/
log - > level = log_level ;
log - > ubuf = log_ubuf ;
log - > len_total = log_size ;
/* log attributes have to be sane */
2021-12-03 13:30:01 +08:00
if ( ! bpf_verifier_log_attr_valid ( log ) ) {
2018-04-18 15:55:57 -07:00
err = - EINVAL ;
goto errout ;
}
}
btf = kzalloc ( sizeof ( * btf ) , GFP_KERNEL | __GFP_NOWARN ) ;
if ( ! btf ) {
err = - ENOMEM ;
goto errout ;
}
2018-05-22 14:57:18 -07:00
env - > btf = btf ;
2018-04-18 15:55:57 -07:00
data = kvmalloc ( btf_data_size , GFP_KERNEL | __GFP_NOWARN ) ;
if ( ! data ) {
err = - ENOMEM ;
goto errout ;
}
btf - > data = data ;
btf - > data_size = btf_data_size ;
2021-05-13 17:36:08 -07:00
if ( copy_from_bpfptr ( data , btf_data , btf_data_size ) ) {
2018-04-18 15:55:57 -07:00
err = - EFAULT ;
goto errout ;
}
bpf, btf: fix a missing check bug in btf_parse
Wenwen Wang reported:
In btf_parse(), the header of the user-space btf data 'btf_data'
is firstly parsed and verified through btf_parse_hdr().
In btf_parse_hdr(), the header is copied from user-space 'btf_data'
to kernel-space 'btf->hdr' and then verified. If no error happens
during the verification process, the whole data of 'btf_data',
including the header, is then copied to 'data' in btf_parse(). It
is obvious that the header is copied twice here. More importantly,
no check is enforced after the second copy to make sure the headers
obtained in these two copies are same. Given that 'btf_data' resides
in the user space, a malicious user can race to modify the header
between these two copies. By doing so, the user can inject
inconsistent data, which can cause undefined behavior of the
kernel and introduce potential security risk.
This issue is similar to the one fixed in commit 8af03d1ae2e1 ("bpf:
btf: Fix a missing check bug"). To fix it, this patch copies the user
'btf_data' *before* parsing / verifying the BTF header.
Fixes: 69b693f0aefa ("bpf: btf: Introduce BPF Type Format (BTF)")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Co-developed-by: Wenwen Wang <wang6495@umn.edu>
Acked-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
2018-10-24 20:42:25 +00:00
err = btf_parse_hdr ( env ) ;
if ( err )
goto errout ;
btf - > nohdr_data = btf - > data + btf - > hdr . hdr_len ;
2018-04-18 15:55:57 -07:00
err = btf_parse_str_sec ( env ) ;
if ( err )
goto errout ;
err = btf_parse_type_sec ( env ) ;
if ( err )
goto errout ;
bpf: Ensure type tags precede modifiers in BTF
It is guaranteed that for modifiers, clang always places type tags
before other modifiers, and then the base type. We would like to rely on
this guarantee inside the kernel to make it simple to parse type tags
from BTF.
However, a user would be allowed to construct a BTF without such
guarantees. Hence, add a pass to check that in modifier chains, type
tags only occur at the head of the chain, and then don't occur later in
the chain.
If we see a type tag, we can have one or more type tags preceding other
modifiers that then never have another type tag. If we see other
modifiers, all modifiers following them should never be a type tag.
Instead of having to walk chains we verified previously, we can remember
the last good modifier type ID which headed a good chain. At that point,
we must have verified all other chains headed by type IDs less than it.
This makes the verification process less costly, and it becomes a simple
O(n) pass.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220419164608.1990559-2-memxor@gmail.com
2022-04-19 22:16:07 +05:30
err = btf_check_type_tags ( env , btf , 1 ) ;
if ( err )
goto errout ;
bpf: Recognize lock and list fields in allocated objects
Allow specifying bpf_spin_lock, bpf_list_head, bpf_list_node fields in a
allocated object.
Also update btf_struct_access to reject direct access to these special
fields.
A bpf_list_head allows implementing map-in-map style use cases, where an
allocated object with bpf_list_head is linked into a list in a map
value. This would require embedding a bpf_list_node, support for which
is also included. The bpf_spin_lock is used to protect the bpf_list_head
and other data.
While we strictly don't require to hold a bpf_spin_lock while touching
the bpf_list_head in such objects, as when have access to it, we have
complete ownership of the object, the locking constraint is still kept
and may be conditionally lifted in the future.
Note that the specification of such types can be done just like map
values, e.g.:
struct bar {
struct bpf_list_node node;
};
struct foo {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(bar, node);
struct bpf_list_node node;
};
struct map_value {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(foo, node);
};
To recognize such types in user BTF, we build a btf_struct_metas array
of metadata items corresponding to each BTF ID. This is done once during
the btf_parse stage to avoid having to do it each time during the
verification process's requirement to inspect the metadata.
Moreover, the computed metadata needs to be passed to some helpers in
future patches which requires allocating them and storing them in the
BTF that is pinned by the program itself, so that valid access can be
assumed to such data during program runtime.
A key thing to note is that once a btf_struct_meta is available for a
type, both the btf_record and btf_field_offs should be available. It is
critical that btf_field_offs is available in case special fields are
present, as we extensively rely on special fields being zeroed out in
map values and allocated objects in later patches. The code ensures that
by bailing out in case of errors and ensuring both are available
together. If the record is not available, the special fields won't be
recognized, so not having both is also fine (in terms of being a
verification error and not a runtime bug).
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221118015614.2013203-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-18 07:25:56 +05:30
struct_meta_tab = btf_parse_struct_metas ( log , btf ) ;
if ( IS_ERR ( struct_meta_tab ) ) {
err = PTR_ERR ( struct_meta_tab ) ;
goto errout ;
}
btf - > struct_meta_tab = struct_meta_tab ;
2022-11-18 07:25:57 +05:30
if ( struct_meta_tab ) {
int i ;
for ( i = 0 ; i < struct_meta_tab - > cnt ; i + + ) {
err = btf_check_and_fixup_fields ( btf , struct_meta_tab - > types [ i ] . record ) ;
if ( err < 0 )
goto errout_meta ;
}
}
2018-05-22 14:57:18 -07:00
if ( log - > level & & bpf_verifier_log_full ( log ) ) {
2018-04-18 15:55:57 -07:00
err = - ENOSPC ;
bpf: Recognize lock and list fields in allocated objects
Allow specifying bpf_spin_lock, bpf_list_head, bpf_list_node fields in an
allocated object.
Also update btf_struct_access to reject direct access to these special
fields.
A bpf_list_head allows implementing map-in-map style use cases, where an
allocated object with bpf_list_head is linked into a list in a map
value. This would require embedding a bpf_list_node, support for which
is also included. The bpf_spin_lock is used to protect the bpf_list_head
and other data.
While we don't strictly need to hold a bpf_spin_lock while touching
the bpf_list_head in such objects (when we have access to one, we have
complete ownership of the object), the locking constraint is still kept
and may be conditionally lifted in the future.
Note that the specification of such types can be done just like map
values, e.g.:
struct bar {
struct bpf_list_node node;
};
struct foo {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(bar, node);
struct bpf_list_node node;
};
struct map_value {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(foo, node);
};
To recognize such types in user BTF, we build a btf_struct_metas array
of metadata items corresponding to each BTF ID. This is done once during
the btf_parse stage to avoid having to do it each time during the
verification process's requirement to inspect the metadata.
Moreover, the computed metadata needs to be passed to some helpers in
future patches which requires allocating them and storing them in the
BTF that is pinned by the program itself, so that valid access can be
assumed to such data during program runtime.
A key thing to note is that once a btf_struct_meta is available for a
type, both the btf_record and btf_field_offs should be available. It is
critical that btf_field_offs is available in case special fields are
present, as we extensively rely on special fields being zeroed out in
map values and allocated objects in later patches. The code ensures that
by bailing out in case of errors and ensuring both are available
together. If the record is not available, the special fields won't be
recognized, so not having both is also fine (in terms of being a
verification error and not a runtime bug).
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221118015614.2013203-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-18 07:25:56 +05:30
goto errout_meta ;
2018-04-18 15:55:57 -07:00
}
2018-05-22 14:57:18 -07:00
btf_verifier_env_free ( env ) ;
refcount_set ( & btf - > refcnt , 1 ) ;
return btf ;
2018-04-18 15:55:57 -07:00
bpf: Recognize lock and list fields in allocated objects
Allow specifying bpf_spin_lock, bpf_list_head, bpf_list_node fields in an
allocated object.
Also update btf_struct_access to reject direct access to these special
fields.
A bpf_list_head allows implementing map-in-map style use cases, where an
allocated object with bpf_list_head is linked into a list in a map
value. This would require embedding a bpf_list_node, support for which
is also included. The bpf_spin_lock is used to protect the bpf_list_head
and other data.
While we don't strictly need to hold a bpf_spin_lock while touching
the bpf_list_head in such objects (when we have access to one, we have
complete ownership of the object), the locking constraint is still kept
and may be conditionally lifted in the future.
Note that the specification of such types can be done just like map
values, e.g.:
struct bar {
struct bpf_list_node node;
};
struct foo {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(bar, node);
struct bpf_list_node node;
};
struct map_value {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(foo, node);
};
To recognize such types in user BTF, we build a btf_struct_metas array
of metadata items corresponding to each BTF ID. This is done once during
the btf_parse stage to avoid having to do it each time during the
verification process's requirement to inspect the metadata.
Moreover, the computed metadata needs to be passed to some helpers in
future patches which requires allocating them and storing them in the
BTF that is pinned by the program itself, so that valid access can be
assumed to such data during program runtime.
A key thing to note is that once a btf_struct_meta is available for a
type, both the btf_record and btf_field_offs should be available. It is
critical that btf_field_offs is available in case special fields are
present, as we extensively rely on special fields being zeroed out in
map values and allocated objects in later patches. The code ensures that
by bailing out in case of errors and ensuring both are available
together. If the record is not available, the special fields won't be
recognized, so not having both is also fine (in terms of being a
verification error and not a runtime bug).
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221118015614.2013203-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-18 07:25:56 +05:30
errout_meta :
btf_free_struct_meta_tab ( btf ) ;
2018-04-18 15:55:57 -07:00
errout :
btf_verifier_env_free ( env ) ;
if ( btf )
btf_free ( btf ) ;
return ERR_PTR ( err ) ;
}
2018-04-18 15:56:00 -07:00
2020-03-18 15:27:46 -07:00
/* Weak symbols delimiting the BTF data parsed by btf_parse_vmlinux():
 * __start_BTF is used as the data pointer and (__stop_BTF - __start_BTF)
 * as the data size.
 */
extern char __weak __start_BTF [ ] ;
extern char __weak __stop_BTF [ ] ;
2019-11-14 10:57:15 -08:00
/* BTF object used in this file to look up the kernel-side context types. */
extern struct btf * btf_vmlinux ;
/* BPF_MAP_TYPE and BPF_LINK_TYPE are defined to expand to nothing while
 * <linux/bpf_types.h> is included below, so only the BPF_PROG_TYPE entries
 * contribute to the three definitions that follow.
 */
# define BPF_MAP_TYPE(_id, _ops)
2020-04-28 17:16:08 -07:00
# define BPF_LINK_TYPE(_id, _name)
2019-11-14 10:57:15 -08:00
/* struct bpf_ctx_convert gets two members per BPF_PROG_TYPE entry:
 * <_id>_prog of type prog_ctx_type, immediately followed by <_id>_kern of
 * type kern_ctx_type. The union member 't' is assigned in
 * btf_parse_vmlinux() with the BTF type looked up for this struct.
 */
static union {
struct bpf_ctx_convert {
# define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
prog_ctx_type _id # # _prog ; \
kern_ctx_type _id # # _kern ;
# include <linux/bpf_types.h>
# undef BPF_PROG_TYPE
} * __t ;
/* 't' is written once under lock. Read many times. */
const struct btf_type * t ;
} bpf_ctx_convert ;
/* One enumerator per BPF_PROG_TYPE entry, numbered in the order the entries
 * appear in <linux/bpf_types.h>.
 */
enum {
# define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
__ctx_convert # # _id ,
# include <linux/bpf_types.h>
# undef BPF_PROG_TYPE
2019-11-27 20:35:08 -08:00
__ctx_convert_unused , /* to avoid empty enum in extreme .config */
2019-11-14 10:57:15 -08:00
} ;
/* Indexed by program type id; yields that program type's enumerator above.
 * btf_get_prog_ctx_type() multiplies the value by 2 to index the members of
 * struct bpf_ctx_convert (two members per program type).
 */
static u8 bpf_ctx_convert_map [ ] = {
# define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
[ _id ] = __ctx_convert # # _id ,
# include <linux/bpf_types.h>
# undef BPF_PROG_TYPE
2019-12-10 21:35:46 +01:00
0 , /* avoid empty array */
2019-11-14 10:57:15 -08:00
} ;
# undef BPF_MAP_TYPE
2020-04-28 17:16:08 -07:00
# undef BPF_LINK_TYPE
2019-11-14 10:57:15 -08:00
/* Check whether a program argument refers to the context struct expected
 * for 'prog_type'.
 *
 * @log:       verifier log that receives the diagnostic messages below
 * @btf:       BTF object that 't' belongs to
 * @t:         type of the argument (presumably a pointer type - the type it
 *             references via t->type is what gets inspected)
 * @prog_type: program type whose context struct is expected
 * @arg:       argument number, only used in a log message
 *
 * The type referenced by 't' is resolved in 'btf' with modifiers skipped and
 * must be a named struct. Its name is compared against the name of the
 * prog_ctx_type member recorded for 'prog_type' in struct bpf_ctx_convert,
 * which is looked up in btf_vmlinux.
 *
 * Returns that member of struct bpf_ctx_convert when the names match and
 * NULL in every other case.
 */
static const struct btf_member *
2021-03-24 18:51:36 -07:00
btf_get_prog_ctx_type ( struct bpf_verifier_log * log , const struct btf * btf ,
2020-01-09 22:41:20 -08:00
const struct btf_type * t , enum bpf_prog_type prog_type ,
int arg )
2019-11-14 10:57:15 -08:00
{
const struct btf_type * conv_struct ;
const struct btf_type * ctx_struct ;
const struct btf_member * ctx_type ;
const char * tname , * ctx_tname ;
conv_struct = bpf_ctx_convert . t ;
if ( ! conv_struct ) {
bpf_log ( log , " btf_vmlinux is malformed \n " ) ;
return NULL ;
}
t = btf_type_by_id ( btf , t - > type ) ;
while ( btf_type_is_modifier ( t ) )
t = btf_type_by_id ( btf , t - > type ) ;
if ( ! btf_type_is_struct ( t ) ) {
/* Only pointer to struct is supported for now.
* That means that BPF_PROG_TYPE_TRACEPOINT with BTF
* is not supported yet .
* BPF_PROG_TYPE_RAW_TRACEPOINT is fine .
*/
return NULL ;
}
tname = btf_name_by_offset ( btf , t - > name_off ) ;
if ( ! tname ) {
2020-01-09 22:41:20 -08:00
bpf_log ( log , " arg#%d struct doesn't have a name \n " , arg ) ;
2019-11-14 10:57:15 -08:00
return NULL ;
}
/* prog_type is valid bpf program type. No need for bounds check. */
/* Two members per program type in struct bpf_ctx_convert, hence '* 2';
 * this selects the prog_ctx_type member, the kern_ctx_type one follows it.
 */
ctx_type = btf_type_member ( conv_struct ) + bpf_ctx_convert_map [ prog_type ] * 2 ;
/* ctx_struct is a pointer to prog_ctx_type in vmlinux.
* Like ' struct __sk_buff '
*/
ctx_struct = btf_type_by_id ( btf_vmlinux , ctx_type - > type ) ;
if ( ! ctx_struct )
/* should not happen */
return NULL ;
ctx_tname = btf_name_by_offset ( btf_vmlinux , ctx_struct - > name_off ) ;
if ( ! ctx_tname ) {
/* should not happen */
bpf_log ( log , " Please fix kernel include/linux/bpf_types.h \n " ) ;
return NULL ;
}
/* only compare that prog's ctx type name is the same as
* kernel expects . No need to compare field by field .
* It ' s ok for bpf prog to do :
* struct __sk_buff { } ;
* int socket_filter_bpf_prog ( struct __sk_buff * skb )
* { // no fields of skb are ever used }
*/
if ( strcmp ( ctx_tname , tname ) )
return NULL ;
return ctx_type ;
}
2019-10-15 20:24:57 -07:00
2019-11-14 10:57:17 -08:00
/* Translate a program-side context argument type to the type id of the
 * matching kernel-side context type.
 *
 * Uses btf_get_prog_ctx_type() to find the prog_ctx_type member of
 * struct bpf_ctx_convert for 'prog_type'; the member right after it is the
 * corresponding kern_ctx_type, and its 'type' field is the value returned.
 *
 * Returns -ENOENT when btf_get_prog_ctx_type() finds no match.
 */
static int btf_translate_to_vmlinux ( struct bpf_verifier_log * log ,
struct btf * btf ,
const struct btf_type * t ,
2020-01-09 22:41:20 -08:00
enum bpf_prog_type prog_type ,
int arg )
2019-11-14 10:57:17 -08:00
{
const struct btf_member * prog_ctx_type , * kern_ctx_type ;
2020-01-09 22:41:20 -08:00
prog_ctx_type = btf_get_prog_ctx_type ( log , btf , t , prog_type , arg ) ;
2019-11-14 10:57:17 -08:00
if ( ! prog_ctx_type )
return - ENOENT ;
/* <_id>_kern is declared directly after <_id>_prog in bpf_ctx_convert. */
kern_ctx_type = prog_ctx_type + 1 ;
return kern_ctx_type - > type ;
}
2020-07-11 23:53:26 +02:00
/* BTF id list with a single entry for struct bpf_ctx_convert; entry [0] is
 * read by btf_parse_vmlinux() to look the type up in the parsed BTF.
 */
BTF_ID_LIST ( bpf_ctx_convert_btf_id )
BTF_ID ( struct , bpf_ctx_convert )
2019-10-15 20:24:57 -07:00
/* Build the 'struct btf' named "vmlinux" from the BTF data delimited by
 * __start_BTF and __stop_BTF.
 *
 * Steps, in order: parse the header, parse the string section, check all
 * type metadata, check type tag ordering, cache the BTF type of
 * struct bpf_ctx_convert in bpf_ctx_convert.t, call bpf_struct_ops_init(),
 * set the refcount to 1 and allocate an id.
 *
 * Returns the new btf on success or an ERR_PTR() on failure (-ENOMEM for the
 * allocations here, otherwise the error reported by the failing step).
 */
struct btf * btf_parse_vmlinux ( void )
{
struct btf_verifier_env * env = NULL ;
struct bpf_verifier_log * log ;
struct btf * btf = NULL ;
2020-07-11 23:53:26 +02:00
int err ;
2019-10-15 20:24:57 -07:00
env = kzalloc ( sizeof ( * env ) , GFP_KERNEL | __GFP_NOWARN ) ;
if ( ! env )
return ERR_PTR ( - ENOMEM ) ;
log = & env - > log ;
log - > level = BPF_LOG_KERNEL ;
btf = kzalloc ( sizeof ( * btf ) , GFP_KERNEL | __GFP_NOWARN ) ;
if ( ! btf ) {
err = - ENOMEM ;
goto errout ;
}
env - > btf = btf ;
2020-03-18 15:27:46 -07:00
/* The BTF data is used in place; no copy is allocated for it here. */
btf - > data = __start_BTF ;
btf - > data_size = __stop_BTF - __start_BTF ;
2020-11-09 17:19:29 -08:00
btf - > kernel_btf = true ;
snprintf ( btf - > name , sizeof ( btf - > name ) , " vmlinux " ) ;
2019-10-15 20:24:57 -07:00
err = btf_parse_hdr ( env ) ;
if ( err )
goto errout ;
btf - > nohdr_data = btf - > data + btf - > hdr . hdr_len ;
err = btf_parse_str_sec ( env ) ;
if ( err )
goto errout ;
err = btf_check_all_metas ( env ) ;
if ( err )
goto errout ;
bpf: Ensure type tags precede modifiers in BTF
It is guaranteed that for modifiers, clang always places type tags
before other modifiers, and then the base type. We would like to rely on
this guarantee inside the kernel to make it simple to parse type tags
from BTF.
However, a user would be allowed to construct a BTF without such
guarantees. Hence, add a pass to check that in modifier chains, type
tags only occur at the head of the chain, and then don't occur later in
the chain.
If we see a type tag, we can have one or more type tags preceding other
modifiers that then never have another type tag. If we see other
modifiers, all modifiers following them should never be a type tag.
Instead of having to walk chains we verified previously, we can remember
the last good modifier type ID which headed a good chain. At that point,
we must have verified all other chains headed by type IDs less than it.
This makes the verification process less costly, and it becomes a simple
O(n) pass.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220419164608.1990559-2-memxor@gmail.com
2022-04-19 22:16:07 +05:30
err = btf_check_type_tags ( env , btf , 1 ) ;
if ( err )
goto errout ;
2020-06-19 14:11:41 -07:00
/* btf_parse_vmlinux() runs under bpf_verifier_lock */
2020-07-11 23:53:26 +02:00
bpf_ctx_convert . t = btf_type_by_id ( btf , bpf_ctx_convert_btf_id [ 0 ] ) ;
2019-11-14 10:57:15 -08:00
2020-01-27 09:51:45 -08:00
bpf_struct_ops_init ( btf , log ) ;
2020-01-08 16:35:03 -08:00
2019-10-15 20:24:57 -07:00
refcount_set ( & btf - > refcnt , 1 ) ;
2020-11-09 17:19:29 -08:00
err = btf_alloc_id ( btf ) ;
if ( err )
goto errout ;
btf_verifier_env_free ( env ) ;
2019-10-15 20:24:57 -07:00
return btf ;
errout :
btf_verifier_env_free ( env ) ;
if ( btf ) {
/* btf->data is not freed: it was set to __start_BTF, not allocated. */
kvfree ( btf - > types ) ;
kfree ( btf ) ;
}
return ERR_PTR ( err ) ;
}
2020-11-10 20:06:45 -08:00
# ifdef CONFIG_DEBUG_INFO_BTF_MODULES
2020-11-09 17:19:31 -08:00
/* Build a 'struct btf' for a kernel module from a raw BTF blob.
 *
 * @module_name: copied into btf->name
 * @data:        BTF blob; it is copied, the caller's buffer is not retained
 * @data_size:   size of @data in bytes
 *
 * The new btf is layered on the BTF returned by bpf_get_btf_vmlinux(): that
 * object becomes base_btf, and start_id / start_str_off are taken from its
 * nr_types and hdr.str_len. The blob then goes through header parsing,
 * string section parsing, type metadata checks and the type tag check.
 *
 * Returns the new btf with refcnt set to 1, or an ERR_PTR(): the error from
 * bpf_get_btf_vmlinux() is passed through, a NULL base yields -EINVAL,
 * allocation failures yield -ENOMEM, and a failing parse/check step yields
 * that step's error.
 */
static struct btf * btf_parse_module ( const char * module_name , const void * data , unsigned int data_size )
{
struct btf_verifier_env * env = NULL ;
struct bpf_verifier_log * log ;
struct btf * btf = NULL , * base_btf ;
int err ;
base_btf = bpf_get_btf_vmlinux ( ) ;
if ( IS_ERR ( base_btf ) )
return base_btf ;
if ( ! base_btf )
return ERR_PTR ( - EINVAL ) ;
env = kzalloc ( sizeof ( * env ) , GFP_KERNEL | __GFP_NOWARN ) ;
if ( ! env )
return ERR_PTR ( - ENOMEM ) ;
log = & env - > log ;
log - > level = BPF_LOG_KERNEL ;
btf = kzalloc ( sizeof ( * btf ) , GFP_KERNEL | __GFP_NOWARN ) ;
if ( ! btf ) {
err = - ENOMEM ;
goto errout ;
}
env - > btf = btf ;
btf - > base_btf = base_btf ;
btf - > start_id = base_btf - > nr_types ;
btf - > start_str_off = base_btf - > hdr . str_len ;
btf - > kernel_btf = true ;
snprintf ( btf - > name , sizeof ( btf - > name ) , " %s " , module_name ) ;
/* Private copy of the caller's blob; released with kvfree() at errout. */
btf - > data = kvmalloc ( data_size , GFP_KERNEL | __GFP_NOWARN ) ;
if ( ! btf - > data ) {
err = - ENOMEM ;
goto errout ;
}
memcpy ( btf - > data , data , data_size ) ;
btf - > data_size = data_size ;
err = btf_parse_hdr ( env ) ;
if ( err )
goto errout ;
btf - > nohdr_data = btf - > data + btf - > hdr . hdr_len ;
err = btf_parse_str_sec ( env ) ;
if ( err )
goto errout ;
err = btf_check_all_metas ( env ) ;
if ( err )
goto errout ;
bpf: Ensure type tags precede modifiers in BTF
It is guaranteed that for modifiers, clang always places type tags
before other modifiers, and then the base type. We would like to rely on
this guarantee inside the kernel to make it simple to parse type tags
from BTF.
However, a user would be allowed to construct a BTF without such
guarantees. Hence, add a pass to check that in modifier chains, type
tags only occur at the head of the chain, and then don't occur later in
the chain.
If we see a type tag, we can have one or more type tags preceding other
modifiers that then never have another type tag. If we see other
modifiers, all modifiers following them should never be a type tag.
Instead of having to walk chains we verified previously, we can remember
the last good modifier type ID which headed a good chain. At that point,
we must have verified all other chains headed by type IDs less than it.
This makes the verification process less costly, and it becomes a simple
O(n) pass.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220419164608.1990559-2-memxor@gmail.com
2022-04-19 22:16:07 +05:30
err = btf_check_type_tags ( env , btf , btf_nr_types ( base_btf ) ) ;
if ( err )
goto errout ;
2020-11-09 17:19:31 -08:00
btf_verifier_env_free ( env ) ;
refcount_set ( & btf - > refcnt , 1 ) ;
return btf ;
errout :
btf_verifier_env_free ( env ) ;
if ( btf ) {
kvfree ( btf - > data ) ;
kvfree ( btf - > types ) ;
kfree ( btf ) ;
}
return ERR_PTR ( err ) ;
}
2020-11-10 20:06:45 -08:00
# endif /* CONFIG_DEBUG_INFO_BTF_MODULES */
2019-11-14 10:57:17 -08:00
/* Return the BTF object of prog->aux->dst_prog when a destination program
 * is set, otherwise prog->aux->attach_btf.
 */
struct btf * bpf_prog_get_target_btf ( const struct bpf_prog * prog )
{
2020-09-29 14:45:50 +02:00
struct bpf_prog * tgt_prog = prog - > aux - > dst_prog ;
2019-11-14 10:57:17 -08:00
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption thoughout BPF verifier of vmlinux BTF. Instead,
wherever BTF type IDs are involved, also track the instance of struct btf that
goes along with the type ID. This allows to gradually add support for kernel
module BTFs and using/tracking module types across BPF helper calls and
registers.
This patch also renames btf_id() function to btf_obj_id() to minimize naming
clash with using btf_id to denote BTF *type* ID, rather than BTF *object*'s ID.
Also, altough btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs need that, so apply BTF refcounting universally when
BPF program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler clean up code.
Now that BTF type ID is not enough to uniquely identify a BTF type, extend BPF
trampoline key to include BTF object ID. To differentiate that from target
program BPF ID, set 31st bit of type ID. BTF type IDs (at least currently) are
not allowed to take full 32 bits, so there is no danger of confusing that bit
with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-03 12:46:29 -08:00
if ( tgt_prog )
2019-11-14 10:57:17 -08:00
return tgt_prog - > aux - > btf ;
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption thoughout BPF verifier of vmlinux BTF. Instead,
wherever BTF type IDs are involved, also track the instance of struct btf that
goes along with the type ID. This allows to gradually add support for kernel
module BTFs and using/tracking module types across BPF helper calls and
registers.
This patch also renames btf_id() function to btf_obj_id() to minimize naming
clash with using btf_id to denote BTF *type* ID, rather than BTF *object*'s ID.
Also, altough btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs need that, so apply BTF refcounting universally when
BPF program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler clean up code.
Now that BTF type ID is not enough to uniquely identify a BTF type, extend BPF
trampoline key to include BTF object ID. To differentiate that from target
program BPF ID, set 31st bit of type ID. BTF type IDs (at least currently) are
not allowed to take full 32 bits, so there is no danger of confusing that bit
with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-03 12:46:29 -08:00
else
return prog - > aux - > attach_btf ;
2019-11-14 10:57:17 -08:00
}
2021-12-08 20:32:41 +01:00
/* Return true when pointer type 't' points to an integer type, either
 * directly or through exactly one const qualifier. Only a single
 * BTF_KIND_CONST level is stripped; no other modifier kinds are skipped.
 */
static bool is_int_ptr ( struct btf * btf , const struct btf_type * t )
2020-01-23 17:15:06 +01:00
{
/* t comes in already as a pointer */
t = btf_type_by_id ( btf , t - > type ) ;
/* allow const */
if ( BTF_INFO_KIND ( t - > info ) = = BTF_KIND_CONST )
t = btf_type_by_id ( btf , t - > type ) ;
2021-12-08 20:32:41 +01:00
return btf_type_is_int ( t ) ;
2020-01-23 17:15:06 +01:00
}
2022-08-31 08:26:46 -07:00
static u32 get_ctx_arg_idx ( struct btf * btf , const struct btf_type * func_proto ,
int off )
{
const struct btf_param * args ;
const struct btf_type * t ;
u32 offset = 0 , nr_args ;
int i ;
if ( ! func_proto )
return off / 8 ;
nr_args = btf_type_vlen ( func_proto ) ;
args = ( const struct btf_param * ) ( func_proto + 1 ) ;
for ( i = 0 ; i < nr_args ; i + + ) {
t = btf_type_skip_modifiers ( btf , args [ i ] . type , NULL ) ;
offset + = btf_type_is_ptr ( t ) ? 8 : roundup ( t - > size , 8 ) ;
if ( off < offset )
return i ;
}
t = btf_type_skip_modifiers ( btf , func_proto - > type , NULL ) ;
offset + = btf_type_is_ptr ( t ) ? 8 : roundup ( t - > size , 8 ) ;
if ( off < offset )
return nr_args ;
return nr_args + 1 ;
}
2019-10-15 20:25:00 -07:00
bool btf_ctx_access ( int off , int size , enum bpf_access_type type ,
const struct bpf_prog * prog ,
struct bpf_insn_access_aux * info )
{
2019-10-24 17:18:11 -07:00
const struct btf_type * t = prog - > aux - > attach_func_proto ;
2020-09-29 14:45:50 +02:00
struct bpf_prog * tgt_prog = prog - > aux - > dst_prog ;
2019-11-14 10:57:17 -08:00
struct btf * btf = bpf_prog_get_target_btf ( prog ) ;
2019-10-24 17:18:11 -07:00
const char * tname = prog - > aux - > attach_func_name ;
2019-10-15 20:25:00 -07:00
struct bpf_verifier_log * log = info - > log ;
const struct btf_param * args ;
bpf: reject program if a __user tagged memory accessed in kernel way
BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type
of bpf programs, e.g., a->b. If "a" is a pointer
pointing to kernel memory, bpf verifier will allow user to write
code in C like a->b and the verifier will translate it to a kernel
load properly. If "a" is a pointer to user memory, it is expected
that the bpf developer should use the bpf_probe_read_user() helper to
get the value a->b. Without utilizing BTF __user tagging information,
current verifier will assume that a->b is a kernel memory access
and this may generate incorrect result.
Now BTF contains __user information, it can check whether the
pointer points to a user memory or not. If it is, the verifier
can reject the program and force users to use bpf_probe_read_user()
helper explicitly.
In the future, we can easily extend btf_add_space for other
address space tagging, for example, rcu/percpu etc.
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-27 07:46:06 -08:00
const char * tag_value ;
2019-10-15 20:25:00 -07:00
u32 nr_args , arg ;
2020-05-13 11:02:21 -07:00
int i , ret ;
2019-10-15 20:25:00 -07:00
if ( off % 8 ) {
2019-10-24 17:18:11 -07:00
bpf_log ( log , " func '%s' offset %d is not multiple of 8 \n " ,
2019-10-15 20:25:00 -07:00
tname , off ) ;
return false ;
}
2022-08-31 08:26:46 -07:00
arg = get_ctx_arg_idx ( btf , t , off ) ;
2019-10-15 20:25:00 -07:00
args = ( const struct btf_param * ) ( t + 1 ) ;
2021-02-26 00:26:29 +04:00
/* if (t == NULL) Fall back to default BPF prog with
* MAX_BPF_FUNC_REG_ARGS u64 arguments .
*/
nr_args = t ? btf_type_vlen ( t ) : MAX_BPF_FUNC_REG_ARGS ;
2019-10-24 17:18:11 -07:00
if ( prog - > aux - > attach_btf_trace ) {
/* skip first 'void *__data' argument in btf_trace_##name typedef */
args + + ;
nr_args - - ;
}
2019-11-14 10:57:04 -08:00
2020-03-30 16:42:46 +02:00
if ( arg > nr_args ) {
bpf_log ( log , " func '%s' doesn't have %d-th argument \n " ,
tname , arg + 1 ) ;
return false ;
}
2020-03-04 20:18:50 +01:00
if ( arg = = nr_args ) {
2020-03-30 16:42:46 +02:00
switch ( prog - > expected_attach_type ) {
2022-06-28 10:43:06 -07:00
case BPF_LSM_CGROUP :
2020-03-30 16:42:46 +02:00
case BPF_LSM_MAC :
case BPF_TRACE_FEXIT :
2020-03-29 01:43:52 +01:00
/* When LSM programs are attached to void LSM hooks
* they use FEXIT trampolines and when attached to
* int LSM hooks , they use MODIFY_RETURN trampolines .
*
* While the LSM programs are BPF_MODIFY_RETURN - like
* the check :
*
* if ( ret_type ! = ' int ' )
* return - EINVAL ;
*
* is _not_ done here . This is still safe as LSM hooks
* have only void and int return types .
*/
2020-03-04 20:18:50 +01:00
if ( ! t )
return true ;
t = btf_type_by_id ( btf , t - > type ) ;
2020-03-30 16:42:46 +02:00
break ;
case BPF_MODIFY_RETURN :
2020-03-04 20:18:50 +01:00
/* For now the BPF_MODIFY_RETURN can only be attached to
* functions that return an int .
*/
if ( ! t )
return false ;
t = btf_type_skip_modifiers ( btf , t - > type , NULL ) ;
2020-06-24 15:20:39 -07:00
if ( ! btf_type_is_small_int ( t ) ) {
2020-03-04 20:18:50 +01:00
bpf_log ( log ,
" ret type %s not allowed for fmod_ret \n " ,
2022-09-16 13:28:00 -07:00
btf_type_str ( t ) ) ;
2020-03-04 20:18:50 +01:00
return false ;
}
2020-03-30 16:42:46 +02:00
break ;
default :
bpf_log ( log , " func '%s' doesn't have %d-th argument \n " ,
tname , arg + 1 ) ;
return false ;
2020-03-04 20:18:50 +01:00
}
2019-11-14 10:57:04 -08:00
} else {
2019-11-14 10:57:17 -08:00
if ( ! t )
2021-02-26 00:26:29 +04:00
/* Default prog with MAX_BPF_FUNC_REG_ARGS args */
2019-11-14 10:57:17 -08:00
return true ;
t = btf_type_by_id ( btf , args [ arg ] . type ) ;
2019-10-15 20:25:00 -07:00
}
2020-03-30 16:42:46 +02:00
2019-10-15 20:25:00 -07:00
/* skip modifiers */
while ( btf_type_is_modifier ( t ) )
2019-11-14 10:57:17 -08:00
t = btf_type_by_id ( btf , t - > type ) ;
2022-08-31 08:26:46 -07:00
if ( btf_type_is_small_int ( t ) | | btf_is_any_enum ( t ) | | __btf_type_is_struct ( t ) )
2019-10-15 20:25:00 -07:00
/* accessing a scalar */
return true ;
if ( ! btf_type_is_ptr ( t ) ) {
bpf_log ( log ,
2019-10-24 17:18:11 -07:00
" func '%s' arg%d '%s' has type %s. Only pointer access is allowed \n " ,
2019-10-15 20:25:00 -07:00
tname , arg ,
2019-11-14 10:57:17 -08:00
__btf_name_by_offset ( btf , t - > name_off ) ,
2022-09-16 13:28:00 -07:00
btf_type_str ( t ) ) ;
2019-10-15 20:25:00 -07:00
return false ;
}
2020-07-23 11:41:11 -07:00
/* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */
for ( i = 0 ; i < prog - > aux - > ctx_arg_info_size ; i + + ) {
const struct bpf_ctx_arg_aux * ctx_arg_info = & prog - > aux - > ctx_arg_info [ i ] ;
2021-12-16 16:31:47 -08:00
u32 type , flag ;
2020-07-23 11:41:11 -07:00
2021-12-16 16:31:47 -08:00
type = base_type ( ctx_arg_info - > reg_type ) ;
flag = type_flag ( ctx_arg_info - > reg_type ) ;
2021-12-16 16:31:48 -08:00
if ( ctx_arg_info - > offset = = off & & type = = PTR_TO_BUF & &
2021-12-16 16:31:47 -08:00
( flag & PTR_MAYBE_NULL ) ) {
2020-07-23 11:41:11 -07:00
info - > reg_type = ctx_arg_info - > reg_type ;
return true ;
}
}
2019-10-15 20:25:00 -07:00
if ( t - > type = = 0 )
/* This is a pointer to void.
* It is the same as scalar from the verifier safety pov .
* No further pointer walking is allowed .
*/
return true ;
2021-12-08 20:32:41 +01:00
if ( is_int_ptr ( btf , t ) )
2020-01-23 17:15:06 +01:00
return true ;
2019-10-15 20:25:00 -07:00
/* this is a pointer to another type */
2020-05-13 11:02:21 -07:00
for ( i = 0 ; i < prog - > aux - > ctx_arg_info_size ; i + + ) {
const struct bpf_ctx_arg_aux * ctx_arg_info = & prog - > aux - > ctx_arg_info [ i ] ;
if ( ctx_arg_info - > offset = = off ) {
bpf: Emit better log message if bpf_iter ctx arg btf_id == 0
To avoid kernel build failure due to some missing .BTF-ids referenced
functions/types, the patch ([1]) tries to fill btf_id 0 for
these types.
In bpf verifier, for percpu variable and helper returning btf_id cases,
verifier already emitted proper warning with something like
verbose(env, "Helper has invalid btf_id in R%d\n", regno);
verbose(env, "invalid return type %d of func %s#%d\n",
fn->ret_type, func_id_name(func_id), func_id);
But this is not the case for bpf_iter context arguments.
I hacked resolve_btfids to encode btf_id 0 for struct task_struct.
With `./test_progs -n 7/5`, I got,
0: (79) r2 = *(u64 *)(r1 +0)
func 'bpf_iter_task' arg0 has btf_id 29739 type STRUCT 'bpf_iter_meta'
; struct seq_file *seq = ctx->meta->seq;
1: (79) r6 = *(u64 *)(r2 +0)
; struct task_struct *task = ctx->task;
2: (79) r7 = *(u64 *)(r1 +8)
; if (task == (void *)0) {
3: (55) if r7 != 0x0 goto pc+11
...
; BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
26: (61) r1 = *(u32 *)(r7 +1372)
Type '(anon)' is not a struct
Basically, verifier will return btf_id 0 for task_struct.
Later on, when the code tries to access task->tgid, the
verifier correctly complains the type is '(anon)' and it is
not a struct. Users still need to backtrace to find out
what is going on.
Let us catch the invalid btf_id 0 earlier
and provide better message indicating btf_id is wrong.
The new error message looks like below:
R1 type=ctx expected=fp
; struct seq_file *seq = ctx->meta->seq;
0: (79) r2 = *(u64 *)(r1 +0)
func 'bpf_iter_task' arg0 has btf_id 29739 type STRUCT 'bpf_iter_meta'
; struct seq_file *seq = ctx->meta->seq;
1: (79) r6 = *(u64 *)(r2 +0)
; struct task_struct *task = ctx->task;
2: (79) r7 = *(u64 *)(r1 +8)
invalid btf_id for context argument offset 8
invalid bpf_context access off=8 size=8
[1] https://lore.kernel.org/bpf/20210727132532.2473636-1-hengqi.chen@gmail.com/
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20210728183025.1461750-1-yhs@fb.com
2021-07-28 11:30:25 -07:00
if ( ! ctx_arg_info - > btf_id ) {
bpf_log ( log , " invalid btf_id for context argument offset %u \n " , off ) ;
return false ;
}
2020-05-13 11:02:21 -07:00
info - > reg_type = ctx_arg_info - > reg_type ;
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption throughout BPF verifier of vmlinux BTF. Instead,
wherever BTF type IDs are involved, also track the instance of struct btf that
goes along with the type ID. This allows to gradually add support for kernel
module BTFs and using/tracking module types across BPF helper calls and
registers.
This patch also renames btf_id() function to btf_obj_id() to minimize naming
clash with using btf_id to denote BTF *type* ID, rather than BTF *object*'s ID.
Also, although btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs need that, so apply BTF refcounting universally when
BPF program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler clean up code.
Now that BTF type ID is not enough to uniquely identify a BTF type, extend BPF
trampoline key to include BTF object ID. To differentiate that from target
program BPF ID, set 31st bit of type ID. BTF type IDs (at least currently) are
not allowed to take full 32 bits, so there is no danger of confusing that bit
with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-03 12:46:29 -08:00
info - > btf = btf_vmlinux ;
2020-07-20 09:34:03 -07:00
info - > btf_id = ctx_arg_info - > btf_id ;
return true ;
2020-05-13 11:02:21 -07:00
}
}
2019-10-15 20:25:00 -07:00
2020-07-20 09:34:03 -07:00
info - > reg_type = PTR_TO_BTF_ID ;
2019-11-14 10:57:17 -08:00
if ( tgt_prog ) {
2020-09-29 14:45:52 +02:00
enum bpf_prog_type tgt_type ;
if ( tgt_prog - > type = = BPF_PROG_TYPE_EXT )
tgt_type = tgt_prog - > aux - > saved_dst_prog_type ;
else
tgt_type = tgt_prog - > type ;
ret = btf_translate_to_vmlinux ( log , btf , t , tgt_type , arg ) ;
2019-11-14 10:57:17 -08:00
if ( ret > 0 ) {
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption throughout BPF verifier of vmlinux BTF. Instead,
wherever BTF type IDs are involved, also track the instance of struct btf that
goes along with the type ID. This allows to gradually add support for kernel
module BTFs and using/tracking module types across BPF helper calls and
registers.
This patch also renames btf_id() function to btf_obj_id() to minimize naming
clash with using btf_id to denote BTF *type* ID, rather than BTF *object*'s ID.
Also, although btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs need that, so apply BTF refcounting universally when
BPF program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler clean up code.
Now that BTF type ID is not enough to uniquely identify a BTF type, extend BPF
trampoline key to include BTF object ID. To differentiate that from target
program BPF ID, set 31st bit of type ID. BTF type IDs (at least currently) are
not allowed to take full 32 bits, so there is no danger of confusing that bit
with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-03 12:46:29 -08:00
info - > btf = btf_vmlinux ;
2019-11-14 10:57:17 -08:00
info - > btf_id = ret ;
return true ;
} else {
return false ;
}
}
2020-01-08 16:34:56 -08:00
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption throughout BPF verifier of vmlinux BTF. Instead,
wherever BTF type IDs are involved, also track the instance of struct btf that
goes along with the type ID. This allows to gradually add support for kernel
module BTFs and using/tracking module types across BPF helper calls and
registers.
This patch also renames btf_id() function to btf_obj_id() to minimize naming
clash with using btf_id to denote BTF *type* ID, rather than BTF *object*'s ID.
Also, although btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs need that, so apply BTF refcounting universally when
BPF program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler clean up code.
Now that BTF type ID is not enough to uniquely identify a BTF type, extend BPF
trampoline key to include BTF object ID. To differentiate that from target
program BPF ID, set 31st bit of type ID. BTF type IDs (at least currently) are
not allowed to take full 32 bits, so there is no danger of confusing that bit
with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-03 12:46:29 -08:00
info - > btf = btf ;
2020-01-08 16:34:56 -08:00
info - > btf_id = t - > type ;
2019-11-14 10:57:17 -08:00
t = btf_type_by_id ( btf , t - > type ) ;
bpf: reject program if a __user tagged memory accessed in kernel way
BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type
of bpf programs, e.g., a->b. If "a" is a pointer
pointing to kernel memory, bpf verifier will allow user to write
code in C like a->b and the verifier will translate it to a kernel
load properly. If "a" is a pointer to user memory, it is expected
that bpf developer should use the bpf_probe_read_user() helper to
get the value a->b. Without utilizing BTF __user tagging information,
current verifier will assume that a->b is a kernel memory access
and this may generate incorrect result.
Now BTF contains __user information, it can check whether the
pointer points to a user memory or not. If it is, the verifier
can reject the program and force users to use bpf_probe_read_user()
helper explicitly.
In the future, we can easily extend btf_add_space for other
address space tagging, for example, rcu/percpu etc.
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-27 07:46:06 -08:00
if ( btf_type_is_type_tag ( t ) ) {
tag_value = __btf_name_by_offset ( btf , t - > name_off ) ;
if ( strcmp ( tag_value , " user " ) = = 0 )
info - > reg_type | = MEM_USER ;
2022-03-04 11:16:56 -08:00
if ( strcmp ( tag_value , " percpu " ) = = 0 )
info - > reg_type | = MEM_PERCPU ;
bpf: reject program if a __user tagged memory accessed in kernel way
BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type
of bpf programs, e.g., a->b. If "a" is a pointer
pointing to kernel memory, bpf verifier will allow user to write
code in C like a->b and the verifier will translate it to a kernel
load properly. If "a" is a pointer to user memory, it is expected
that bpf developer should use the bpf_probe_read_user() helper to
get the value a->b. Without utilizing BTF __user tagging information,
current verifier will assume that a->b is a kernel memory access
and this may generate incorrect result.
Now BTF contains __user information, it can check whether the
pointer points to a user memory or not. If it is, the verifier
can reject the program and force users to use bpf_probe_read_user()
helper explicitly.
In the future, we can easily extend btf_add_space for other
address space tagging, for example, rcu/percpu etc.
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-27 07:46:06 -08:00
}
2019-10-15 20:25:00 -07:00
/* skip modifiers */
2020-01-08 16:34:56 -08:00
while ( btf_type_is_modifier ( t ) ) {
info - > btf_id = t - > type ;
2019-11-14 10:57:17 -08:00
t = btf_type_by_id ( btf , t - > type ) ;
2020-01-08 16:34:56 -08:00
}
2019-10-15 20:25:00 -07:00
if ( ! btf_type_is_struct ( t ) ) {
bpf_log ( log ,
2019-10-24 17:18:11 -07:00
" func '%s' arg%d type %s is not a struct \n " ,
2022-09-16 13:28:00 -07:00
tname , arg , btf_type_str ( t ) ) ;
2019-10-15 20:25:00 -07:00
return false ;
}
2019-10-24 17:18:11 -07:00
bpf_log ( log , " func '%s' arg%d has btf_id %d type %s '%s' \n " ,
2022-09-16 13:28:00 -07:00
tname , arg , info - > btf_id , btf_type_str ( t ) ,
2019-11-14 10:57:17 -08:00
__btf_name_by_offset ( btf , t - > name_off ) ) ;
2019-10-15 20:25:00 -07:00
return true ;
}
2020-08-25 21:21:17 +02:00
/* Non-negative results of btf_struct_walk(). Any negative return value from
 * the walker is an errno and is not represented here.
 */
enum bpf_struct_walk_result {
	/* < 0 error */
	/* access resolved to plain (non-struct, non-struct-pointer) bytes */
	WALK_SCALAR = 0,
	/* access reads a pointer member whose pointee is a struct/union */
	WALK_PTR = 1,
	/* offset lands exactly on an embedded struct/union member */
	WALK_STRUCT = 2,
};
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption throughout the BPF verifier of vmlinux BTF. Instead,
wherever BTF type IDs are involved, also track the instance of struct btf that
goes along with the type ID. This allows to gradually add support for kernel
module BTFs and using/tracking module types across BPF helper calls and
registers.
This patch also renames btf_id() function to btf_obj_id() to minimize naming
clash with using btf_id to denote BTF *type* ID, rather than BTF *object*'s ID.
Also, although btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs need that, so apply BTF refcounting universally when
BPF program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler clean up code.
Now that BTF type ID is not enough to uniquely identify a BTF type, extend BPF
trampoline key to include BTF object ID. To differentiate that from target
program BPF ID, set 31st bit of type ID. BTF type IDs (at least currently) are
not allowed to take full 32 bits, so there is no danger of confusing that bit
with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-03 12:46:29 -08:00
static int btf_struct_walk ( struct bpf_verifier_log * log , const struct btf * btf ,
2020-08-25 21:21:17 +02:00
const struct btf_type * t , int off , int size ,
bpf: reject program if a __user tagged memory accessed in kernel way
BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type
of bpf programs, e.g., a->b. If "a" is a pointer
pointing to kernel memory, bpf verifier will allow user to write
code in C like a->b and the verifier will translate it to a kernel
load properly. If "a" is a pointer to user memory, it is expected
that bpf developer should be bpf_probe_read_user() helper to
get the value a->b. Without utilizing BTF __user tagging information,
current verifier will assume that a->b is a kernel memory access
and this may generate incorrect result.
Now BTF contains __user information, it can check whether the
pointer points to a user memory or not. If it is, the verifier
can reject the program and force users to use bpf_probe_read_user()
helper explicitly.
In the future, we can easily extend btf_add_space for other
address space tagging, for example, rcu/percpu etc.
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-27 07:46:06 -08:00
u32 * next_btf_id , enum bpf_type_flag * flag )
2019-10-15 20:25:00 -07:00
{
2019-11-07 10:09:03 -08:00
u32 i , moff , mtrue_end , msize = 0 , total_nelems = 0 ;
const struct btf_type * mtype , * elem_type = NULL ;
2019-10-15 20:25:00 -07:00
const struct btf_member * member ;
bpf: reject program if a __user tagged memory accessed in kernel way
BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type
of bpf programs, e.g., a->b. If "a" is a pointer
pointing to kernel memory, bpf verifier will allow user to write
code in C like a->b and the verifier will translate it to a kernel
load properly. If "a" is a pointer to user memory, it is expected
that bpf developer should be bpf_probe_read_user() helper to
get the value a->b. Without utilizing BTF __user tagging information,
current verifier will assume that a->b is a kernel memory access
and this may generate incorrect result.
Now BTF contains __user information, it can check whether the
pointer points to a user memory or not. If it is, the verifier
can reject the program and force users to use bpf_probe_read_user()
helper explicitly.
In the future, we can easily extend btf_add_space for other
address space tagging, for example, rcu/percpu etc.
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-27 07:46:06 -08:00
const char * tname , * mname , * tag_value ;
2020-08-25 21:21:17 +02:00
u32 vlen , elem_id , mid ;
2019-10-15 20:25:00 -07:00
again :
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption thoughout BPF verifier of vmlinux BTF. Instead,
wherever BTF type IDs are involved, also track the instance of struct btf that
goes along with the type ID. This allows to gradually add support for kernel
module BTFs and using/tracking module types across BPF helper calls and
registers.
This patch also renames btf_id() function to btf_obj_id() to minimize naming
clash with using btf_id to denote BTF *type* ID, rather than BTF *object*'s ID.
Also, altough btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs need that, so apply BTF refcounting universally when
BPF program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler clean up code.
Now that BTF type ID is not enough to uniquely identify a BTF type, extend BPF
trampoline key to include BTF object ID. To differentiate that from target
program BPF ID, set 31st bit of type ID. BTF type IDs (at least currently) are
not allowed to take full 32 bits, so there is no danger of confusing that bit
with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-03 12:46:29 -08:00
tname = __btf_name_by_offset ( btf , t - > name_off ) ;
2019-10-15 20:25:00 -07:00
if ( ! btf_type_is_struct ( t ) ) {
2020-01-08 16:34:56 -08:00
bpf_log ( log , " Type '%s' is not a struct \n " , tname ) ;
2019-10-15 20:25:00 -07:00
return - EINVAL ;
}
2020-05-09 10:59:16 -07:00
vlen = btf_type_vlen ( t ) ;
2020-01-08 16:35:01 -08:00
if ( off + size > t - > size ) {
2020-05-09 10:59:16 -07:00
/* If the last element is a variable size array, we may
* need to relax the rule .
*/
struct btf_array * array_elem ;
if ( vlen = = 0 )
goto error ;
member = btf_type_member ( t ) + vlen - 1 ;
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption thoughout BPF verifier of vmlinux BTF. Instead,
wherever BTF type IDs are involved, also track the instance of struct btf that
goes along with the type ID. This allows to gradually add support for kernel
module BTFs and using/tracking module types across BPF helper calls and
registers.
This patch also renames btf_id() function to btf_obj_id() to minimize naming
clash with using btf_id to denote BTF *type* ID, rather than BTF *object*'s ID.
Also, altough btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs need that, so apply BTF refcounting universally when
BPF program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler clean up code.
Now that BTF type ID is not enough to uniquely identify a BTF type, extend BPF
trampoline key to include BTF object ID. To differentiate that from target
program BPF ID, set 31st bit of type ID. BTF type IDs (at least currently) are
not allowed to take full 32 bits, so there is no danger of confusing that bit
with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-03 12:46:29 -08:00
mtype = btf_type_skip_modifiers ( btf , member - > type ,
2020-05-09 10:59:16 -07:00
NULL ) ;
if ( ! btf_type_is_array ( mtype ) )
goto error ;
array_elem = ( struct btf_array * ) ( mtype + 1 ) ;
if ( array_elem - > nelems ! = 0 )
goto error ;
2021-12-01 10:10:25 -08:00
moff = __btf_member_bit_offset ( t , member ) / 8 ;
2020-05-09 10:59:16 -07:00
if ( off < moff )
goto error ;
/* Only allow structure for now, can be relaxed for
* other types later .
*/
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption thoughout BPF verifier of vmlinux BTF. Instead,
wherever BTF type IDs are involved, also track the instance of struct btf that
goes along with the type ID. This allows to gradually add support for kernel
module BTFs and using/tracking module types across BPF helper calls and
registers.
This patch also renames btf_id() function to btf_obj_id() to minimize naming
clash with using btf_id to denote BTF *type* ID, rather than BTF *object*'s ID.
Also, altough btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs need that, so apply BTF refcounting universally when
BPF program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler clean up code.
Now that BTF type ID is not enough to uniquely identify a BTF type, extend BPF
trampoline key to include BTF object ID. To differentiate that from target
program BPF ID, set 31st bit of type ID. BTF type IDs (at least currently) are
not allowed to take full 32 bits, so there is no danger of confusing that bit
with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-03 12:46:29 -08:00
t = btf_type_skip_modifiers ( btf , array_elem - > type ,
2020-08-25 21:21:16 +02:00
NULL ) ;
if ( ! btf_type_is_struct ( t ) )
2020-05-09 10:59:16 -07:00
goto error ;
2020-08-25 21:21:16 +02:00
off = ( off - moff ) % t - > size ;
goto again ;
2020-05-09 10:59:16 -07:00
error :
2020-01-08 16:35:01 -08:00
bpf_log ( log , " access beyond struct %s at off %u size %u \n " ,
tname , off , size ) ;
return - EACCES ;
}
2019-10-15 20:25:00 -07:00
2020-01-08 16:35:01 -08:00
for_each_member ( i , t , member ) {
2019-11-07 10:09:03 -08:00
/* offset of the field in bytes */
2021-12-01 10:10:25 -08:00
moff = __btf_member_bit_offset ( t , member ) / 8 ;
2019-11-07 10:09:03 -08:00
if ( off + size < = moff )
2019-10-15 20:25:00 -07:00
/* won't find anything, field is already too far */
break ;
2020-01-08 16:35:01 -08:00
2021-12-01 10:10:25 -08:00
if ( __btf_member_bitfield_size ( t , member ) ) {
u32 end_bit = __btf_member_bit_offset ( t , member ) +
__btf_member_bitfield_size ( t , member ) ;
2020-01-08 16:35:01 -08:00
/* off <= moff instead of off == moff because clang
* does not generate a BTF member for anonymous
* bitfield like the " :16 " here :
* struct {
* int : 16 ;
* int x : 8 ;
* } ;
*/
if ( off < = moff & &
BITS_ROUNDUP_BYTES ( end_bit ) < = off + size )
2020-08-25 21:21:17 +02:00
return WALK_SCALAR ;
2020-01-08 16:35:01 -08:00
/* off may be accessing a following member
*
* or
*
* Doing partial access at either end of this
* bitfield . Continue on this case also to
* treat it as not accessing this bitfield
* and eventually error out as field not
* found to keep it simple .
* It could be relaxed if there was a legit
* partial access case later .
*/
continue ;
}
2019-11-07 10:09:03 -08:00
/* In case of "off" is pointing to holes of a struct */
if ( off < moff )
2020-01-08 16:35:01 -08:00
break ;
2019-10-15 20:25:00 -07:00
/* type of the field */
2020-08-25 21:21:17 +02:00
mid = member - > type ;
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption thoughout BPF verifier of vmlinux BTF. Instead,
wherever BTF type IDs are involved, also track the instance of struct btf that
goes along with the type ID. This allows to gradually add support for kernel
module BTFs and using/tracking module types across BPF helper calls and
registers.
This patch also renames btf_id() function to btf_obj_id() to minimize naming
clash with using btf_id to denote BTF *type* ID, rather than BTF *object*'s ID.
Also, altough btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs need that, so apply BTF refcounting universally when
BPF program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler clean up code.
Now that BTF type ID is not enough to uniquely identify a BTF type, extend BPF
trampoline key to include BTF object ID. To differentiate that from target
program BPF ID, set 31st bit of type ID. BTF type IDs (at least currently) are
not allowed to take full 32 bits, so there is no danger of confusing that bit
with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-03 12:46:29 -08:00
mtype = btf_type_by_id ( btf , member - > type ) ;
mname = __btf_name_by_offset ( btf , member - > name_off ) ;
2019-10-15 20:25:00 -07:00
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption thoughout BPF verifier of vmlinux BTF. Instead,
wherever BTF type IDs are involved, also track the instance of struct btf that
goes along with the type ID. This allows to gradually add support for kernel
module BTFs and using/tracking module types across BPF helper calls and
registers.
This patch also renames btf_id() function to btf_obj_id() to minimize naming
clash with using btf_id to denote BTF *type* ID, rather than BTF *object*'s ID.
Also, altough btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs need that, so apply BTF refcounting universally when
BPF program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler clean up code.
Now that BTF type ID is not enough to uniquely identify a BTF type, extend BPF
trampoline key to include BTF object ID. To differentiate that from target
program BPF ID, set 31st bit of type ID. BTF type IDs (at least currently) are
not allowed to take full 32 bits, so there is no danger of confusing that bit
with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-03 12:46:29 -08:00
mtype = __btf_resolve_size ( btf , mtype , & msize ,
2020-08-25 21:21:17 +02:00
& elem_type , & elem_id , & total_nelems ,
& mid ) ;
2019-11-07 10:09:03 -08:00
if ( IS_ERR ( mtype ) ) {
2019-10-15 20:25:00 -07:00
bpf_log ( log , " field %s doesn't have size \n " , mname ) ;
return - EFAULT ;
}
2019-11-07 10:09:03 -08:00
mtrue_end = moff + msize ;
if ( off > = mtrue_end )
2019-10-15 20:25:00 -07:00
/* no overlap with member, keep iterating */
continue ;
2019-11-07 10:09:03 -08:00
if ( btf_type_is_array ( mtype ) ) {
u32 elem_idx ;
2020-08-25 21:21:13 +02:00
/* __btf_resolve_size() above helps to
2019-11-07 10:09:03 -08:00
* linearize a multi - dimensional array .
*
* The logic here is treating an array
* in a struct as the following way :
*
* struct outer {
* struct inner array [ 2 ] [ 2 ] ;
* } ;
*
* looks like :
*
* struct outer {
* struct inner array_elem0 ;
* struct inner array_elem1 ;
* struct inner array_elem2 ;
* struct inner array_elem3 ;
* } ;
*
* When accessing outer - > array [ 1 ] [ 0 ] , it moves
* moff to " array_elem2 " , set mtype to
* " struct inner " , and msize also becomes
* sizeof ( struct inner ) . Then most of the
* remaining logic will fall through without
* caring the current member is an array or
* not .
*
* Unlike mtype / msize / moff , mtrue_end does not
* change . The naming difference ( " _true " ) tells
* that it is not always corresponding to
* the current mtype / msize / moff .
* It is the true end of the current
* member ( i . e . array in this case ) . That
* will allow an int array to be accessed like
* a scratch space ,
* i . e . allow access beyond the size of
* the array ' s element as long as it is
* within the mtrue_end boundary .
*/
/* skip empty array */
if ( moff = = mtrue_end )
continue ;
msize / = total_nelems ;
elem_idx = ( off - moff ) / msize ;
moff + = elem_idx * msize ;
mtype = elem_type ;
2020-08-25 21:21:17 +02:00
mid = elem_id ;
2019-11-07 10:09:03 -08:00
}
2019-10-15 20:25:00 -07:00
/* the 'off' we're looking for is either equal to start
* of this field or inside of this struct
*/
if ( btf_type_is_struct ( mtype ) ) {
/* our field must be inside that union or struct */
t = mtype ;
2020-08-25 21:21:17 +02:00
/* return if the offset matches the member offset */
if ( off = = moff ) {
* next_btf_id = mid ;
return WALK_STRUCT ;
}
2019-10-15 20:25:00 -07:00
/* adjust offset we're looking for */
2019-11-07 10:09:03 -08:00
off - = moff ;
2019-10-15 20:25:00 -07:00
goto again ;
}
if ( btf_type_is_ptr ( mtype ) ) {
bpf: reject program if a __user tagged memory accessed in kernel way
BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type
of bpf programs, e.g., a->b. If "a" is a pointer
pointing to kernel memory, bpf verifier will allow user to write
code in C like a->b and the verifier will translate it to a kernel
load properly. If "a" is a pointer to user memory, it is expected
that bpf developer should be bpf_probe_read_user() helper to
get the value a->b. Without utilizing BTF __user tagging information,
current verifier will assume that a->b is a kernel memory access
and this may generate incorrect result.
Now BTF contains __user information, it can check whether the
pointer points to a user memory or not. If it is, the verifier
can reject the program and force users to use bpf_probe_read_user()
helper explicitly.
In the future, we can easily extend btf_add_space for other
address space tagging, for example, rcu/percpu etc.
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-27 07:46:06 -08:00
const struct btf_type * stype , * t ;
enum bpf_type_flag tmp_flag = 0 ;
2020-01-31 16:03:14 -08:00
u32 id ;
2019-10-15 20:25:00 -07:00
2019-11-07 10:09:03 -08:00
if ( msize ! = size | | off ! = moff ) {
bpf_log ( log ,
" cannot access ptr member %s with moff %u in struct %s with off %u size %u \n " ,
mname , moff , tname , off , size ) ;
return - EACCES ;
}
bpf: reject program if a __user tagged memory accessed in kernel way
BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type
of bpf programs, e.g., a->b. If "a" is a pointer
pointing to kernel memory, bpf verifier will allow user to write
code in C like a->b and the verifier will translate it to a kernel
load properly. If "a" is a pointer to user memory, it is expected
that bpf developer should be bpf_probe_read_user() helper to
get the value a->b. Without utilizing BTF __user tagging information,
current verifier will assume that a->b is a kernel memory access
and this may generate incorrect result.
Now BTF contains __user information, it can check whether the
pointer points to a user memory or not. If it is, the verifier
can reject the program and force users to use bpf_probe_read_user()
helper explicitly.
In the future, we can easily extend btf_add_space for other
address space tagging, for example, rcu/percpu etc.
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-27 07:46:06 -08:00
2022-03-04 11:16:56 -08:00
/* check type tag */
bpf: reject program if a __user tagged memory accessed in kernel way
BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type
of bpf programs, e.g., a->b. If "a" is a pointer
pointing to kernel memory, bpf verifier will allow user to write
code in C like a->b and the verifier will translate it to a kernel
load properly. If "a" is a pointer to user memory, it is expected
that bpf developer should be bpf_probe_read_user() helper to
get the value a->b. Without utilizing BTF __user tagging information,
current verifier will assume that a->b is a kernel memory access
and this may generate incorrect result.
Now BTF contains __user information, it can check whether the
pointer points to a user memory or not. If it is, the verifier
can reject the program and force users to use bpf_probe_read_user()
helper explicitly.
In the future, we can easily extend btf_add_space for other
address space tagging, for example, rcu/percpu etc.
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-27 07:46:06 -08:00
t = btf_type_by_id ( btf , mtype - > type ) ;
if ( btf_type_is_type_tag ( t ) ) {
tag_value = __btf_name_by_offset ( btf , t - > name_off ) ;
2022-03-04 11:16:56 -08:00
/* check __user tag */
bpf: reject program if a __user tagged memory accessed in kernel way
BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type
of bpf programs, e.g., a->b. If "a" is a pointer
pointing to kernel memory, bpf verifier will allow user to write
code in C like a->b and the verifier will translate it to a kernel
load properly. If "a" is a pointer to user memory, it is expected
that bpf developer should be bpf_probe_read_user() helper to
get the value a->b. Without utilizing BTF __user tagging information,
current verifier will assume that a->b is a kernel memory access
and this may generate incorrect result.
Now BTF contains __user information, it can check whether the
pointer points to a user memory or not. If it is, the verifier
can reject the program and force users to use bpf_probe_read_user()
helper explicitly.
In the future, we can easily extend btf_add_space for other
address space tagging, for example, rcu/percpu etc.
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-27 07:46:06 -08:00
if ( strcmp ( tag_value , " user " ) = = 0 )
tmp_flag = MEM_USER ;
2022-03-04 11:16:56 -08:00
/* check __percpu tag */
if ( strcmp ( tag_value , " percpu " ) = = 0 )
tmp_flag = MEM_PERCPU ;
bpf: reject program if a __user tagged memory accessed in kernel way
BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type
of bpf programs, e.g., a->b. If "a" is a pointer
pointing to kernel memory, bpf verifier will allow user to write
code in C like a->b and the verifier will translate it to a kernel
load properly. If "a" is a pointer to user memory, it is expected
that bpf developer should be bpf_probe_read_user() helper to
get the value a->b. Without utilizing BTF __user tagging information,
current verifier will assume that a->b is a kernel memory access
and this may generate incorrect result.
Now BTF contains __user information, it can check whether the
pointer points to a user memory or not. If it is, the verifier
can reject the program and force users to use bpf_probe_read_user()
helper explicitly.
In the future, we can easily extend btf_add_space for other
address space tagging, for example, rcu/percpu etc.
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-27 07:46:06 -08:00
}
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption thoughout BPF verifier of vmlinux BTF. Instead,
wherever BTF type IDs are involved, also track the instance of struct btf that
goes along with the type ID. This allows to gradually add support for kernel
module BTFs and using/tracking module types across BPF helper calls and
registers.
This patch also renames btf_id() function to btf_obj_id() to minimize naming
clash with using btf_id to denote BTF *type* ID, rather than BTF *object*'s ID.
Also, altough btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs need that, so apply BTF refcounting universally when
BPF program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler clean up code.
Now that BTF type ID is not enough to uniquely identify a BTF type, extend BPF
trampoline key to include BTF object ID. To differentiate that from target
program BPF ID, set 31st bit of type ID. BTF type IDs (at least currently) are
not allowed to take full 32 bits, so there is no danger of confusing that bit
with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-03 12:46:29 -08:00
stype = btf_type_skip_modifiers ( btf , mtype - > type , & id ) ;
2019-10-15 20:25:00 -07:00
if ( btf_type_is_struct ( stype ) ) {
2020-01-31 16:03:14 -08:00
* next_btf_id = id ;
bpf: reject program if a __user tagged memory accessed in kernel way
BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type
of bpf programs, e.g., a->b. If "a" is a pointer
pointing to kernel memory, bpf verifier will allow user to write
code in C like a->b and the verifier will translate it to a kernel
load properly. If "a" is a pointer to user memory, it is expected
that bpf developer should be bpf_probe_read_user() helper to
get the value a->b. Without utilizing BTF __user tagging information,
current verifier will assume that a->b is a kernel memory access
and this may generate incorrect result.
Now BTF contains __user information, it can check whether the
pointer points to a user memory or not. If it is, the verifier
can reject the program and force users to use bpf_probe_read_user()
helper explicitly.
In the future, we can easily extend btf_add_space for other
address space tagging, for example, rcu/percpu etc.
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-27 07:46:06 -08:00
* flag = tmp_flag ;
2020-08-25 21:21:17 +02:00
return WALK_PTR ;
2019-10-15 20:25:00 -07:00
}
}
2019-11-07 10:09:03 -08:00
/* Allow more flexible access within an int as long as
* it is within mtrue_end .
* Since mtrue_end could be the end of an array ,
* that also allows using an array of int as a scratch
* space . e . g . skb - > cb [ ] .
*/
if ( off + size > mtrue_end ) {
bpf_log ( log ,
" access beyond the end of member %s (mend:%u) in struct %s with off %u size %u \n " ,
mname , mtrue_end , tname , off , size ) ;
return - EACCES ;
}
2020-08-25 21:21:17 +02:00
return WALK_SCALAR ;
2019-10-15 20:25:00 -07:00
}
bpf_log ( log , " struct %s doesn't have field at offset %d \n " , tname , off ) ;
return - EINVAL ;
}
2022-11-15 00:45:28 +05:30
int btf_struct_access ( struct bpf_verifier_log * log ,
const struct bpf_reg_state * reg ,
int off , int size , enum bpf_access_type atype __maybe_unused ,
bpf: reject program if a __user tagged memory accessed in kernel way
BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type
of bpf programs, e.g., a->b. If "a" is a pointer
pointing to kernel memory, bpf verifier will allow user to write
code in C like a->b and the verifier will translate it to a kernel
load properly. If "a" is a pointer to user memory, it is expected
that bpf developer should use the bpf_probe_read_user() helper to
get the value a->b. Without utilizing BTF __user tagging information,
current verifier will assume that a->b is a kernel memory access
and this may generate incorrect result.
Now BTF contains __user information, it can check whether the
pointer points to a user memory or not. If it is, the verifier
can reject the program and force users to use bpf_probe_read_user()
helper explicitly.
In the future, we can easily extend btf_add_space for other
address space tagging, for example, rcu/percpu etc.
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-27 07:46:06 -08:00
u32 * next_btf_id , enum bpf_type_flag * flag )
2020-08-25 21:21:17 +02:00
{
2022-11-15 00:45:28 +05:30
const struct btf * btf = reg - > btf ;
bpf: reject program if a __user tagged memory accessed in kernel way
BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type
of bpf programs, e.g., a->b. If "a" is a pointer
pointing to kernel memory, bpf verifier will allow user to write
code in C like a->b and the verifier will translate it to a kernel
load properly. If "a" is a pointer to user memory, it is expected
that bpf developer should be bpf_probe_read_user() helper to
get the value a->b. Without utilizing BTF __user tagging information,
current verifier will assume that a->b is a kernel memory access
and this may generate incorrect result.
Now BTF contains __user information, it can check whether the
pointer points to a user memory or not. If it is, the verifier
can reject the program and force users to use bpf_probe_read_user()
helper explicitly.
In the future, we can easily extend btf_add_space for other
address space tagging, for example, rcu/percpu etc.
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-27 07:46:06 -08:00
enum bpf_type_flag tmp_flag = 0 ;
2022-11-15 00:45:28 +05:30
const struct btf_type * t ;
u32 id = reg - > btf_id ;
2020-08-25 21:21:17 +02:00
int err ;
bpf: Recognize lock and list fields in allocated objects
Allow specifying bpf_spin_lock, bpf_list_head, bpf_list_node fields in a
allocated object.
Also update btf_struct_access to reject direct access to these special
fields.
A bpf_list_head allows implementing map-in-map style use cases, where an
allocated object with bpf_list_head is linked into a list in a map
value. This would require embedding a bpf_list_node, support for which
is also included. The bpf_spin_lock is used to protect the bpf_list_head
and other data.
While we strictly don't require to hold a bpf_spin_lock while touching
the bpf_list_head in such objects, as when have access to it, we have
complete ownership of the object, the locking constraint is still kept
and may be conditionally lifted in the future.
Note that the specification of such types can be done just like map
values, e.g.:
struct bar {
struct bpf_list_node node;
};
struct foo {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(bar, node);
struct bpf_list_node node;
};
struct map_value {
struct bpf_spin_lock lock;
struct bpf_list_head head __contains(foo, node);
};
To recognize such types in user BTF, we build a btf_struct_metas array
of metadata items corresponding to each BTF ID. This is done once during
the btf_parse stage to avoid having to do it each time during the
verification process's requirement to inspect the metadata.
Moreover, the computed metadata needs to be passed to some helpers in
future patches which requires allocating them and storing them in the
BTF that is pinned by the program itself, so that valid access can be
assumed to such data during program runtime.
A key thing to note is that once a btf_struct_meta is available for a
type, both the btf_record and btf_field_offs should be available. It is
critical that btf_field_offs is available in case special fields are
present, as we extensively rely on special fields being zeroed out in
map values and allocated objects in later patches. The code ensures that
by bailing out in case of errors and ensuring both are available
together. If the record is not available, the special fields won't be
recognized, so not having both is also fine (in terms of being a
verification error and not a runtime bug).
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221118015614.2013203-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-18 07:25:56 +05:30
while ( type_is_alloc ( reg - > type ) ) {
struct btf_struct_meta * meta ;
struct btf_record * rec ;
int i ;
meta = btf_find_struct_meta ( btf , id ) ;
if ( ! meta )
break ;
rec = meta - > record ;
for ( i = 0 ; i < rec - > cnt ; i + + ) {
struct btf_field * field = & rec - > fields [ i ] ;
u32 offset = field - > offset ;
if ( off < offset + btf_field_type_size ( field - > type ) & & offset < off + size ) {
bpf_log ( log ,
" direct access to %s is disallowed \n " ,
btf_field_type_name ( field - > type ) ) ;
return - EACCES ;
}
}
break ;
}
2022-11-15 00:45:28 +05:30
t = btf_type_by_id ( btf , id ) ;
2020-08-25 21:21:17 +02:00
do {
bpf: reject program if a __user tagged memory accessed in kernel way
BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type
of bpf programs, e.g., a->b. If "a" is a pointer
pointing to kernel memory, bpf verifier will allow user to write
code in C like a->b and the verifier will translate it to a kernel
load properly. If "a" is a pointer to user memory, it is expected
that bpf developer should be bpf_probe_read_user() helper to
get the value a->b. Without utilizing BTF __user tagging information,
current verifier will assume that a->b is a kernel memory access
and this may generate incorrect result.
Now BTF contains __user information, it can check whether the
pointer points to a user memory or not. If it is, the verifier
can reject the program and force users to use bpf_probe_read_user()
helper explicitly.
In the future, we can easily extend btf_add_space for other
address space tagging, for example, rcu/percpu etc.
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-27 07:46:06 -08:00
err = btf_struct_walk ( log , btf , t , off , size , & id , & tmp_flag ) ;
2020-08-25 21:21:17 +02:00
switch ( err ) {
case WALK_PTR :
2022-11-18 07:25:55 +05:30
/* For local types, the destination register cannot
* become a pointer again .
*/
if ( type_is_alloc ( reg - > type ) )
return SCALAR_VALUE ;
2020-08-25 21:21:17 +02:00
/* If we found the pointer or scalar on t+off,
* we ' re done .
*/
* next_btf_id = id ;
bpf: reject program if a __user tagged memory accessed in kernel way
BPF verifier supports direct memory access for BPF_PROG_TYPE_TRACING type
of bpf programs, e.g., a->b. If "a" is a pointer
pointing to kernel memory, bpf verifier will allow user to write
code in C like a->b and the verifier will translate it to a kernel
load properly. If "a" is a pointer to user memory, it is expected
that bpf developer should be bpf_probe_read_user() helper to
get the value a->b. Without utilizing BTF __user tagging information,
current verifier will assume that a->b is a kernel memory access
and this may generate incorrect result.
Now BTF contains __user information, it can check whether the
pointer points to a user memory or not. If it is, the verifier
can reject the program and force users to use bpf_probe_read_user()
helper explicitly.
In the future, we can easily extend btf_add_space for other
address space tagging, for example, rcu/percpu etc.
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220127154606.654961-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-27 07:46:06 -08:00
* flag = tmp_flag ;
2020-08-25 21:21:17 +02:00
return PTR_TO_BTF_ID ;
case WALK_SCALAR :
return SCALAR_VALUE ;
case WALK_STRUCT :
/* We found nested struct, so continue the search
* by diving in it . At this point the offset is
* aligned with the new type , so set it to 0.
*/
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption thoughout BPF verifier of vmlinux BTF. Instead,
wherever BTF type IDs are involved, also track the instance of struct btf that
goes along with the type ID. This allows to gradually add support for kernel
module BTFs and using/tracking module types across BPF helper calls and
registers.
This patch also renames btf_id() function to btf_obj_id() to minimize naming
clash with using btf_id to denote BTF *type* ID, rather than BTF *object*'s ID.
Also, altough btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs need that, so apply BTF refcounting universally when
BPF program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler clean up code.
Now that BTF type ID is not enough to uniquely identify a BTF type, extend BPF
trampoline key to include BTF object ID. To differentiate that from target
program BPF ID, set 31st bit of type ID. BTF type IDs (at least currently) are
not allowed to take full 32 bits, so there is no danger of confusing that bit
with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-03 12:46:29 -08:00
t = btf_type_by_id ( btf , id ) ;
2020-08-25 21:21:17 +02:00
off = 0 ;
break ;
default :
/* It's either error or unknown return value..
* scream and leave .
*/
if ( WARN_ONCE ( err > 0 , " unknown btf_struct_walk return value " ) )
return - EINVAL ;
return err ;
}
} while ( t ) ;
return - EINVAL ;
}
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption throughout BPF verifier of vmlinux BTF. Instead,
wherever BTF type IDs are involved, also track the instance of struct btf that
goes along with the type ID. This allows to gradually add support for kernel
module BTFs and using/tracking module types across BPF helper calls and
registers.
This patch also renames btf_id() function to btf_obj_id() to minimize naming
clash with using btf_id to denote BTF *type* ID, rather than BTF *object*'s ID.
Also, although btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs need that, so apply BTF refcounting universally when
BPF program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler clean up code.
Now that BTF type ID is not enough to uniquely identify a BTF type, extend BPF
trampoline key to include BTF object ID. To differentiate that from target
program BPF ID, set 31st bit of type ID. BTF type IDs (at least currently) are
not allowed to take full 32 bits, so there is no danger of confusing that bit
with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-03 12:46:29 -08:00
/* Decide whether two BTF types, each given as a (BTF object, type ID) pair,
 * are exactly the same type.
 *
 * Comparing the IDs alone is not sufficient because of module BTFs: two
 * different module BTF objects can both carry an ID that refers to the one
 * common type in vmlinux BTF. So after the IDs match, either the BTF objects
 * must be identical or both lookups must resolve to the same type.
 */
static bool btf_types_are_same(const struct btf *btf1, u32 id1,
			       const struct btf *btf2, u32 id2)
{
	return id1 == id2 &&
	       (btf1 == btf2 ||
		btf_type_by_id(btf1, id1) == btf_type_by_id(btf2, id2));
}
2020-08-25 21:21:18 +02:00
/**
 * btf_struct_ids_match - check whether a type, or a struct nested in it at
 *                        a given offset, is the required type
 * @log: verifier log handed to btf_struct_walk()
 * @btf: BTF object that @id belongs to
 * @id: type ID to start from
 * @off: byte offset inside the starting type
 * @need_btf: BTF object that @need_type_id belongs to
 * @need_type_id: type ID that has to be found
 * @strict: when true, only the top-level comparison is performed and no
 *          nested struct is examined
 *
 * Return: true if the starting type at offset 0 is the required type, or,
 * when @strict is false, if following nested structs from @off eventually
 * reaches the required type. False otherwise.
 */
bool btf_struct_ids_match(struct bpf_verifier_log *log,
			  const struct btf *btf, u32 id, int off,
			  const struct btf *need_btf, u32 need_type_id,
			  bool strict)
{
	const struct btf_type *cur;
	enum bpf_type_flag flag;
	int walk;

	/* Top-level match: nothing to walk. */
	if (off == 0 && btf_types_are_same(btf, id, need_btf, need_type_id))
		return true;

	/* A strict match never looks inside the struct; the comparison above
	 * was the only chance. Callers asking for strict matching are expected
	 * to pass off == 0.
	 */
	if (strict)
		return false;

	for (;;) {
		cur = btf_type_by_id(btf, id);
		if (!cur)
			return false;

		/* Probe a single byte at the offset; only a nested struct
		 * keeps the search going.
		 */
		walk = btf_struct_walk(log, btf, cur, off, 1, &id, &flag);
		if (walk != WALK_STRUCT)
			return false;

		if (btf_types_are_same(btf, id, need_btf, need_type_id))
			return true;

		/* Not the required type yet: continue inside the nested
		 * struct, starting at its first byte.
		 */
		off = 0;
	}
}
2019-11-14 10:57:04 -08:00
/* Resolve @btf_id through any modifier types and report the size of what is
 * left.
 *
 * *ret_type is first set to the type with ID 0 and is replaced by the
 * resolved type once modifiers have been stripped successfully.
 *
 * Returns 0 for ID 0 (void), sizeof(void *) for a pointer, t->size for an
 * int, any enum or a struct, and -EINVAL when a type lookup fails or the
 * resolved type is of any other kind.
 */
static int __get_type_size(struct btf *btf, u32 btf_id,
			   const struct btf_type **ret_type)
{
	const struct btf_type *resolved;

	*ret_type = btf_type_by_id(btf, 0);

	/* void */
	if (btf_id == 0)
		return 0;

	for (resolved = btf_type_by_id(btf, btf_id);
	     resolved && btf_type_is_modifier(resolved);
	     resolved = btf_type_by_id(btf, resolved->type))
		;

	if (!resolved)
		return -EINVAL;

	*ret_type = resolved;

	/* This is the kernel's pointer size, not BPF's. */
	if (btf_type_is_ptr(resolved))
		return sizeof(void *);

	if (!btf_type_is_int(resolved) && !btf_is_any_enum(resolved) &&
	    !__btf_type_is_struct(resolved))
		return -EINVAL;

	return resolved->size;
}
int btf_distill_func_proto ( struct bpf_verifier_log * log ,
struct btf * btf ,
const struct btf_type * func ,
const char * tname ,
struct btf_func_model * m )
{
const struct btf_param * args ;
const struct btf_type * t ;
u32 i , nargs ;
int ret ;
2019-11-14 10:57:17 -08:00
if ( ! func ) {
/* BTF function prototype doesn't match the verifier types.
2021-02-26 00:26:29 +04:00
* Fall back to MAX_BPF_FUNC_REG_ARGS u64 args .
2019-11-14 10:57:17 -08:00
*/
2022-08-31 08:26:46 -07:00
for ( i = 0 ; i < MAX_BPF_FUNC_REG_ARGS ; i + + ) {
2019-11-14 10:57:17 -08:00
m - > arg_size [ i ] = 8 ;
2022-08-31 08:26:46 -07:00
m - > arg_flags [ i ] = 0 ;
}
2019-11-14 10:57:17 -08:00
m - > ret_size = 8 ;
2021-02-26 00:26:29 +04:00
m - > nr_args = MAX_BPF_FUNC_REG_ARGS ;
2019-11-14 10:57:17 -08:00
return 0 ;
}
2019-11-14 10:57:04 -08:00
args = ( const struct btf_param * ) ( func + 1 ) ;
nargs = btf_type_vlen ( func ) ;
2022-03-25 00:42:38 +08:00
if ( nargs > MAX_BPF_FUNC_ARGS ) {
2019-11-14 10:57:04 -08:00
bpf_log ( log ,
" The function %s has %d arguments. Too many. \n " ,
tname , nargs ) ;
return - EINVAL ;
}
ret = __get_type_size ( btf , func - > type , & t ) ;
2022-08-31 08:26:46 -07:00
if ( ret < 0 | | __btf_type_is_struct ( t ) ) {
2019-11-14 10:57:04 -08:00
bpf_log ( log ,
" The function %s return type %s is unsupported. \n " ,
2022-09-16 13:28:00 -07:00
tname , btf_type_str ( t ) ) ;
2019-11-14 10:57:04 -08:00
return - EINVAL ;
}
m - > ret_size = ret ;
for ( i = 0 ; i < nargs ; i + + ) {
2021-05-05 15:25:29 +02:00
if ( i = = nargs - 1 & & args [ i ] . type = = 0 ) {
bpf_log ( log ,
" The function %s with variable args is unsupported. \n " ,
tname ) ;
return - EINVAL ;
}
2019-11-14 10:57:04 -08:00
ret = __get_type_size ( btf , args [ i ] . type , & t ) ;
2022-08-31 08:26:46 -07:00
/* No support of struct argument size greater than 16 bytes */
if ( ret < 0 | | ret > 16 ) {
2019-11-14 10:57:04 -08:00
bpf_log ( log ,
" The function %s arg%d type %s is unsupported. \n " ,
2022-09-16 13:28:00 -07:00
tname , i , btf_type_str ( t ) ) ;
2019-11-14 10:57:04 -08:00
return - EINVAL ;
}
2021-05-05 15:25:29 +02:00
if ( ret = = 0 ) {
bpf_log ( log ,
" The function %s has malformed void argument. \n " ,
tname ) ;
return - EINVAL ;
}
2019-11-14 10:57:04 -08:00
m - > arg_size [ i ] = ret ;
2022-08-31 08:26:46 -07:00
m - > arg_flags [ i ] = __btf_type_is_struct ( t ) ? BTF_FMODEL_STRUCT_ARG : 0 ;
2019-11-14 10:57:04 -08:00
}
m - > nr_args = nargs ;
return 0 ;
}
2020-01-20 16:53:46 -08:00
/* Compare BTFs of two functions assuming only scalars and pointers to context.
* t1 points to BTF_KIND_FUNC in btf1
* t2 points to BTF_KIND_FUNC in btf2
* Returns :
* EINVAL - function prototype mismatch
* EFAULT - verifier bug
* 0 - 99 % match . The last 1 % is validated by the verifier .
*/
2020-02-10 09:14:41 +08:00
static int btf_check_func_type_match ( struct bpf_verifier_log * log ,
struct btf * btf1 , const struct btf_type * t1 ,
struct btf * btf2 , const struct btf_type * t2 )
2020-01-20 16:53:46 -08:00
{
const struct btf_param * args1 , * args2 ;
const char * fn1 , * fn2 , * s1 , * s2 ;
u32 nargs1 , nargs2 , i ;
fn1 = btf_name_by_offset ( btf1 , t1 - > name_off ) ;
fn2 = btf_name_by_offset ( btf2 , t2 - > name_off ) ;
if ( btf_func_linkage ( t1 ) ! = BTF_FUNC_GLOBAL ) {
bpf_log ( log , " %s() is not a global function \n " , fn1 ) ;
return - EINVAL ;
}
if ( btf_func_linkage ( t2 ) ! = BTF_FUNC_GLOBAL ) {
bpf_log ( log , " %s() is not a global function \n " , fn2 ) ;
return - EINVAL ;
}
t1 = btf_type_by_id ( btf1 , t1 - > type ) ;
if ( ! t1 | | ! btf_type_is_func_proto ( t1 ) )
return - EFAULT ;
t2 = btf_type_by_id ( btf2 , t2 - > type ) ;
if ( ! t2 | | ! btf_type_is_func_proto ( t2 ) )
return - EFAULT ;
args1 = ( const struct btf_param * ) ( t1 + 1 ) ;
nargs1 = btf_type_vlen ( t1 ) ;
args2 = ( const struct btf_param * ) ( t2 + 1 ) ;
nargs2 = btf_type_vlen ( t2 ) ;
if ( nargs1 ! = nargs2 ) {
bpf_log ( log , " %s() has %d args while %s() has %d args \n " ,
fn1 , nargs1 , fn2 , nargs2 ) ;
return - EINVAL ;
}
t1 = btf_type_skip_modifiers ( btf1 , t1 - > type , NULL ) ;
t2 = btf_type_skip_modifiers ( btf2 , t2 - > type , NULL ) ;
if ( t1 - > info ! = t2 - > info ) {
bpf_log ( log ,
" Return type %s of %s() doesn't match type %s of %s() \n " ,
btf_type_str ( t1 ) , fn1 ,
btf_type_str ( t2 ) , fn2 ) ;
return - EINVAL ;
}
for ( i = 0 ; i < nargs1 ; i + + ) {
t1 = btf_type_skip_modifiers ( btf1 , args1 [ i ] . type , NULL ) ;
t2 = btf_type_skip_modifiers ( btf2 , args2 [ i ] . type , NULL ) ;
if ( t1 - > info ! = t2 - > info ) {
bpf_log ( log , " arg%d in %s() is %s while %s() has %s \n " ,
i , fn1 , btf_type_str ( t1 ) ,
fn2 , btf_type_str ( t2 ) ) ;
return - EINVAL ;
}
if ( btf_type_has_size ( t1 ) & & t1 - > size ! = t2 - > size ) {
bpf_log ( log ,
" arg%d in %s() has size %d while %s() has %d \n " ,
i , fn1 , t1 - > size ,
fn2 , t2 - > size ) ;
return - EINVAL ;
}
/* global functions are validated with scalars and pointers
* to context only . And only global functions can be replaced .
* Hence type check only those types .
*/
bpf: Add btf enum64 support
Currently, BTF only supports upto 32bit enum value with BTF_KIND_ENUM.
But in kernel, some enum indeed has 64bit values, e.g.,
in uapi bpf.h, we have
enum {
BPF_F_INDEX_MASK = 0xffffffffULL,
BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
};
In this case, BTF_KIND_ENUM will encode the value of BPF_F_CTXLEN_MASK
as 0, which certainly is incorrect.
This patch added a new btf kind, BTF_KIND_ENUM64, which permits
64bit value to cover the above use case. The BTF_KIND_ENUM64 has
the following three fields followed by the common type:
struct bpf_enum64 {
__u32 nume_off;
__u32 val_lo32;
__u32 val_hi32;
};
Currently, btf type section has an alignment of 4 as all element types
are u32. Representing the value with __u64 will introduce a pad
for bpf_enum64 and may also introduce misalignment for the 64bit value.
Hence, two members of val_hi32 and val_lo32 are chosen to avoid these issues.
The kflag is also introduced for BTF_KIND_ENUM and BTF_KIND_ENUM64
to indicate whether the value is signed or unsigned. The kflag intends
to provide consistent output of BTF C fortmat with the original
source code. For example, the original BTF_KIND_ENUM bit value is 0xffffffff.
The format C has two choices, printing out 0xffffffff or -1 and current libbpf
prints out as unsigned value. But if the signedness is preserved in btf,
the value can be printed the same as the original source code.
The kflag value 0 means unsigned values, which is consistent to the default
by libbpf and should also cover most cases as well.
The new BTF_KIND_ENUM64 is intended to support the enum value represented as
64bit value. But it can represent all BTF_KIND_ENUM values as well.
The compiler ([1]) and pahole will generate BTF_KIND_ENUM64 only if the value has
to be represented with 64 bits.
In addition, a static inline function btf_kind_core_compat() is introduced which
will be used later when libbpf relo_core.c changed. Here the kernel shares the
same relo_core.c with libbpf.
[1] https://reviews.llvm.org/D124641
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220607062600.3716578-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-06-06 23:26:00 -07:00
if ( btf_type_is_int ( t1 ) | | btf_is_any_enum ( t1 ) )
2020-01-20 16:53:46 -08:00
continue ;
if ( ! btf_type_is_ptr ( t1 ) ) {
bpf_log ( log ,
" arg%d in %s() has unrecognized type \n " ,
i , fn1 ) ;
return - EINVAL ;
}
t1 = btf_type_skip_modifiers ( btf1 , t1 - > type , NULL ) ;
t2 = btf_type_skip_modifiers ( btf2 , t2 - > type , NULL ) ;
if ( ! btf_type_is_struct ( t1 ) ) {
bpf_log ( log ,
" arg%d in %s() is not a pointer to context \n " ,
i , fn1 ) ;
return - EINVAL ;
}
if ( ! btf_type_is_struct ( t2 ) ) {
bpf_log ( log ,
" arg%d in %s() is not a pointer to context \n " ,
i , fn2 ) ;
return - EINVAL ;
}
/* This is an optional check to make program writing easier.
* Compare names of structs and report an error to the user .
* btf_prepare_func_args ( ) already checked that t2 struct
* is a context type . btf_prepare_func_args ( ) will check
* later that t1 struct is a context type as well .
*/
s1 = btf_name_by_offset ( btf1 , t1 - > name_off ) ;
s2 = btf_name_by_offset ( btf2 , t2 - > name_off ) ;
if ( strcmp ( s1 , s2 ) ) {
bpf_log ( log ,
" arg%d %s(struct %s *) doesn't match %s(struct %s *) \n " ,
i , fn1 , s1 , fn2 , s2 ) ;
return - EINVAL ;
}
}
return 0 ;
}
/* Compare BTFs of given program with BTF of target program */
2020-09-25 23:25:01 +02:00
int btf_check_type_match ( struct bpf_verifier_log * log , const struct bpf_prog * prog ,
2020-01-20 16:53:46 -08:00
struct btf * btf2 , const struct btf_type * t2 )
{
struct btf * btf1 = prog - > aux - > btf ;
const struct btf_type * t1 ;
u32 btf_id = 0 ;
if ( ! prog - > aux - > func_info ) {
2020-09-25 23:25:01 +02:00
bpf_log ( log , " Program extension requires BTF \n " ) ;
2020-01-20 16:53:46 -08:00
return - EINVAL ;
}
btf_id = prog - > aux - > func_info [ 0 ] . type_id ;
if ( ! btf_id )
return - EFAULT ;
t1 = btf_type_by_id ( btf1 , btf_id ) ;
if ( ! t1 | | ! btf_type_is_func ( t1 ) )
return - EFAULT ;
2020-09-25 23:25:01 +02:00
return btf_check_func_type_match ( log , btf1 , t1 , btf2 , t2 ) ;
2020-01-20 16:53:46 -08:00
}
bpf: Support bpf program calling kernel function
This patch adds support to BPF verifier to allow bpf program calling
kernel function directly.
The use case included in this set is to allow bpf-tcp-cc to directly
call some tcp-cc helper functions (e.g. "tcp_cong_avoid_ai()"). Those
functions have already been used by some kernel tcp-cc implementations.
This set will also allow the bpf-tcp-cc program to directly call the
kernel tcp-cc implementation. For example, a bpf_dctcp may only want to
implement its own dctcp_cwnd_event() and reuse other dctcp_*() directly
from the kernel tcp_dctcp.c instead of reimplementing (or
copy-and-pasting) them.
The tcp-cc kernel functions mentioned above will be white listed
for the struct_ops bpf-tcp-cc programs to use in a later patch.
The white listed functions are not bounded to a fixed ABI contract.
Those functions have already been used by the existing kernel tcp-cc.
If any of them has changed, both in-tree and out-of-tree kernel tcp-cc
implementations have to be changed. The same goes for the struct_ops
bpf-tcp-cc programs which have to be adjusted accordingly.
This patch is to make the required changes in the bpf verifier.
First change is in btf.c, it adds a case in "btf_check_func_arg_match()".
When the passed in "btf->kernel_btf == true", it means matching the
verifier regs' states with a kernel function. This will handle the
PTR_TO_BTF_ID reg. It also maps PTR_TO_SOCK_COMMON, PTR_TO_SOCKET,
and PTR_TO_TCP_SOCK to its kernel's btf_id.
In the later libbpf patch, the insn calling a kernel function will
look like:
insn->code == (BPF_JMP | BPF_CALL)
insn->src_reg == BPF_PSEUDO_KFUNC_CALL /* <- new in this patch */
insn->imm == func_btf_id /* btf_id of the running kernel */
[ For the future calling function-in-kernel-module support, an array
of module btf_fds can be passed at the load time and insn->off
can be used to index into this array. ]
At the early stage of verifier, the verifier will collect all kernel
function calls into "struct bpf_kfunc_desc". Those
descriptors are stored in "prog->aux->kfunc_tab" and will
be available to the JIT. Since this "add" operation is similar
to the current "add_subprog()" and looking for the same insn->code,
they are done together in the new "add_subprog_and_kfunc()".
In the "do_check()" stage, the new "check_kfunc_call()" is added
to verify the kernel function call instruction:
1. Ensure the kernel function can be used by a particular BPF_PROG_TYPE.
A new bpf_verifier_ops "check_kfunc_call" is added to do that.
The bpf-tcp-cc struct_ops program will implement this function in
a later patch.
2. Call "btf_check_kfunc_args_match()" to ensure the regs can be
used as the args of a kernel function.
3. Mark the regs' type, subreg_def, and zext_dst.
At the later do_misc_fixups() stage, the new fixup_kfunc_call()
will replace the insn->imm with the function address (relative
to __bpf_call_base). If needed, the jit can find the btf_func_model
by calling the new bpf_jit_find_kfunc_model(prog, insn).
With the imm set to the function address, "bpftool prog dump xlated"
will be able to display the kernel function calls the same way as
it displays other bpf helper calls.
gpl_compatible program is required to call kernel function.
This feature currently requires JIT.
The verifier selftests are adjusted because of the changes in
the verbose log in add_subprog_and_kfunc().
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015142.1544736-1-kafai@fb.com
2021-03-24 18:51:42 -07:00
/* Lookup table indexed by verifier register type. Each populated slot points
 * at the btf_sock_ids[] entry holding the kernel BTF ID for that register
 * type; slots that are not listed stay NULL. The entries are only compiled in
 * when CONFIG_NET is set.
 */
static u32 *reg2btf_ids[__BPF_REG_TYPE_MAX] = {
#ifdef CONFIG_NET
	[PTR_TO_SOCK_COMMON]	= &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	[PTR_TO_SOCKET]		= &btf_sock_ids[BTF_SOCK_TYPE_SOCK],
	[PTR_TO_TCP_SOCK]	= &btf_sock_ids[BTF_SOCK_TYPE_TCP],
#endif
};
bpf: Extend kfunc with PTR_TO_CTX, PTR_TO_MEM argument support
Allow passing PTR_TO_CTX, if the kfunc expects a matching struct type,
and punt to PTR_TO_MEM block if reg->type does not fall in one of
PTR_TO_BTF_ID or PTR_TO_SOCK* types. This will be used by future commits
to get access to XDP and TC PTR_TO_CTX, and pass various data (flags,
l4proto, netns_id, etc.) encoded in opts struct passed as pointer to
kfunc.
For PTR_TO_MEM support, arguments are currently limited to pointer to
scalar, or pointer to struct composed of scalars. This is done so that
unsafe scenarios (like passing PTR_TO_MEM where PTR_TO_BTF_ID of
in-kernel valid structure is expected, which may have pointers) are
avoided. Since the argument checking happens based on argument register
type, it is not easy to ascertain what the expected type is. In the
future, support for PTR_TO_MEM for kfunc can be extended to serve other
usecases. The struct type whose pointer is passed in may have maximum
nesting depth of 4, all recursively composed of scalars or struct with
scalars.
Future commits will add negative tests that check whether these
restrictions imposed for kfunc arguments are duly rejected by BPF
verifier or not.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211217015031.1278167-4-memxor@gmail.com
2021-12-17 07:20:24 +05:30
/* Returns true if struct is composed of scalars, 4 levels of nesting allowed.
 *
 * @log:  verifier log that receives the "nesting depth exceeded" message
 * @btf:  BTF object that @t and its member type ids belong to
 * @t:    type to inspect; anything btf_type_is_struct() rejects yields false
 * @rec:  nesting depth of @t (presumably 0 for the outermost struct - the
 *        callers are not visible here)
 *
 * Every member, after skipping modifiers, must be one of:
 *   - a scalar,
 *   - a nested struct that itself passes this check,
 *   - a non-empty array whose element type (modifiers skipped) is a scalar.
 * Any other member kind makes the whole struct fail the check.
 */
static bool __btf_type_is_scalar_struct(struct bpf_verifier_log *log,
					const struct btf *btf,
					const struct btf_type *t, int rec)
{
	const struct btf_member *member;
	u32 i;

	if (!btf_type_is_struct(t))
		return false;

	for_each_member(i, t, member) {
		const struct btf_type *member_type;

		member_type = btf_type_skip_modifiers(btf, member->type, NULL);

		if (btf_type_is_struct(member_type)) {
			/* Descending from depth 3 would create a fifth level. */
			if (rec >= 3) {
				bpf_log(log, "max struct nesting depth exceeded\n");
				return false;
			}
			if (!__btf_type_is_scalar_struct(log, btf, member_type,
							 rec + 1))
				return false;
		} else if (btf_type_is_array(member_type)) {
			const struct btf_array *array = btf_type_array(member_type);

			/* Zero-length arrays are rejected outright. */
			if (!array->nelems)
				return false;

			/* Only the element type is checked, and it must be a
			 * scalar, so arrays of structs are rejected.
			 */
			member_type = btf_type_skip_modifiers(btf, array->type,
							      NULL);
			if (!btf_type_is_scalar(member_type))
				return false;
		} else if (!btf_type_is_scalar(member_type)) {
			return false;
		}
	}

	return true;
}
bpf: Introduce mem, size argument pair support for kfunc
BPF helpers can associate two adjacent arguments together to pass memory
of certain size, using ARG_PTR_TO_MEM and ARG_CONST_SIZE arguments.
Since we don't use bpf_func_proto for kfunc, we need to leverage BTF to
implement similar support.
The ARG_CONST_SIZE processing for helpers is refactored into a common
check_mem_size_reg helper that is shared with kfunc as well. kfunc
ptr_to_mem support follows logic similar to global functions, where
verification is done as if pointer is not null, even when it may be
null.
This leads to a simple to follow rule for writing kfunc: always check
the argument pointer for NULL, except when it is PTR_TO_CTX. Also, the
PTR_TO_CTX case is also only safe when the helper expecting pointer to
program ctx is not exposed to other programs where same struct is not
ctx type. In that case, the type check will fall through to other cases
and would permit passing other types of pointers, possibly NULL at
runtime.
Currently, we require the size argument to be suffixed with "__sz" in
the parameter name. This information is then recorded in kernel BTF and
verified during function argument checking. In the future we can use BTF
tagging instead, and modify the kernel function definitions. This will
be a purely kernel-side change.
This allows us to have some form of backwards compatibility for
structures that are passed in to the kernel function with their size,
and allow variable length structures to be passed in if they are
accompanied by a size parameter.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:47 +05:30
static bool is_kfunc_arg_mem_size ( const struct btf * btf ,
const struct btf_param * arg ,
const struct bpf_reg_state * reg )
{
int len , sfx_len = sizeof ( " __sz " ) - 1 ;
const struct btf_type * t ;
const char * param_name ;
t = btf_type_skip_modifiers ( btf , arg - > type , NULL ) ;
if ( ! btf_type_is_scalar ( t ) | | reg - > type ! = SCALAR_VALUE )
return false ;
/* In the future, this can be ported to use BTF tagging */
param_name = btf_name_by_offset ( btf , arg - > name_off ) ;
if ( str_is_empty ( param_name ) )
return false ;
len = strlen ( param_name ) ;
if ( len < sfx_len )
return false ;
param_name + = len - sfx_len ;
if ( strncmp ( param_name , " __sz " , sfx_len ) )
return false ;
return true ;
}
2022-09-06 17:13:02 +02:00
static bool btf_is_kfunc_arg_mem_size ( const struct btf * btf ,
const struct btf_param * arg ,
const struct bpf_reg_state * reg ,
const char * name )
{
int len , target_len = strlen ( name ) ;
const struct btf_type * t ;
const char * param_name ;
t = btf_type_skip_modifiers ( btf , arg - > type , NULL ) ;
if ( ! btf_type_is_scalar ( t ) | | reg - > type ! = SCALAR_VALUE )
return false ;
param_name = btf_name_by_offset ( btf , arg - > name_off ) ;
if ( str_is_empty ( param_name ) )
return false ;
len = strlen ( param_name ) ;
if ( len ! = target_len )
return false ;
if ( strcmp ( param_name , name ) )
return false ;
return true ;
}
2021-03-24 18:51:36 -07:00
static int btf_check_func_arg_match ( struct bpf_verifier_env * env ,
const struct btf * btf , u32 func_id ,
struct bpf_reg_state * regs ,
2022-07-21 15:42:35 +02:00
bool ptr_to_mem_ok ,
2022-09-06 17:13:02 +02:00
struct bpf_kfunc_arg_meta * kfunc_meta ,
2022-09-06 17:12:58 +02:00
bool processing_call )
2019-11-14 10:57:16 -08:00
{
2022-06-06 09:52:51 +02:00
enum bpf_prog_type prog_type = resolve_prog_type ( env - > prog ) ;
2022-09-21 18:48:25 +02:00
bool rel = false , kptr_get = false , trusted_args = false ;
2022-08-05 14:48:14 -07:00
bool sleepable = false ;
2019-11-14 10:57:16 -08:00
struct bpf_verifier_log * log = & env - > log ;
bpf: Add reference tracking support to kfunc
This patch adds verifier support for PTR_TO_BTF_ID return type of kfunc
to be a reference, by reusing acquire_reference_state/release_reference
support for existing in-kernel bpf helpers.
We make use of the three kfunc types:
- BTF_KFUNC_TYPE_ACQUIRE
Return true if kfunc_btf_id is an acquire kfunc. This will
acquire_reference_state for the returned PTR_TO_BTF_ID (this is the
only allowed return value). Note that acquire kfunc must always return a
PTR_TO_BTF_ID{_OR_NULL}, otherwise the program is rejected.
- BTF_KFUNC_TYPE_RELEASE
Return true if kfunc_btf_id is a release kfunc. This will release the
reference to the passed in PTR_TO_BTF_ID which has a reference state
(from earlier acquire kfunc).
The btf_check_func_arg_match returns the regno (of argument register,
hence > 0) if the kfunc is a release kfunc, and a proper referenced
PTR_TO_BTF_ID is being passed to it.
This is similar to how helper call check uses bpf_call_arg_meta to
store the ref_obj_id that is later used to release the reference.
Similar to in-kernel helper, we only allow passing one referenced
PTR_TO_BTF_ID as an argument. It can also be passed in to normal
kfunc, but in case of release kfunc there must always be one
PTR_TO_BTF_ID argument that is referenced.
- BTF_KFUNC_TYPE_RET_NULL
For kfunc returning PTR_TO_BTF_ID, tells if it can be NULL, hence
force caller to mark the pointer not null (using check) before
accessing it. Note that taking into account the case fixed by commit
93c230e3f5bd ("bpf: Enforce id generation for all may-be-null register type")
we assign a non-zero id for mark_ptr_or_null_reg logic. Later, if more
return types are supported by kfunc, which have a _OR_NULL variant, it
might be better to move this id generation under a common
reg_type_may_be_null check, similar to the case in the commit.
Referenced PTR_TO_BTF_ID is currently only limited to kfunc, but can be
extended in the future to other BPF helpers as well. For now, we can
rely on the btf_struct_ids_match check to ensure we get the pointer to
the expected struct type. In the future, care needs to be taken to avoid
ambiguity for reference PTR_TO_BTF_ID passed to release function, in
case multiple candidates can release same BTF ID.
e.g. there might be two release kfuncs (or kfunc and helper):
foo(struct abc *p);
bar(struct abc *p);
... such that both release a PTR_TO_BTF_ID with btf_id of struct abc. In
this case we would need to track the acquire function corresponding to
the release function to avoid type confusion, and store this information
in the register state so that an incorrect program can be rejected. This
is not a problem right now, hence it is left as an exercise for the
future patch introducing such a case in the kernel.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-6-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:48 +05:30
u32 i , nargs , ref_id , ref_obj_id = 0 ;
bpf: Extend kfunc with PTR_TO_CTX, PTR_TO_MEM argument support
Allow passing PTR_TO_CTX, if the kfunc expects a matching struct type,
and punt to PTR_TO_MEM block if reg->type does not fall in one of
PTR_TO_BTF_ID or PTR_TO_SOCK* types. This will be used by future commits
to get access to XDP and TC PTR_TO_CTX, and pass various data (flags,
l4proto, netns_id, etc.) encoded in opts struct passed as pointer to
kfunc.
For PTR_TO_MEM support, arguments are currently limited to pointer to
scalar, or pointer to struct composed of scalars. This is done so that
unsafe scenarios (like passing PTR_TO_MEM where PTR_TO_BTF_ID of
in-kernel valid structure is expected, which may have pointers) are
avoided. Since the argument checking happens basd on argument register
type, it is not easy to ascertain what the expected type is. In the
future, support for PTR_TO_MEM for kfunc can be extended to serve other
usecases. The struct type whose pointer is passed in may have maximum
nesting depth of 4, all recursively composed of scalars or struct with
scalars.
Future commits will add negative tests that check whether these
restrictions imposed for kfunc arguments are duly rejected by BPF
verifier or not.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211217015031.1278167-4-memxor@gmail.com
2021-12-17 07:20:24 +05:30
bool is_kfunc = btf_is_kernel ( btf ) ;
2021-03-24 18:51:36 -07:00
const char * func_name , * ref_tname ;
2021-02-13 00:56:41 +04:00
const struct btf_type * t , * ref_t ;
2021-03-24 18:51:36 -07:00
const struct btf_param * args ;
bpf: Fix PTR_TO_BTF_ID var_off check
When kfunc support was added, check_ctx_reg was called for PTR_TO_CTX
register, but no offset checks were made for PTR_TO_BTF_ID. Only
reg->off was taken into account by btf_struct_ids_match, which protected
against type mismatch due to non-zero reg->off, but when reg->off was
zero, a user could set the variable offset of the register and allow it
to be passed to kfunc, leading to bad pointer being passed into the
kernel.
Fix this by reusing the extracted helper check_func_arg_reg_off from
previous commit, and make one call before checking all supported
register types. Since the list is maintained, any future changes will be
taken into account by updating check_func_arg_reg_off. This function
prevents non-zero var_off to be set for PTR_TO_BTF_ID, but still allows
a fixed non-zero reg->off, which is needed for type matching to work
correctly when using pointer arithmetic.
ARG_DONTCARE is passed as arg_type, since kfunc doesn't support
accepting a ARG_PTR_TO_ALLOC_MEM without relying on size of parameter
type from BTF (in case of pointer), or using a mem, len pair. The
forcing of offset check for ARG_PTR_TO_ALLOC_MEM is done because ringbuf
helpers obtain the size from the header located at the beginning of the
memory region, hence any changes to the original pointer shouldn't be
allowed. In case of kfunc, size is always known, either at verification
time, or using the length parameter, hence this forcing is not required.
Since this check will happen once already for PTR_TO_CTX, remove the
check_ptr_off_reg call inside its block.
Fixes: e6ac2450d6de ("bpf: Support bpf program calling kernel function")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220304224645.3677453-3-memxor@gmail.com
2022-03-05 04:16:39 +05:30
int ref_regno = 0 , ret ;
2019-11-14 10:57:16 -08:00
2021-03-24 18:51:36 -07:00
t = btf_type_by_id ( btf , func_id ) ;
2019-11-14 10:57:16 -08:00
if ( ! t | | ! btf_type_is_func ( t ) ) {
2020-01-09 22:41:20 -08:00
/* These checks were already done by the verifier while loading
bpf: Support bpf program calling kernel function
This patch adds support to BPF verifier to allow bpf program calling
kernel function directly.
The use case included in this set is to allow bpf-tcp-cc to directly
call some tcp-cc helper functions (e.g. "tcp_cong_avoid_ai()"). Those
functions have already been used by some kernel tcp-cc implementations.
This set will also allow the bpf-tcp-cc program to directly call the
kernel tcp-cc implementation, For example, a bpf_dctcp may only want to
implement its own dctcp_cwnd_event() and reuse other dctcp_*() directly
from the kernel tcp_dctcp.c instead of reimplementing (or
copy-and-pasting) them.
The tcp-cc kernel functions mentioned above will be white listed
for the struct_ops bpf-tcp-cc programs to use in a later patch.
The white listed functions are not bounded to a fixed ABI contract.
Those functions have already been used by the existing kernel tcp-cc.
If any of them has changed, both in-tree and out-of-tree kernel tcp-cc
implementations have to be changed. The same goes for the struct_ops
bpf-tcp-cc programs which have to be adjusted accordingly.
This patch is to make the required changes in the bpf verifier.
First change is in btf.c, it adds a case in "btf_check_func_arg_match()".
When the passed in "btf->kernel_btf == true", it means matching the
verifier regs' states with a kernel function. This will handle the
PTR_TO_BTF_ID reg. It also maps PTR_TO_SOCK_COMMON, PTR_TO_SOCKET,
and PTR_TO_TCP_SOCK to its kernel's btf_id.
In the later libbpf patch, the insn calling a kernel function will
look like:
insn->code == (BPF_JMP | BPF_CALL)
insn->src_reg == BPF_PSEUDO_KFUNC_CALL /* <- new in this patch */
insn->imm == func_btf_id /* btf_id of the running kernel */
[ For the future calling function-in-kernel-module support, an array
of module btf_fds can be passed at the load time and insn->off
can be used to index into this array. ]
At the early stage of verifier, the verifier will collect all kernel
function calls into "struct bpf_kfunc_desc". Those
descriptors are stored in "prog->aux->kfunc_tab" and will
be available to the JIT. Since this "add" operation is similar
to the current "add_subprog()" and looking for the same insn->code,
they are done together in the new "add_subprog_and_kfunc()".
In the "do_check()" stage, the new "check_kfunc_call()" is added
to verify the kernel function call instruction:
1. Ensure the kernel function can be used by a particular BPF_PROG_TYPE.
A new bpf_verifier_ops "check_kfunc_call" is added to do that.
The bpf-tcp-cc struct_ops program will implement this function in
a later patch.
2. Call "btf_check_kfunc_args_match()" to ensure the regs can be
used as the args of a kernel function.
3. Mark the regs' type, subreg_def, and zext_dst.
At the later do_misc_fixups() stage, the new fixup_kfunc_call()
will replace the insn->imm with the function address (relative
to __bpf_call_base). If needed, the jit can find the btf_func_model
by calling the new bpf_jit_find_kfunc_model(prog, insn).
With the imm set to the function address, "bpftool prog dump xlated"
will be able to display the kernel function calls the same way as
it displays other bpf helper calls.
gpl_compatible program is required to call kernel function.
This feature currently requires JIT.
The verifier selftests are adjusted because of the changes in
the verbose log in add_subprog_and_kfunc().
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015142.1544736-1-kafai@fb.com
2021-03-24 18:51:42 -07:00
* struct bpf_func_info or in add_kfunc_call ( ) .
2020-01-09 22:41:20 -08:00
*/
2021-03-24 18:51:36 -07:00
bpf_log ( log , " BTF of func_id %u doesn't point to KIND_FUNC \n " ,
func_id ) ;
2020-01-09 22:41:20 -08:00
return - EFAULT ;
2019-11-14 10:57:16 -08:00
}
2021-03-24 18:51:36 -07:00
func_name = btf_name_by_offset ( btf , t - > name_off ) ;
2019-11-14 10:57:16 -08:00
t = btf_type_by_id ( btf , t - > type ) ;
if ( ! t | | ! btf_type_is_func_proto ( t ) ) {
2021-03-24 18:51:36 -07:00
bpf_log ( log , " Invalid BTF of func %s \n " , func_name ) ;
2020-01-09 22:41:20 -08:00
return - EFAULT ;
2019-11-14 10:57:16 -08:00
}
args = ( const struct btf_param * ) ( t + 1 ) ;
nargs = btf_type_vlen ( t ) ;
2021-02-26 00:26:29 +04:00
if ( nargs > MAX_BPF_FUNC_REG_ARGS ) {
2021-03-24 18:51:36 -07:00
bpf_log ( log , " Function %s has %d > %d args \n " , func_name , nargs ,
2021-02-26 00:26:29 +04:00
MAX_BPF_FUNC_REG_ARGS ) ;
2021-03-24 18:51:36 -07:00
return - EINVAL ;
2019-11-14 10:57:16 -08:00
}
2021-02-13 00:56:41 +04:00
2022-09-06 17:13:02 +02:00
if ( is_kfunc & & kfunc_meta ) {
bpf: Teach verifier about kptr_get kfunc helpers
We introduce a new style of kfunc helpers, namely *_kptr_get, where they
take pointer to the map value which points to a referenced kernel
pointer contained in the map. Since this is referenced, only
bpf_kptr_xchg from BPF side and xchg from kernel side is allowed to
change the current value, and each pointer that resides in that location
would be referenced, and RCU protected (this must be kept in mind while
adding kernel types embeddable as reference kptr in BPF maps).
This means that if we do the load of the pointer value in an RCU read
section, and find a live pointer, then as long as we hold RCU read lock,
it won't be freed by a parallel xchg + release operation. This allows us
to implement a safe refcount increment scheme. Hence, enforce that first
argument of all such kfunc is a proper PTR_TO_MAP_VALUE pointing at the
right offset to referenced pointer.
For the rest of the arguments, they are subjected to typical kfunc
argument checks, hence allowing some flexibility in passing more intent
into how the reference should be taken.
For instance, in case of struct nf_conn, it is not freed until RCU grace
period ends, but can still be reused for another tuple once refcount has
dropped to zero. Hence, a bpf_ct_kptr_get helper not only needs to call
refcount_inc_not_zero, but also do a tuple match after incrementing the
reference, and when it fails to match it, put the reference again and
return NULL.
This can be implemented easily if we allow passing additional parameters
to the bpf_ct_kptr_get kfunc, like a struct bpf_sock_tuple * and a
tuple__sz pair.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-9-memxor@gmail.com
2022-04-25 03:18:56 +05:30
/* Only kfunc can be release func */
2022-09-06 17:13:02 +02:00
rel = kfunc_meta - > flags & KF_RELEASE ;
kptr_get = kfunc_meta - > flags & KF_KPTR_GET ;
2022-09-21 18:48:25 +02:00
trusted_args = kfunc_meta - > flags & KF_TRUSTED_ARGS ;
2022-09-06 17:13:02 +02:00
sleepable = kfunc_meta - > flags & KF_SLEEPABLE ;
bpf: Teach verifier about kptr_get kfunc helpers
We introduce a new style of kfunc helpers, namely *_kptr_get, where they
take pointer to the map value which points to a referenced kernel
pointer contained in the map. Since this is referenced, only
bpf_kptr_xchg from BPF side and xchg from kernel side is allowed to
change the current value, and each pointer that resides in that location
would be referenced, and RCU protected (this must be kept in mind while
adding kernel types embeddable as reference kptr in BPF maps).
This means that if do the load of the pointer value in an RCU read
section, and find a live pointer, then as long as we hold RCU read lock,
it won't be freed by a parallel xchg + release operation. This allows us
to implement a safe refcount increment scheme. Hence, enforce that first
argument of all such kfunc is a proper PTR_TO_MAP_VALUE pointing at the
right offset to referenced pointer.
For the rest of the arguments, they are subjected to typical kfunc
argument checks, hence allowing some flexibility in passing more intent
into how the reference should be taken.
For instance, in case of struct nf_conn, it is not freed until RCU grace
period ends, but can still be reused for another tuple once refcount has
dropped to zero. Hence, a bpf_ct_kptr_get helper not only needs to call
refcount_inc_not_zero, but also do a tuple match after incrementing the
reference, and when it fails to match it, put the reference again and
return NULL.
This can be implemented easily if we allow passing additional parameters
to the bpf_ct_kptr_get kfunc, like a struct bpf_sock_tuple * and a
tuple__sz pair.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-9-memxor@gmail.com
2022-04-25 03:18:56 +05:30
}
2019-11-14 10:57:16 -08:00
/* check that BTF function arguments match actual types that the
* verifier sees .
*/
for ( i = 0 ; i < nargs ; i + + ) {
bpf: Tag argument to be released in bpf_func_proto
Add a new type flag for bpf_arg_type that when set tells verifier that
for a release function, that argument's register will be the one for
which meta.ref_obj_id will be set, and which will then be released
using release_reference. To capture the regno, introduce a new field
release_regno in bpf_call_arg_meta.
This would be required in the next patch, where we may either pass NULL
or a refcounted pointer as an argument to the release function
bpf_kptr_xchg. Just releasing only when meta.ref_obj_id is set is not
enough, as there is a case where the type of argument needed matches,
but the ref_obj_id is set to 0. Hence, we must enforce that whenever
meta.ref_obj_id is zero, the register that is to be released can only
be NULL for a release function.
Since we now indicate whether an argument is to be released in
bpf_func_proto itself, is_release_function helper has lost its utility,
hence refactor code to work without it, and just rely on
meta.release_regno to know when to release state for a ref_obj_id.
Still, the restriction of one release argument and only one ref_obj_id
passed to BPF helper or kfunc remains. This may be lifted in the future.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-3-memxor@gmail.com
2022-04-25 03:18:50 +05:30
enum bpf_arg_type arg_type = ARG_DONTCARE ;
2021-03-24 18:51:36 -07:00
u32 regno = i + 1 ;
struct bpf_reg_state * reg = & regs [ regno ] ;
2022-09-21 18:48:25 +02:00
bool obj_ptr = false ;
2021-02-13 00:56:39 +04:00
2021-03-24 18:51:36 -07:00
t = btf_type_skip_modifiers ( btf , args [ i ] . type , NULL ) ;
if ( btf_type_is_scalar ( t ) ) {
2022-09-06 17:13:02 +02:00
if ( is_kfunc & & kfunc_meta ) {
bool is_buf_size = false ;
/* check for any const scalar parameter of name "rdonly_buf_size"
* or " rdwr_buf_size "
*/
if ( btf_is_kfunc_arg_mem_size ( btf , & args [ i ] , reg ,
" rdonly_buf_size " ) ) {
kfunc_meta - > r0_rdonly = true ;
is_buf_size = true ;
} else if ( btf_is_kfunc_arg_mem_size ( btf , & args [ i ] , reg ,
" rdwr_buf_size " ) )
is_buf_size = true ;
if ( is_buf_size ) {
if ( kfunc_meta - > r0_size ) {
bpf_log ( log , " 2 or more rdonly/rdwr_buf_size parameters for kfunc " ) ;
return - EINVAL ;
}
if ( ! tnum_is_const ( reg - > var_off ) ) {
bpf_log ( log , " R%d is not a const \n " , regno ) ;
return - EINVAL ;
}
kfunc_meta - > r0_size = reg - > var_off . value ;
ret = mark_chain_precision ( env , regno ) ;
if ( ret )
return ret ;
}
}
2021-02-13 00:56:39 +04:00
if ( reg - > type = = SCALAR_VALUE )
2019-11-14 10:57:16 -08:00
continue ;
2021-03-24 18:51:36 -07:00
bpf_log ( log , " R%d is not a scalar \n " , regno ) ;
return - EINVAL ;
2019-11-14 10:57:16 -08:00
}
2021-03-24 18:51:36 -07:00
if ( ! btf_type_is_ptr ( t ) ) {
bpf_log ( log , " Unrecognized arg#%d type %s \n " ,
i , btf_type_str ( t ) ) ;
return - EINVAL ;
}
2022-09-21 18:48:25 +02:00
/* These register types have special constraints wrt ref_obj_id
* and offset checks . The rest of trusted args don ' t .
*/
obj_ptr = reg - > type = = PTR_TO_CTX | | reg - > type = = PTR_TO_BTF_ID | |
reg2btf_ids [ base_type ( reg - > type ) ] ;
2022-07-21 15:42:36 +02:00
/* Check if argument must be a referenced pointer, args + i has
* been verified to be a pointer ( after skipping modifiers ) .
2022-09-21 18:48:25 +02:00
* PTR_TO_CTX is ok without having non - zero ref_obj_id .
2022-07-21 15:42:36 +02:00
*/
2022-09-21 18:48:25 +02:00
if ( is_kfunc & & trusted_args & & ( obj_ptr & & reg - > type ! = PTR_TO_CTX ) & & ! reg - > ref_obj_id ) {
2022-07-21 15:42:36 +02:00
bpf_log ( log , " R%d must be referenced \n " , regno ) ;
return - EINVAL ;
}
bpf: Support bpf program calling kernel function
This patch adds support to BPF verifier to allow bpf program calling
kernel function directly.
The use case included in this set is to allow bpf-tcp-cc to directly
call some tcp-cc helper functions (e.g. "tcp_cong_avoid_ai()"). Those
functions have already been used by some kernel tcp-cc implementations.
This set will also allow the bpf-tcp-cc program to directly call the
kernel tcp-cc implementation, For example, a bpf_dctcp may only want to
implement its own dctcp_cwnd_event() and reuse other dctcp_*() directly
from the kernel tcp_dctcp.c instead of reimplementing (or
copy-and-pasting) them.
The tcp-cc kernel functions mentioned above will be white listed
for the struct_ops bpf-tcp-cc programs to use in a later patch.
The white listed functions are not bounded to a fixed ABI contract.
Those functions have already been used by the existing kernel tcp-cc.
If any of them has changed, both in-tree and out-of-tree kernel tcp-cc
implementations have to be changed. The same goes for the struct_ops
bpf-tcp-cc programs which have to be adjusted accordingly.
This patch is to make the required changes in the bpf verifier.
First change is in btf.c, it adds a case in "btf_check_func_arg_match()".
When the passed in "btf->kernel_btf == true", it means matching the
verifier regs' states with a kernel function. This will handle the
PTR_TO_BTF_ID reg. It also maps PTR_TO_SOCK_COMMON, PTR_TO_SOCKET,
and PTR_TO_TCP_SOCK to its kernel's btf_id.
In the later libbpf patch, the insn calling a kernel function will
look like:
insn->code == (BPF_JMP | BPF_CALL)
insn->src_reg == BPF_PSEUDO_KFUNC_CALL /* <- new in this patch */
insn->imm == func_btf_id /* btf_id of the running kernel */
[ For the future calling function-in-kernel-module support, an array
of module btf_fds can be passed at the load time and insn->off
can be used to index into this array. ]
At the early stage of verifier, the verifier will collect all kernel
function calls into "struct bpf_kfunc_desc". Those
descriptors are stored in "prog->aux->kfunc_tab" and will
be available to the JIT. Since this "add" operation is similar
to the current "add_subprog()" and looking for the same insn->code,
they are done together in the new "add_subprog_and_kfunc()".
In the "do_check()" stage, the new "check_kfunc_call()" is added
to verify the kernel function call instruction:
1. Ensure the kernel function can be used by a particular BPF_PROG_TYPE.
A new bpf_verifier_ops "check_kfunc_call" is added to do that.
The bpf-tcp-cc struct_ops program will implement this function in
a later patch.
2. Call "btf_check_kfunc_args_match()" to ensure the regs can be
used as the args of a kernel function.
3. Mark the regs' type, subreg_def, and zext_dst.
At the later do_misc_fixups() stage, the new fixup_kfunc_call()
will replace the insn->imm with the function address (relative
to __bpf_call_base). If needed, the jit can find the btf_func_model
by calling the new bpf_jit_find_kfunc_model(prog, insn).
With the imm set to the function address, "bpftool prog dump xlated"
will be able to display the kernel function calls the same way as
it displays other bpf helper calls.
gpl_compatible program is required to call kernel function.
This feature currently requires JIT.
The verifier selftests are adjusted because of the changes in
the verbose log in add_subprog_and_kfunc().
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015142.1544736-1-kafai@fb.com
2021-03-24 18:51:42 -07:00
ref_t = btf_type_skip_modifiers ( btf , t - > type , & ref_id ) ;
2021-03-24 18:51:36 -07:00
ref_tname = btf_name_by_offset ( btf , ref_t - > name_off ) ;
bpf: Fix PTR_TO_BTF_ID var_off check
When kfunc support was added, check_ctx_reg was called for PTR_TO_CTX
register, but no offset checks were made for PTR_TO_BTF_ID. Only
reg->off was taken into account by btf_struct_ids_match, which protected
against type mismatch due to non-zero reg->off, but when reg->off was
zero, a user could set the variable offset of the register and allow it
to be passed to kfunc, leading to bad pointer being passed into the
kernel.
Fix this by reusing the extracted helper check_func_arg_reg_off from
previous commit, and make one call before checking all supported
register types. Since the list is maintained, any future changes will be
taken into account by updating check_func_arg_reg_off. This function
prevents non-zero var_off to be set for PTR_TO_BTF_ID, but still allows
a fixed non-zero reg->off, which is needed for type matching to work
correctly when using pointer arithmetic.
ARG_DONTCARE is passed as arg_type, since kfunc doesn't support
accepting a ARG_PTR_TO_ALLOC_MEM without relying on size of parameter
type from BTF (in case of pointer), or using a mem, len pair. The
forcing of offset check for ARG_PTR_TO_ALLOC_MEM is done because ringbuf
helpers obtain the size from the header located at the beginning of the
memory region, hence any changes to the original pointer shouldn't be
allowed. In case of kfunc, size is always known, either at verification
time, or using the length parameter, hence this forcing is not required.
Since this check will happen once already for PTR_TO_CTX, remove the
check_ptr_off_reg call inside its block.
Fixes: e6ac2450d6de ("bpf: Support bpf program calling kernel function")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220304224645.3677453-3-memxor@gmail.com
2022-03-05 04:16:39 +05:30
2022-07-21 15:42:36 +02:00
/* Trusted args have the same offset checks as release arguments */
2022-09-21 18:48:25 +02:00
if ( ( trusted_args & & obj_ptr ) | | ( rel & & reg - > ref_obj_id ) )
bpf: Tag argument to be released in bpf_func_proto
Add a new type flag for bpf_arg_type that when set tells verifier that
for a release function, that argument's register will be the one for
which meta.ref_obj_id will be set, and which will then be released
using release_reference. To capture the regno, introduce a new field
release_regno in bpf_call_arg_meta.
This would be required in the next patch, where we may either pass NULL
or a refcounted pointer as an argument to the release function
bpf_kptr_xchg. Just releasing only when meta.ref_obj_id is set is not
enough, as there is a case where the type of argument needed matches,
but the ref_obj_id is set to 0. Hence, we must enforce that whenever
meta.ref_obj_id is zero, the register that is to be released can only
be NULL for a release function.
Since we now indicate whether an argument is to be released in
bpf_func_proto itself, is_release_function helper has lost its utility,
hence refactor code to work without it, and just rely on
meta.release_regno to know when to release state for a ref_obj_id.
Still, the restriction of one release argument and only one ref_obj_id
passed to BPF helper or kfunc remains. This may be lifted in the future.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-3-memxor@gmail.com
2022-04-25 03:18:50 +05:30
arg_type | = OBJ_RELEASE ;
ret = check_func_arg_reg_off ( env , reg , regno , arg_type ) ;
bpf: Fix PTR_TO_BTF_ID var_off check
When kfunc support was added, check_ctx_reg was called for PTR_TO_CTX
register, but no offset checks were made for PTR_TO_BTF_ID. Only
reg->off was taken into account by btf_struct_ids_match, which protected
against type mismatch due to non-zero reg->off, but when reg->off was
zero, a user could set the variable offset of the register and allow it
to be passed to kfunc, leading to bad pointer being passed into the
kernel.
Fix this by reusing the extracted helper check_func_arg_reg_off from
previous commit, and make one call before checking all supported
register types. Since the list is maintained, any future changes will be
taken into account by updating check_func_arg_reg_off. This function
prevents non-zero var_off to be set for PTR_TO_BTF_ID, but still allows
a fixed non-zero reg->off, which is needed for type matching to work
correctly when using pointer arithmetic.
ARG_DONTCARE is passed as arg_type, since kfunc doesn't support
accepting an ARG_PTR_TO_ALLOC_MEM without relying on size of parameter
type from BTF (in case of pointer), or using a mem, len pair. The
forcing of offset check for ARG_PTR_TO_ALLOC_MEM is done because ringbuf
helpers obtain the size from the header located at the beginning of the
memory region, hence any changes to the original pointer shouldn't be
allowed. In case of kfunc, size is always known, either at verification
time, or using the length parameter, hence this forcing is not required.
Since this check will happen once already for PTR_TO_CTX, remove the
check_ptr_off_reg call inside its block.
Fixes: e6ac2450d6de ("bpf: Support bpf program calling kernel function")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220304224645.3677453-3-memxor@gmail.com
2022-03-05 04:16:39 +05:30
if ( ret < 0 )
return ret ;
2022-09-06 17:13:02 +02:00
if ( is_kfunc & & reg - > ref_obj_id ) {
/* Ensure only one argument is referenced PTR_TO_BTF_ID */
if ( ref_obj_id ) {
bpf_log ( log , " verifier internal error: more than one arg with ref_obj_id R%d %u %u \n " ,
regno , reg - > ref_obj_id , ref_obj_id ) ;
return - EFAULT ;
}
ref_regno = regno ;
ref_obj_id = reg - > ref_obj_id ;
}
bpf: Teach verifier about kptr_get kfunc helpers
We introduce a new style of kfunc helpers, namely *_kptr_get, where they
take pointer to the map value which points to a referenced kernel
pointer contained in the map. Since this is referenced, only
bpf_kptr_xchg from BPF side and xchg from kernel side is allowed to
change the current value, and each pointer that resides in that location
would be referenced, and RCU protected (this must be kept in mind while
adding kernel types embeddable as reference kptr in BPF maps).
This means that if we do the load of the pointer value in an RCU read
section, and find a live pointer, then as long as we hold RCU read lock,
it won't be freed by a parallel xchg + release operation. This allows us
to implement a safe refcount increment scheme. Hence, enforce that first
argument of all such kfunc is a proper PTR_TO_MAP_VALUE pointing at the
right offset to referenced pointer.
For the rest of the arguments, they are subjected to typical kfunc
argument checks, hence allowing some flexibility in passing more intent
into how the reference should be taken.
For instance, in case of struct nf_conn, it is not freed until RCU grace
period ends, but can still be reused for another tuple once refcount has
dropped to zero. Hence, a bpf_ct_kptr_get helper not only needs to call
refcount_inc_not_zero, but also do a tuple match after incrementing the
reference, and when it fails to match it, put the reference again and
return NULL.
This can be implemented easily if we allow passing additional parameters
to the bpf_ct_kptr_get kfunc, like a struct bpf_sock_tuple * and a
tuple__sz pair.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-9-memxor@gmail.com
2022-04-25 03:18:56 +05:30
/* kptr_get is only true for kfunc */
if ( i = = 0 & & kptr_get ) {
bpf: Refactor kptr_off_tab into btf_record
To prepare the BPF verifier to handle special fields in both map values
and program allocated types coming from program BTF, we need to refactor
the kptr_off_tab handling code into something more generic and reusable
across both cases to avoid code duplication.
Later patches also require passing this data to helpers at runtime, so
that they can work on user defined types, initialize them, destruct
them, etc.
The main observation is that both map values and such allocated types
point to a type in program BTF, hence they can be handled similarly. We
can prepare a field metadata table for both cases and store them in
struct bpf_map or struct btf depending on the use case.
Hence, refactor the code into generic btf_record and btf_field member
structs. The btf_record represents the fields of a specific btf_type in
user BTF. The cnt indicates the number of special fields we successfully
recognized, and field_mask is a bitmask of fields that were found, to
enable quick determination of availability of a certain field.
Subsequently, refactor the rest of the code to work with these generic
types, remove assumptions about kptr and kptr_off_tab, rename variables
to more meaningful names, etc.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221103191013.1236066-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-04 00:39:55 +05:30
struct btf_field * kptr_field ;
bpf: Teach verifier about kptr_get kfunc helpers
We introduce a new style of kfunc helpers, namely *_kptr_get, where they
take pointer to the map value which points to a referenced kernel
pointer contained in the map. Since this is referenced, only
bpf_kptr_xchg from BPF side and xchg from kernel side is allowed to
change the current value, and each pointer that resides in that location
would be referenced, and RCU protected (this must be kept in mind while
adding kernel types embeddable as reference kptr in BPF maps).
This means that if we do the load of the pointer value in an RCU read
section, and find a live pointer, then as long as we hold RCU read lock,
it won't be freed by a parallel xchg + release operation. This allows us
to implement a safe refcount increment scheme. Hence, enforce that first
argument of all such kfunc is a proper PTR_TO_MAP_VALUE pointing at the
right offset to referenced pointer.
For the rest of the arguments, they are subjected to typical kfunc
argument checks, hence allowing some flexibility in passing more intent
into how the reference should be taken.
For instance, in case of struct nf_conn, it is not freed until RCU grace
period ends, but can still be reused for another tuple once refcount has
dropped to zero. Hence, a bpf_ct_kptr_get helper not only needs to call
refcount_inc_not_zero, but also do a tuple match after incrementing the
reference, and when it fails to match it, put the reference again and
return NULL.
This can be implemented easily if we allow passing additional parameters
to the bpf_ct_kptr_get kfunc, like a struct bpf_sock_tuple * and a
tuple__sz pair.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-9-memxor@gmail.com
2022-04-25 03:18:56 +05:30
if ( reg - > type ! = PTR_TO_MAP_VALUE ) {
bpf_log ( log , " arg#0 expected pointer to map value \n " ) ;
return - EINVAL ;
}
/* check_func_arg_reg_off allows var_off for
* PTR_TO_MAP_VALUE , but we need fixed offset to find
* off_desc .
*/
if ( ! tnum_is_const ( reg - > var_off ) ) {
bpf_log ( log , " arg#0 must have constant offset \n " ) ;
return - EINVAL ;
}
bpf: Refactor kptr_off_tab into btf_record
To prepare the BPF verifier to handle special fields in both map values
and program allocated types coming from program BTF, we need to refactor
the kptr_off_tab handling code into something more generic and reusable
across both cases to avoid code duplication.
Later patches also require passing this data to helpers at runtime, so
that they can work on user defined types, initialize them, destruct
them, etc.
The main observation is that both map values and such allocated types
point to a type in program BTF, hence they can be handled similarly. We
can prepare a field metadata table for both cases and store them in
struct bpf_map or struct btf depending on the use case.
Hence, refactor the code into generic btf_record and btf_field member
structs. The btf_record represents the fields of a specific btf_type in
user BTF. The cnt indicates the number of special fields we successfully
recognized, and field_mask is a bitmask of fields that were found, to
enable quick determination of availability of a certain field.
Subsequently, refactor the rest of the code to work with these generic
types, remove assumptions about kptr and kptr_off_tab, rename variables
to more meaningful names, etc.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221103191013.1236066-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-04 00:39:55 +05:30
kptr_field = btf_record_find ( reg - > map_ptr - > record , reg - > off + reg - > var_off . value , BPF_KPTR ) ;
if ( ! kptr_field | | kptr_field - > type ! = BPF_KPTR_REF ) {
bpf: Teach verifier about kptr_get kfunc helpers
We introduce a new style of kfunc helpers, namely *_kptr_get, where they
take pointer to the map value which points to a referenced kernel
pointer contained in the map. Since this is referenced, only
bpf_kptr_xchg from BPF side and xchg from kernel side is allowed to
change the current value, and each pointer that resides in that location
would be referenced, and RCU protected (this must be kept in mind while
adding kernel types embeddable as reference kptr in BPF maps).
This means that if we do the load of the pointer value in an RCU read
section, and find a live pointer, then as long as we hold RCU read lock,
it won't be freed by a parallel xchg + release operation. This allows us
to implement a safe refcount increment scheme. Hence, enforce that first
argument of all such kfunc is a proper PTR_TO_MAP_VALUE pointing at the
right offset to referenced pointer.
For the rest of the arguments, they are subjected to typical kfunc
argument checks, hence allowing some flexibility in passing more intent
into how the reference should be taken.
For instance, in case of struct nf_conn, it is not freed until RCU grace
period ends, but can still be reused for another tuple once refcount has
dropped to zero. Hence, a bpf_ct_kptr_get helper not only needs to call
refcount_inc_not_zero, but also do a tuple match after incrementing the
reference, and when it fails to match it, put the reference again and
return NULL.
This can be implemented easily if we allow passing additional parameters
to the bpf_ct_kptr_get kfunc, like a struct bpf_sock_tuple * and a
tuple__sz pair.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-9-memxor@gmail.com
2022-04-25 03:18:56 +05:30
bpf_log ( log , " arg#0 no referenced kptr at map value offset=%llu \n " ,
reg - > off + reg - > var_off . value ) ;
return - EINVAL ;
}
if ( ! btf_type_is_ptr ( ref_t ) ) {
bpf_log ( log , " arg#0 BTF type must be a double pointer \n " ) ;
return - EINVAL ;
}
ref_t = btf_type_skip_modifiers ( btf , ref_t - > type , & ref_id ) ;
ref_tname = btf_name_by_offset ( btf , ref_t - > name_off ) ;
if ( ! btf_type_is_struct ( ref_t ) ) {
bpf_log ( log , " kernel function %s args#%d pointer type %s %s is not supported \n " ,
func_name , i , btf_type_str ( ref_t ) , ref_tname ) ;
return - EINVAL ;
}
bpf: Refactor kptr_off_tab into btf_record
To prepare the BPF verifier to handle special fields in both map values
and program allocated types coming from program BTF, we need to refactor
the kptr_off_tab handling code into something more generic and reusable
across both cases to avoid code duplication.
Later patches also require passing this data to helpers at runtime, so
that they can work on user defined types, initialize them, destruct
them, etc.
The main observation is that both map values and such allocated types
point to a type in program BTF, hence they can be handled similarly. We
can prepare a field metadata table for both cases and store them in
struct bpf_map or struct btf depending on the use case.
Hence, refactor the code into generic btf_record and btf_field member
structs. The btf_record represents the fields of a specific btf_type in
user BTF. The cnt indicates the number of special fields we successfully
recognized, and field_mask is a bitmask of fields that were found, to
enable quick determination of availability of a certain field.
Subsequently, refactor the rest of the code to work with these generic
types, remove assumptions about kptr and kptr_off_tab, rename variables
to more meaningful names, etc.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20221103191013.1236066-7-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-11-04 00:39:55 +05:30
if ( ! btf_struct_ids_match ( log , btf , ref_id , 0 , kptr_field - > kptr . btf ,
kptr_field - > kptr . btf_id , true ) ) {
bpf: Teach verifier about kptr_get kfunc helpers
We introduce a new style of kfunc helpers, namely *_kptr_get, where they
take pointer to the map value which points to a referenced kernel
pointer contained in the map. Since this is referenced, only
bpf_kptr_xchg from BPF side and xchg from kernel side is allowed to
change the current value, and each pointer that resides in that location
would be referenced, and RCU protected (this must be kept in mind while
adding kernel types embeddable as reference kptr in BPF maps).
This means that if we do the load of the pointer value in an RCU read
section, and find a live pointer, then as long as we hold RCU read lock,
it won't be freed by a parallel xchg + release operation. This allows us
to implement a safe refcount increment scheme. Hence, enforce that first
argument of all such kfunc is a proper PTR_TO_MAP_VALUE pointing at the
right offset to referenced pointer.
For the rest of the arguments, they are subjected to typical kfunc
argument checks, hence allowing some flexibility in passing more intent
into how the reference should be taken.
For instance, in case of struct nf_conn, it is not freed until RCU grace
period ends, but can still be reused for another tuple once refcount has
dropped to zero. Hence, a bpf_ct_kptr_get helper not only needs to call
refcount_inc_not_zero, but also do a tuple match after incrementing the
reference, and when it fails to match it, put the reference again and
return NULL.
This can be implemented easily if we allow passing additional parameters
to the bpf_ct_kptr_get kfunc, like a struct bpf_sock_tuple * and a
tuple__sz pair.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-9-memxor@gmail.com
2022-04-25 03:18:56 +05:30
bpf_log ( log , " kernel function %s args#%d expected pointer to %s %s \n " ,
func_name , i , btf_type_str ( ref_t ) , ref_tname ) ;
return - EINVAL ;
}
/* rest of the arguments can be anything, like normal kfunc */
2022-06-06 09:52:51 +02:00
} else if ( btf_get_prog_ctx_type ( log , btf , t , prog_type , i ) ) {
bpf: Extend kfunc with PTR_TO_CTX, PTR_TO_MEM argument support
Allow passing PTR_TO_CTX, if the kfunc expects a matching struct type,
and punt to PTR_TO_MEM block if reg->type does not fall in one of
PTR_TO_BTF_ID or PTR_TO_SOCK* types. This will be used by future commits
to get access to XDP and TC PTR_TO_CTX, and pass various data (flags,
l4proto, netns_id, etc.) encoded in opts struct passed as pointer to
kfunc.
For PTR_TO_MEM support, arguments are currently limited to pointer to
scalar, or pointer to struct composed of scalars. This is done so that
unsafe scenarios (like passing PTR_TO_MEM where PTR_TO_BTF_ID of
in-kernel valid structure is expected, which may have pointers) are
avoided. Since the argument checking happens based on argument register
type, it is not easy to ascertain what the expected type is. In the
future, support for PTR_TO_MEM for kfunc can be extended to serve other
usecases. The struct type whose pointer is passed in may have maximum
nesting depth of 4, all recursively composed of scalars or struct with
scalars.
Future commits will add negative tests that check whether these
restrictions imposed for kfunc arguments are duly rejected by BPF
verifier or not.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211217015031.1278167-4-memxor@gmail.com
2021-12-17 07:20:24 +05:30
/* If function expects ctx type in BTF check that caller
* is passing PTR_TO_CTX .
*/
if ( reg - > type ! = PTR_TO_CTX ) {
bpf_log ( log ,
" arg#%d expected pointer to ctx, but got %s \n " ,
i , btf_type_str ( t ) ) ;
return - EINVAL ;
}
2022-02-17 01:49:43 +05:30
} else if ( is_kfunc & & ( reg - > type = = PTR_TO_BTF_ID | |
( reg2btf_ids [ base_type ( reg - > type ) ] & & ! type_flag ( reg - > type ) ) ) ) {
bpf: Support bpf program calling kernel function
This patch adds support to BPF verifier to allow bpf program calling
kernel function directly.
The use case included in this set is to allow bpf-tcp-cc to directly
call some tcp-cc helper functions (e.g. "tcp_cong_avoid_ai()"). Those
functions have already been used by some kernel tcp-cc implementations.
This set will also allow the bpf-tcp-cc program to directly call the
kernel tcp-cc implementation. For example, a bpf_dctcp may only want to
implement its own dctcp_cwnd_event() and reuse other dctcp_*() directly
from the kernel tcp_dctcp.c instead of reimplementing (or
copy-and-pasting) them.
The tcp-cc kernel functions mentioned above will be white listed
for the struct_ops bpf-tcp-cc programs to use in a later patch.
The white listed functions are not bounded to a fixed ABI contract.
Those functions have already been used by the existing kernel tcp-cc.
If any of them has changed, both in-tree and out-of-tree kernel tcp-cc
implementations have to be changed. The same goes for the struct_ops
bpf-tcp-cc programs which have to be adjusted accordingly.
This patch is to make the required changes in the bpf verifier.
First change is in btf.c, it adds a case in "btf_check_func_arg_match()".
When the passed in "btf->kernel_btf == true", it means matching the
verifier regs' states with a kernel function. This will handle the
PTR_TO_BTF_ID reg. It also maps PTR_TO_SOCK_COMMON, PTR_TO_SOCKET,
and PTR_TO_TCP_SOCK to its kernel's btf_id.
In the later libbpf patch, the insn calling a kernel function will
look like:
insn->code == (BPF_JMP | BPF_CALL)
insn->src_reg == BPF_PSEUDO_KFUNC_CALL /* <- new in this patch */
insn->imm == func_btf_id /* btf_id of the running kernel */
[ For the future calling function-in-kernel-module support, an array
of module btf_fds can be passed at the load time and insn->off
can be used to index into this array. ]
At the early stage of verifier, the verifier will collect all kernel
function calls into "struct bpf_kfunc_desc". Those
descriptors are stored in "prog->aux->kfunc_tab" and will
be available to the JIT. Since this "add" operation is similar
to the current "add_subprog()" and looking for the same insn->code,
they are done together in the new "add_subprog_and_kfunc()".
In the "do_check()" stage, the new "check_kfunc_call()" is added
to verify the kernel function call instruction:
1. Ensure the kernel function can be used by a particular BPF_PROG_TYPE.
A new bpf_verifier_ops "check_kfunc_call" is added to do that.
The bpf-tcp-cc struct_ops program will implement this function in
a later patch.
2. Call "btf_check_kfunc_args_match()" to ensure the regs can be
used as the args of a kernel function.
3. Mark the regs' type, subreg_def, and zext_dst.
At the later do_misc_fixups() stage, the new fixup_kfunc_call()
will replace the insn->imm with the function address (relative
to __bpf_call_base). If needed, the jit can find the btf_func_model
by calling the new bpf_jit_find_kfunc_model(prog, insn).
With the imm set to the function address, "bpftool prog dump xlated"
will be able to display the kernel function calls the same way as
it displays other bpf helper calls.
gpl_compatible program is required to call kernel function.
This feature currently requires JIT.
The verifier selftests are adjusted because of the changes in
the verbose log in add_subprog_and_kfunc().
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015142.1544736-1-kafai@fb.com
2021-03-24 18:51:42 -07:00
const struct btf_type * reg_ref_t ;
const struct btf * reg_btf ;
const char * reg_ref_tname ;
u32 reg_ref_id ;
if ( ! btf_type_is_struct ( ref_t ) ) {
bpf_log ( log , " kernel function %s args#%d pointer type %s %s is not supported \n " ,
func_name , i , btf_type_str ( ref_t ) ,
ref_tname ) ;
return - EINVAL ;
}
if ( reg - > type = = PTR_TO_BTF_ID ) {
reg_btf = reg - > btf ;
reg_ref_id = reg - > btf_id ;
bpf: Extend kfunc with PTR_TO_CTX, PTR_TO_MEM argument support
Allow passing PTR_TO_CTX, if the kfunc expects a matching struct type,
and punt to PTR_TO_MEM block if reg->type does not fall in one of
PTR_TO_BTF_ID or PTR_TO_SOCK* types. This will be used by future commits
to get access to XDP and TC PTR_TO_CTX, and pass various data (flags,
l4proto, netns_id, etc.) encoded in opts struct passed as pointer to
kfunc.
For PTR_TO_MEM support, arguments are currently limited to pointer to
scalar, or pointer to struct composed of scalars. This is done so that
unsafe scenarios (like passing PTR_TO_MEM where PTR_TO_BTF_ID of
in-kernel valid structure is expected, which may have pointers) are
avoided. Since the argument checking happens based on argument register
type, it is not easy to ascertain what the expected type is. In the
future, support for PTR_TO_MEM for kfunc can be extended to serve other
usecases. The struct type whose pointer is passed in may have maximum
nesting depth of 4, all recursively composed of scalars or struct with
scalars.
Future commits will add negative tests that check whether these
restrictions imposed for kfunc arguments are duly rejected by BPF
verifier or not.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211217015031.1278167-4-memxor@gmail.com
2021-12-17 07:20:24 +05:30
} else {
bpf: Support bpf program calling kernel function
This patch adds support to BPF verifier to allow bpf program calling
kernel function directly.
The use case included in this set is to allow bpf-tcp-cc to directly
call some tcp-cc helper functions (e.g. "tcp_cong_avoid_ai()"). Those
functions have already been used by some kernel tcp-cc implementations.
This set will also allow the bpf-tcp-cc program to directly call the
kernel tcp-cc implementation. For example, a bpf_dctcp may only want to
implement its own dctcp_cwnd_event() and reuse other dctcp_*() directly
from the kernel tcp_dctcp.c instead of reimplementing (or
copy-and-pasting) them.
The tcp-cc kernel functions mentioned above will be white listed
for the struct_ops bpf-tcp-cc programs to use in a later patch.
The white listed functions are not bounded to a fixed ABI contract.
Those functions have already been used by the existing kernel tcp-cc.
If any of them has changed, both in-tree and out-of-tree kernel tcp-cc
implementations have to be changed. The same goes for the struct_ops
bpf-tcp-cc programs which have to be adjusted accordingly.
This patch is to make the required changes in the bpf verifier.
First change is in btf.c, it adds a case in "btf_check_func_arg_match()".
When the passed in "btf->kernel_btf == true", it means matching the
verifier regs' states with a kernel function. This will handle the
PTR_TO_BTF_ID reg. It also maps PTR_TO_SOCK_COMMON, PTR_TO_SOCKET,
and PTR_TO_TCP_SOCK to its kernel's btf_id.
In the later libbpf patch, the insn calling a kernel function will
look like:
insn->code == (BPF_JMP | BPF_CALL)
insn->src_reg == BPF_PSEUDO_KFUNC_CALL /* <- new in this patch */
insn->imm == func_btf_id /* btf_id of the running kernel */
[ For the future calling function-in-kernel-module support, an array
of module btf_fds can be passed at the load time and insn->off
can be used to index into this array. ]
At the early stage of verifier, the verifier will collect all kernel
function calls into "struct bpf_kfunc_desc". Those
descriptors are stored in "prog->aux->kfunc_tab" and will
be available to the JIT. Since this "add" operation is similar
to the current "add_subprog()" and looking for the same insn->code,
they are done together in the new "add_subprog_and_kfunc()".
In the "do_check()" stage, the new "check_kfunc_call()" is added
to verify the kernel function call instruction:
1. Ensure the kernel function can be used by a particular BPF_PROG_TYPE.
A new bpf_verifier_ops "check_kfunc_call" is added to do that.
The bpf-tcp-cc struct_ops program will implement this function in
a later patch.
2. Call "btf_check_kfunc_args_match()" to ensure the regs can be
used as the args of a kernel function.
3. Mark the regs' type, subreg_def, and zext_dst.
At the later do_misc_fixups() stage, the new fixup_kfunc_call()
will replace the insn->imm with the function address (relative
to __bpf_call_base). If needed, the jit can find the btf_func_model
by calling the new bpf_jit_find_kfunc_model(prog, insn).
With the imm set to the function address, "bpftool prog dump xlated"
will be able to display the kernel function calls the same way as
it displays other bpf helper calls.
gpl_compatible program is required to call kernel function.
This feature currently requires JIT.
The verifier selftests are adjusted because of the changes in
the verbose log in add_subprog_and_kfunc().
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015142.1544736-1-kafai@fb.com
2021-03-24 18:51:42 -07:00
reg_btf = btf_vmlinux ;
2022-02-17 01:49:43 +05:30
reg_ref_id = * reg2btf_ids [ base_type ( reg - > type ) ] ;
bpf: Support bpf program calling kernel function
This patch adds support to BPF verifier to allow bpf program calling
kernel function directly.
The use case included in this set is to allow bpf-tcp-cc to directly
call some tcp-cc helper functions (e.g. "tcp_cong_avoid_ai()"). Those
functions have already been used by some kernel tcp-cc implementations.
This set will also allow the bpf-tcp-cc program to directly call the
kernel tcp-cc implementation. For example, a bpf_dctcp may only want to
implement its own dctcp_cwnd_event() and reuse other dctcp_*() directly
from the kernel tcp_dctcp.c instead of reimplementing (or
copy-and-pasting) them.
The tcp-cc kernel functions mentioned above will be white listed
for the struct_ops bpf-tcp-cc programs to use in a later patch.
The white listed functions are not bounded to a fixed ABI contract.
Those functions have already been used by the existing kernel tcp-cc.
If any of them has changed, both in-tree and out-of-tree kernel tcp-cc
implementations have to be changed. The same goes for the struct_ops
bpf-tcp-cc programs which have to be adjusted accordingly.
This patch is to make the required changes in the bpf verifier.
First change is in btf.c, it adds a case in "btf_check_func_arg_match()".
When the passed in "btf->kernel_btf == true", it means matching the
verifier regs' states with a kernel function. This will handle the
PTR_TO_BTF_ID reg. It also maps PTR_TO_SOCK_COMMON, PTR_TO_SOCKET,
and PTR_TO_TCP_SOCK to its kernel's btf_id.
In the later libbpf patch, the insn calling a kernel function will
look like:
insn->code == (BPF_JMP | BPF_CALL)
insn->src_reg == BPF_PSEUDO_KFUNC_CALL /* <- new in this patch */
insn->imm == func_btf_id /* btf_id of the running kernel */
[ For the future calling function-in-kernel-module support, an array
of module btf_fds can be passed at the load time and insn->off
can be used to index into this array. ]
At the early stage of verifier, the verifier will collect all kernel
function calls into "struct bpf_kfunc_desc". Those
descriptors are stored in "prog->aux->kfunc_tab" and will
be available to the JIT. Since this "add" operation is similar
to the current "add_subprog()" and looking for the same insn->code,
they are done together in the new "add_subprog_and_kfunc()".
In the "do_check()" stage, the new "check_kfunc_call()" is added
to verify the kernel function call instruction:
1. Ensure the kernel function can be used by a particular BPF_PROG_TYPE.
A new bpf_verifier_ops "check_kfunc_call" is added to do that.
The bpf-tcp-cc struct_ops program will implement this function in
a later patch.
2. Call "btf_check_kfunc_args_match()" to ensure the regs can be
used as the args of a kernel function.
3. Mark the regs' type, subreg_def, and zext_dst.
At the later do_misc_fixups() stage, the new fixup_kfunc_call()
will replace the insn->imm with the function address (relative
to __bpf_call_base). If needed, the jit can find the btf_func_model
by calling the new bpf_jit_find_kfunc_model(prog, insn).
With the imm set to the function address, "bpftool prog dump xlated"
will be able to display the kernel function calls the same way as
it displays other bpf helper calls.
gpl_compatible program is required to call kernel function.
This feature currently requires JIT.
The verifier selftests are adjusted because of the changes in
the verbose log in add_subprog_and_kfunc().
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015142.1544736-1-kafai@fb.com
2021-03-24 18:51:42 -07:00
}
reg_ref_t = btf_type_skip_modifiers ( reg_btf , reg_ref_id ,
& reg_ref_id ) ;
reg_ref_tname = btf_name_by_offset ( reg_btf ,
reg_ref_t - > name_off ) ;
if ( ! btf_struct_ids_match ( log , reg_btf , reg_ref_id ,
2022-07-21 15:42:36 +02:00
reg - > off , btf , ref_id ,
2022-09-21 18:48:25 +02:00
trusted_args | | ( rel & & reg - > ref_obj_id ) ) ) {
bpf: Support bpf program calling kernel function
This patch adds support to BPF verifier to allow bpf program calling
kernel function directly.
The use case included in this set is to allow bpf-tcp-cc to directly
call some tcp-cc helper functions (e.g. "tcp_cong_avoid_ai()"). Those
functions have already been used by some kernel tcp-cc implementations.
This set will also allow the bpf-tcp-cc program to directly call the
kernel tcp-cc implementation. For example, a bpf_dctcp may only want to
implement its own dctcp_cwnd_event() and reuse other dctcp_*() directly
from the kernel tcp_dctcp.c instead of reimplementing (or
copy-and-pasting) them.
The tcp-cc kernel functions mentioned above will be white listed
for the struct_ops bpf-tcp-cc programs to use in a later patch.
The white listed functions are not bound to a fixed ABI contract.
Those functions have already been used by the existing kernel tcp-cc.
If any of them has changed, both in-tree and out-of-tree kernel tcp-cc
implementations have to be changed. The same goes for the struct_ops
bpf-tcp-cc programs which have to be adjusted accordingly.
This patch is to make the required changes in the bpf verifier.
First change is in btf.c, it adds a case in "btf_check_func_arg_match()".
When the passed in "btf->kernel_btf == true", it means matching the
verifier regs' states with a kernel function. This will handle the
PTR_TO_BTF_ID reg. It also maps PTR_TO_SOCK_COMMON, PTR_TO_SOCKET,
and PTR_TO_TCP_SOCK to its kernel's btf_id.
In the later libbpf patch, the insn calling a kernel function will
look like:
insn->code == (BPF_JMP | BPF_CALL)
insn->src_reg == BPF_PSEUDO_KFUNC_CALL /* <- new in this patch */
insn->imm == func_btf_id /* btf_id of the running kernel */
[ For the future calling function-in-kernel-module support, an array
of module btf_fds can be passed at the load time and insn->off
can be used to index into this array. ]
At the early stage of verifier, the verifier will collect all kernel
function calls into "struct bpf_kfunc_desc". Those
descriptors are stored in "prog->aux->kfunc_tab" and will
be available to the JIT. Since this "add" operation is similar
to the current "add_subprog()" and looking for the same insn->code,
they are done together in the new "add_subprog_and_kfunc()".
In the "do_check()" stage, the new "check_kfunc_call()" is added
to verify the kernel function call instruction:
1. Ensure the kernel function can be used by a particular BPF_PROG_TYPE.
A new bpf_verifier_ops "check_kfunc_call" is added to do that.
The bpf-tcp-cc struct_ops program will implement this function in
a later patch.
2. Call "btf_check_kfunc_args_match()" to ensure the regs can be
used as the args of a kernel function.
3. Mark the regs' type, subreg_def, and zext_dst.
At the later do_misc_fixups() stage, the new fixup_kfunc_call()
will replace the insn->imm with the function address (relative
to __bpf_call_base). If needed, the jit can find the btf_func_model
by calling the new bpf_jit_find_kfunc_model(prog, insn).
With the imm set to the function address, "bpftool prog dump xlated"
will be able to display the kernel function calls the same way as
it displays other bpf helper calls.
gpl_compatible program is required to call kernel function.
This feature currently requires JIT.
The verifier selftests are adjusted because of the changes in
the verbose log in add_subprog_and_kfunc().
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015142.1544736-1-kafai@fb.com
2021-03-24 18:51:42 -07:00
bpf_log ( log , " kernel function %s args#%d expected pointer to %s %s but R%d has a pointer to %s %s \n " ,
func_name , i ,
btf_type_str ( ref_t ) , ref_tname ,
regno , btf_type_str ( reg_ref_t ) ,
reg_ref_tname ) ;
return - EINVAL ;
}
2022-09-06 17:12:58 +02:00
} else if ( ptr_to_mem_ok & & processing_call ) {
2021-03-24 18:51:36 -07:00
const struct btf_type * resolve_ret ;
u32 type_size ;
2021-02-13 00:56:41 +04:00
bpf: Extend kfunc with PTR_TO_CTX, PTR_TO_MEM argument support
Allow passing PTR_TO_CTX, if the kfunc expects a matching struct type,
and punt to PTR_TO_MEM block if reg->type does not fall in one of
PTR_TO_BTF_ID or PTR_TO_SOCK* types. This will be used by future commits
to get access to XDP and TC PTR_TO_CTX, and pass various data (flags,
l4proto, netns_id, etc.) encoded in opts struct passed as pointer to
kfunc.
For PTR_TO_MEM support, arguments are currently limited to pointer to
scalar, or pointer to struct composed of scalars. This is done so that
unsafe scenarios (like passing PTR_TO_MEM where PTR_TO_BTF_ID of
in-kernel valid structure is expected, which may have pointers) are
avoided. Since the argument checking happens based on argument register
type, it is not easy to ascertain what the expected type is. In the
future, support for PTR_TO_MEM for kfunc can be extended to serve other
usecases. The struct type whose pointer is passed in may have maximum
nesting depth of 4, all recursively composed of scalars or struct with
scalars.
Future commits will add negative tests that check whether these
restrictions imposed for kfunc arguments are duly rejected by BPF
verifier or not.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211217015031.1278167-4-memxor@gmail.com
2021-12-17 07:20:24 +05:30
if ( is_kfunc ) {
bpf: Introduce mem, size argument pair support for kfunc
BPF helpers can associate two adjacent arguments together to pass memory
of certain size, using ARG_PTR_TO_MEM and ARG_CONST_SIZE arguments.
Since we don't use bpf_func_proto for kfunc, we need to leverage BTF to
implement similar support.
The ARG_CONST_SIZE processing for helpers is refactored into a common
check_mem_size_reg helper that is shared with kfunc as well. kfunc
ptr_to_mem support follows logic similar to global functions, where
verification is done as if pointer is not null, even when it may be
null.
This leads to a simple to follow rule for writing kfunc: always check
the argument pointer for NULL, except when it is PTR_TO_CTX. Also, the
PTR_TO_CTX case is also only safe when the helper expecting pointer to
program ctx is not exposed to other programs where same struct is not
ctx type. In that case, the type check will fall through to other cases
and would permit passing other types of pointers, possibly NULL at
runtime.
Currently, we require the size argument to be suffixed with "__sz" in
the parameter name. This information is then recorded in kernel BTF and
verified during function argument checking. In the future we can use BTF
tagging instead, and modify the kernel function definitions. This will
be a purely kernel-side change.
This allows us to have some form of backwards compatibility for
structures that are passed in to the kernel function with their size,
and allow variable length structures to be passed in if they are
accompanied by a size parameter.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:47 +05:30
bool arg_mem_size = i + 1 < nargs & & is_kfunc_arg_mem_size ( btf , & args [ i + 1 ] , & regs [ regno + 1 ] ) ;
2022-09-20 09:59:42 +02:00
bool arg_dynptr = btf_type_is_struct ( ref_t ) & &
! strcmp ( ref_tname ,
stringify_struct ( bpf_dynptr_kern ) ) ;
bpf: Introduce mem, size argument pair support for kfunc
BPF helpers can associate two adjacent arguments together to pass memory
of certain size, using ARG_PTR_TO_MEM and ARG_CONST_SIZE arguments.
Since we don't use bpf_func_proto for kfunc, we need to leverage BTF to
implement similar support.
The ARG_CONST_SIZE processing for helpers is refactored into a common
check_mem_size_reg helper that is shared with kfunc as well. kfunc
ptr_to_mem support follows logic similar to global functions, where
verification is done as if pointer is not null, even when it may be
null.
This leads to a simple to follow rule for writing kfunc: always check
the argument pointer for NULL, except when it is PTR_TO_CTX. Also, the
PTR_TO_CTX case is also only safe when the helper expecting pointer to
program ctx is not exposed to other programs where same struct is not
ctx type. In that case, the type check will fall through to other cases
and would permit passing other types of pointers, possibly NULL at
runtime.
Currently, we require the size argument to be suffixed with "__sz" in
the parameter name. This information is then recorded in kernel BTF and
verified during function argument checking. In the future we can use BTF
tagging instead, and modify the kernel function definitions. This will
be a purely kernel-side change.
This allows us to have some form of backwards compatibility for
structures that are passed in to the kernel function with their size,
and allow variable length structures to be passed in if they are
accompanied by a size parameter.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:47 +05:30
bpf: Extend kfunc with PTR_TO_CTX, PTR_TO_MEM argument support
Allow passing PTR_TO_CTX, if the kfunc expects a matching struct type,
and punt to PTR_TO_MEM block if reg->type does not fall in one of
PTR_TO_BTF_ID or PTR_TO_SOCK* types. This will be used by future commits
to get access to XDP and TC PTR_TO_CTX, and pass various data (flags,
l4proto, netns_id, etc.) encoded in opts struct passed as pointer to
kfunc.
For PTR_TO_MEM support, arguments are currently limited to pointer to
scalar, or pointer to struct composed of scalars. This is done so that
unsafe scenarios (like passing PTR_TO_MEM where PTR_TO_BTF_ID of
in-kernel valid structure is expected, which may have pointers) are
avoided. Since the argument checking happens based on argument register
type, it is not easy to ascertain what the expected type is. In the
future, support for PTR_TO_MEM for kfunc can be extended to serve other
usecases. The struct type whose pointer is passed in may have maximum
nesting depth of 4, all recursively composed of scalars or struct with
scalars.
Future commits will add negative tests that check whether these
restrictions imposed for kfunc arguments are duly rejected by BPF
verifier or not.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211217015031.1278167-4-memxor@gmail.com
2021-12-17 07:20:24 +05:30
/* Permit pointer to mem, but only when argument
* type is pointer to scalar , or struct composed
* ( recursively ) of scalars .
bpf: Introduce mem, size argument pair support for kfunc
BPF helpers can associate two adjacent arguments together to pass memory
of certain size, using ARG_PTR_TO_MEM and ARG_CONST_SIZE arguments.
Since we don't use bpf_func_proto for kfunc, we need to leverage BTF to
implement similar support.
The ARG_CONST_SIZE processing for helpers is refactored into a common
check_mem_size_reg helper that is shared with kfunc as well. kfunc
ptr_to_mem support follows logic similar to global functions, where
verification is done as if pointer is not null, even when it may be
null.
This leads to a simple to follow rule for writing kfunc: always check
the argument pointer for NULL, except when it is PTR_TO_CTX. Also, the
PTR_TO_CTX case is also only safe when the helper expecting pointer to
program ctx is not exposed to other programs where same struct is not
ctx type. In that case, the type check will fall through to other cases
and would permit passing other types of pointers, possibly NULL at
runtime.
Currently, we require the size argument to be suffixed with "__sz" in
the parameter name. This information is then recorded in kernel BTF and
verified during function argument checking. In the future we can use BTF
tagging instead, and modify the kernel function definitions. This will
be a purely kernel-side change.
This allows us to have some form of backwards compatibility for
structures that are passed in to the kernel function with their size,
and allow variable length structures to be passed in if they are
accompanied by a size parameter.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:47 +05:30
* When arg_mem_size is true , the pointer can be
* void * .
2022-09-20 09:59:42 +02:00
* Also permit initialized local dynamic pointers .
bpf: Extend kfunc with PTR_TO_CTX, PTR_TO_MEM argument support
Allow passing PTR_TO_CTX, if the kfunc expects a matching struct type,
and punt to PTR_TO_MEM block if reg->type does not fall in one of
PTR_TO_BTF_ID or PTR_TO_SOCK* types. This will be used by future commits
to get access to XDP and TC PTR_TO_CTX, and pass various data (flags,
l4proto, netns_id, etc.) encoded in opts struct passed as pointer to
kfunc.
For PTR_TO_MEM support, arguments are currently limited to pointer to
scalar, or pointer to struct composed of scalars. This is done so that
unsafe scenarios (like passing PTR_TO_MEM where PTR_TO_BTF_ID of
in-kernel valid structure is expected, which may have pointers) are
avoided. Since the argument checking happens based on argument register
type, it is not easy to ascertain what the expected type is. In the
future, support for PTR_TO_MEM for kfunc can be extended to serve other
usecases. The struct type whose pointer is passed in may have maximum
nesting depth of 4, all recursively composed of scalars or struct with
scalars.
Future commits will add negative tests that check whether these
restrictions imposed for kfunc arguments are duly rejected by BPF
verifier or not.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211217015031.1278167-4-memxor@gmail.com
2021-12-17 07:20:24 +05:30
*/
if ( ! btf_type_is_scalar ( ref_t ) & &
bpf: Introduce mem, size argument pair support for kfunc
BPF helpers can associate two adjacent arguments together to pass memory
of certain size, using ARG_PTR_TO_MEM and ARG_CONST_SIZE arguments.
Since we don't use bpf_func_proto for kfunc, we need to leverage BTF to
implement similar support.
The ARG_CONST_SIZE processing for helpers is refactored into a common
check_mem_size_reg helper that is shared with kfunc as well. kfunc
ptr_to_mem support follows logic similar to global functions, where
verification is done as if pointer is not null, even when it may be
null.
This leads to a simple to follow rule for writing kfunc: always check
the argument pointer for NULL, except when it is PTR_TO_CTX. Also, the
PTR_TO_CTX case is also only safe when the helper expecting pointer to
program ctx is not exposed to other programs where same struct is not
ctx type. In that case, the type check will fall through to other cases
and would permit passing other types of pointers, possibly NULL at
runtime.
Currently, we require the size argument to be suffixed with "__sz" in
the parameter name. This information is then recorded in kernel BTF and
verified during function argument checking. In the future we can use BTF
tagging instead, and modify the kernel function definitions. This will
be a purely kernel-side change.
This allows us to have some form of backwards compatibility for
structures that are passed in to the kernel function with their size,
and allow variable length structures to be passed in if they are
accompanied by a size parameter.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:47 +05:30
! __btf_type_is_scalar_struct ( log , btf , ref_t , 0 ) & &
2022-09-20 09:59:42 +02:00
! arg_dynptr & &
bpf: Introduce mem, size argument pair support for kfunc
BPF helpers can associate two adjacent arguments together to pass memory
of certain size, using ARG_PTR_TO_MEM and ARG_CONST_SIZE arguments.
Since we don't use bpf_func_proto for kfunc, we need to leverage BTF to
implement similar support.
The ARG_CONST_SIZE processing for helpers is refactored into a common
check_mem_size_reg helper that is shared with kfunc as well. kfunc
ptr_to_mem support follows logic similar to global functions, where
verification is done as if pointer is not null, even when it may be
null.
This leads to a simple to follow rule for writing kfunc: always check
the argument pointer for NULL, except when it is PTR_TO_CTX. Also, the
PTR_TO_CTX case is also only safe when the helper expecting pointer to
program ctx is not exposed to other programs where same struct is not
ctx type. In that case, the type check will fall through to other cases
and would permit passing other types of pointers, possibly NULL at
runtime.
Currently, we require the size argument to be suffixed with "__sz" in
the parameter name. This information is then recorded in kernel BTF and
verified during function argument checking. In the future we can use BTF
tagging instead, and modify the kernel function definitions. This will
be a purely kernel-side change.
This allows us to have some form of backwards compatibility for
structures that are passed in to the kernel function with their size,
and allow variable length structures to be passed in if they are
accompanied by a size parameter.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:47 +05:30
( arg_mem_size ? ! btf_type_is_void ( ref_t ) : 1 ) ) {
bpf: Extend kfunc with PTR_TO_CTX, PTR_TO_MEM argument support
Allow passing PTR_TO_CTX, if the kfunc expects a matching struct type,
and punt to PTR_TO_MEM block if reg->type does not fall in one of
PTR_TO_BTF_ID or PTR_TO_SOCK* types. This will be used by future commits
to get access to XDP and TC PTR_TO_CTX, and pass various data (flags,
l4proto, netns_id, etc.) encoded in opts struct passed as pointer to
kfunc.
For PTR_TO_MEM support, arguments are currently limited to pointer to
scalar, or pointer to struct composed of scalars. This is done so that
unsafe scenarios (like passing PTR_TO_MEM where PTR_TO_BTF_ID of
in-kernel valid structure is expected, which may have pointers) are
avoided. Since the argument checking happens based on argument register
type, it is not easy to ascertain what the expected type is. In the
future, support for PTR_TO_MEM for kfunc can be extended to serve other
usecases. The struct type whose pointer is passed in may have maximum
nesting depth of 4, all recursively composed of scalars or struct with
scalars.
Future commits will add negative tests that check whether these
restrictions imposed for kfunc arguments are duly rejected by BPF
verifier or not.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211217015031.1278167-4-memxor@gmail.com
2021-12-17 07:20:24 +05:30
bpf_log ( log ,
bpf: Introduce mem, size argument pair support for kfunc
BPF helpers can associate two adjacent arguments together to pass memory
of certain size, using ARG_PTR_TO_MEM and ARG_CONST_SIZE arguments.
Since we don't use bpf_func_proto for kfunc, we need to leverage BTF to
implement similar support.
The ARG_CONST_SIZE processing for helpers is refactored into a common
check_mem_size_reg helper that is shared with kfunc as well. kfunc
ptr_to_mem support follows logic similar to global functions, where
verification is done as if pointer is not null, even when it may be
null.
This leads to a simple to follow rule for writing kfunc: always check
the argument pointer for NULL, except when it is PTR_TO_CTX. Also, the
PTR_TO_CTX case is also only safe when the helper expecting pointer to
program ctx is not exposed to other programs where same struct is not
ctx type. In that case, the type check will fall through to other cases
and would permit passing other types of pointers, possibly NULL at
runtime.
Currently, we require the size argument to be suffixed with "__sz" in
the parameter name. This information is then recorded in kernel BTF and
verified during function argument checking. In the future we can use BTF
tagging instead, and modify the kernel function definitions. This will
be a purely kernel-side change.
This allows us to have some form of backwards compatibility for
structures that are passed in to the kernel function with their size,
and allow variable length structures to be passed in if they are
accompanied by a size parameter.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:47 +05:30
" arg#%d pointer type %s %s must point to %sscalar, or struct with scalar \n " ,
i , btf_type_str ( ref_t ) , ref_tname , arg_mem_size ? " void, " : " " ) ;
bpf: Extend kfunc with PTR_TO_CTX, PTR_TO_MEM argument support
Allow passing PTR_TO_CTX, if the kfunc expects a matching struct type,
and punt to PTR_TO_MEM block if reg->type does not fall in one of
PTR_TO_BTF_ID or PTR_TO_SOCK* types. This will be used by future commits
to get access to XDP and TC PTR_TO_CTX, and pass various data (flags,
l4proto, netns_id, etc.) encoded in opts struct passed as pointer to
kfunc.
For PTR_TO_MEM support, arguments are currently limited to pointer to
scalar, or pointer to struct composed of scalars. This is done so that
unsafe scenarios (like passing PTR_TO_MEM where PTR_TO_BTF_ID of
in-kernel valid structure is expected, which may have pointers) are
avoided. Since the argument checking happens based on argument register
type, it is not easy to ascertain what the expected type is. In the
future, support for PTR_TO_MEM for kfunc can be extended to serve other
usecases. The struct type whose pointer is passed in may have maximum
nesting depth of 4, all recursively composed of scalars or struct with
scalars.
Future commits will add negative tests that check whether these
restrictions imposed for kfunc arguments are duly rejected by BPF
verifier or not.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211217015031.1278167-4-memxor@gmail.com
2021-12-17 07:20:24 +05:30
return - EINVAL ;
}
bpf: Introduce mem, size argument pair support for kfunc
BPF helpers can associate two adjacent arguments together to pass memory
of certain size, using ARG_PTR_TO_MEM and ARG_CONST_SIZE arguments.
Since we don't use bpf_func_proto for kfunc, we need to leverage BTF to
implement similar support.
The ARG_CONST_SIZE processing for helpers is refactored into a common
check_mem_size_reg helper that is shared with kfunc as well. kfunc
ptr_to_mem support follows logic similar to global functions, where
verification is done as if pointer is not null, even when it may be
null.
This leads to a simple to follow rule for writing kfunc: always check
the argument pointer for NULL, except when it is PTR_TO_CTX. Also, the
PTR_TO_CTX case is also only safe when the helper expecting pointer to
program ctx is not exposed to other programs where same struct is not
ctx type. In that case, the type check will fall through to other cases
and would permit passing other types of pointers, possibly NULL at
runtime.
Currently, we require the size argument to be suffixed with "__sz" in
the parameter name. This information is then recorded in kernel BTF and
verified during function argument checking. In the future we can use BTF
tagging instead, and modify the kernel function definitions. This will
be a purely kernel-side change.
This allows us to have some form of backwards compatibility for
structures that are passed in to the kernel function with their size,
and allow variable length structures to be passed in if they are
accompanied by a size parameter.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:47 +05:30
2022-09-20 09:59:42 +02:00
if ( arg_dynptr ) {
if ( reg - > type ! = PTR_TO_STACK ) {
bpf_log ( log , " arg#%d pointer type %s %s not to stack \n " ,
i , btf_type_str ( ref_t ) ,
ref_tname ) ;
return - EINVAL ;
}
if ( ! is_dynptr_reg_valid_init ( env , reg ) ) {
bpf_log ( log ,
" arg#%d pointer type %s %s must be valid and initialized \n " ,
i , btf_type_str ( ref_t ) ,
ref_tname ) ;
return - EINVAL ;
}
if ( ! is_dynptr_type_expected ( env , reg ,
ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL ) ) {
bpf_log ( log ,
" arg#%d pointer type %s %s points to unsupported dynamic pointer type \n " ,
i , btf_type_str ( ref_t ) ,
ref_tname ) ;
return - EINVAL ;
}
continue ;
}
bpf: Introduce mem, size argument pair support for kfunc
BPF helpers can associate two adjacent arguments together to pass memory
of certain size, using ARG_PTR_TO_MEM and ARG_CONST_SIZE arguments.
Since we don't use bpf_func_proto for kfunc, we need to leverage BTF to
implement similar support.
The ARG_CONST_SIZE processing for helpers is refactored into a common
check_mem_size_reg helper that is shared with kfunc as well. kfunc
ptr_to_mem support follows logic similar to global functions, where
verification is done as if pointer is not null, even when it may be
null.
This leads to a simple to follow rule for writing kfunc: always check
the argument pointer for NULL, except when it is PTR_TO_CTX. Also, the
PTR_TO_CTX case is also only safe when the helper expecting pointer to
program ctx is not exposed to other programs where same struct is not
ctx type. In that case, the type check will fall through to other cases
and would permit passing other types of pointers, possibly NULL at
runtime.
Currently, we require the size argument to be suffixed with "__sz" in
the parameter name. This information is then recorded in kernel BTF and
verified during function argument checking. In the future we can use BTF
tagging instead, and modify the kernel function definitions. This will
be a purely kernel-side change.
This allows us to have some form of backwards compatibility for
structures that are passed in to the kernel function with their size,
and allow variable length structures to be passed in if they are
accompanied by a size parameter.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-5-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:47 +05:30
/* Check for mem, len pair */
if ( arg_mem_size ) {
if ( check_kfunc_mem_size_reg ( env , & regs [ regno + 1 ] , regno + 1 ) ) {
bpf_log ( log , " arg#%d arg#%d memory, len pair leads to invalid memory access \n " ,
i , i + 1 ) ;
return - EINVAL ;
}
i + + ;
continue ;
}
bpf: Extend kfunc with PTR_TO_CTX, PTR_TO_MEM argument support
Allow passing PTR_TO_CTX, if the kfunc expects a matching struct type,
and punt to PTR_TO_MEM block if reg->type does not fall in one of
PTR_TO_BTF_ID or PTR_TO_SOCK* types. This will be used by future commits
to get access to XDP and TC PTR_TO_CTX, and pass various data (flags,
l4proto, netns_id, etc.) encoded in opts struct passed as pointer to
kfunc.
For PTR_TO_MEM support, arguments are currently limited to pointer to
scalar, or pointer to struct composed of scalars. This is done so that
unsafe scenarios (like passing PTR_TO_MEM where PTR_TO_BTF_ID of
in-kernel valid structure is expected, which may have pointers) are
avoided. Since the argument checking happens based on argument register
type, it is not easy to ascertain what the expected type is. In the
future, support for PTR_TO_MEM for kfunc can be extended to serve other
usecases. The struct type whose pointer is passed in may have maximum
nesting depth of 4, all recursively composed of scalars or struct with
scalars.
Future commits will add negative tests that check whether these
restrictions imposed for kfunc arguments are duly rejected by BPF
verifier or not.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211217015031.1278167-4-memxor@gmail.com
2021-12-17 07:20:24 +05:30
}
2021-03-24 18:51:36 -07:00
resolve_ret = btf_resolve_size ( btf , ref_t , & type_size ) ;
if ( IS_ERR ( resolve_ret ) ) {
2021-02-13 00:56:41 +04:00
bpf_log ( log ,
2021-03-24 18:51:36 -07:00
" arg#%d reference type('%s %s') size cannot be determined: %ld \n " ,
i , btf_type_str ( ref_t ) , ref_tname ,
PTR_ERR ( resolve_ret ) ) ;
return - EINVAL ;
2021-02-13 00:56:41 +04:00
}
2021-03-24 18:51:36 -07:00
if ( check_mem_reg ( env , reg , regno , type_size ) )
return - EINVAL ;
} else {
bpf: Extend kfunc with PTR_TO_CTX, PTR_TO_MEM argument support
Allow passing PTR_TO_CTX, if the kfunc expects a matching struct type,
and punt to PTR_TO_MEM block if reg->type does not fall in one of
PTR_TO_BTF_ID or PTR_TO_SOCK* types. This will be used by future commits
to get access to XDP and TC PTR_TO_CTX, and pass various data (flags,
l4proto, netns_id, etc.) encoded in opts struct passed as pointer to
kfunc.
For PTR_TO_MEM support, arguments are currently limited to pointer to
scalar, or pointer to struct composed of scalars. This is done so that
unsafe scenarios (like passing PTR_TO_MEM where PTR_TO_BTF_ID of
in-kernel valid structure is expected, which may have pointers) are
avoided. Since the argument checking happens based on argument register
type, it is not easy to ascertain what the expected type is. In the
future, support for PTR_TO_MEM for kfunc can be extended to serve other
usecases. The struct type whose pointer is passed in may have maximum
nesting depth of 4, all recursively composed of scalars or struct with
scalars.
Future commits will add negative tests that check whether these
restrictions imposed for kfunc arguments are duly rejected by BPF
verifier or not.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20211217015031.1278167-4-memxor@gmail.com
2021-12-17 07:20:24 +05:30
bpf_log ( log , " reg type unsupported for arg#%d %sfunction %s#%d \n " , i ,
is_kfunc ? " kernel " : " " , func_name , func_id ) ;
2021-03-24 18:51:36 -07:00
return - EINVAL ;
2019-11-14 10:57:16 -08:00
}
}
2021-03-24 18:51:36 -07:00
bpf: Add reference tracking support to kfunc
This patch adds verifier support for PTR_TO_BTF_ID return type of kfunc
to be a reference, by reusing acquire_reference_state/release_reference
support for existing in-kernel bpf helpers.
We make use of the three kfunc types:
- BTF_KFUNC_TYPE_ACQUIRE
Return true if kfunc_btf_id is an acquire kfunc. This will
acquire_reference_state for the returned PTR_TO_BTF_ID (this is the
only allowed return value). Note that acquire kfunc must always return a
PTR_TO_BTF_ID{_OR_NULL}, otherwise the program is rejected.
- BTF_KFUNC_TYPE_RELEASE
Return true if kfunc_btf_id is a release kfunc. This will release the
reference to the passed in PTR_TO_BTF_ID which has a reference state
(from earlier acquire kfunc).
The btf_check_func_arg_match returns the regno (of argument register,
hence > 0) if the kfunc is a release kfunc, and a proper referenced
PTR_TO_BTF_ID is being passed to it.
This is similar to how helper call check uses bpf_call_arg_meta to
store the ref_obj_id that is later used to release the reference.
Similar to in-kernel helper, we only allow passing one referenced
PTR_TO_BTF_ID as an argument. It can also be passed in to normal
kfunc, but in case of release kfunc there must always be one
PTR_TO_BTF_ID argument that is referenced.
- BTF_KFUNC_TYPE_RET_NULL
For kfunc returning PTR_TO_BTF_ID, tells if it can be NULL, hence
force caller to mark the pointer not null (using check) before
accessing it. Note that taking into account the case fixed by commit
93c230e3f5bd ("bpf: Enforce id generation for all may-be-null register type")
we assign a non-zero id for mark_ptr_or_null_reg logic. Later, if more
return types are supported by kfunc, which have a _OR_NULL variant, it
might be better to move this id generation under a common
reg_type_may_be_null check, similar to the case in the commit.
Referenced PTR_TO_BTF_ID is currently only limited to kfunc, but can be
extended in the future to other BPF helpers as well. For now, we can
rely on the btf_struct_ids_match check to ensure we get the pointer to
the expected struct type. In the future, care needs to be taken to avoid
ambiguity for reference PTR_TO_BTF_ID passed to release function, in
case multiple candidates can release same BTF ID.
e.g. there might be two release kfuncs (or kfunc and helper):
foo(struct abc *p);
bar(struct abc *p);
... such that both release a PTR_TO_BTF_ID with btf_id of struct abc. In
this case we would need to track the acquire function corresponding to
the release function to avoid type confusion, and store this information
in the register state so that an incorrect program can be rejected. This
is not a problem right now, hence it is left as an exercise for the
future patch introducing such a case in the kernel.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-6-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:48 +05:30
/* Either both are set, or neither */
WARN_ON_ONCE ( ( ref_obj_id & & ! ref_regno ) | | ( ! ref_obj_id & & ref_regno ) ) ;
2022-03-05 04:16:41 +05:30
/* We already made sure ref_obj_id is set only for one argument. We do
* allow ( ! rel & & ref_obj_id ) , so that passing such referenced
* PTR_TO_BTF_ID to other kfuncs works . Note that rel is only true when
* is_kfunc is true .
*/
if ( rel & & ! ref_obj_id ) {
bpf_log ( log , " release kernel function %s expects refcounted PTR_TO_BTF_ID \n " ,
func_name ) ;
return - EINVAL ;
bpf: Add reference tracking support to kfunc
This patch adds verifier support for PTR_TO_BTF_ID return type of kfunc
to be a reference, by reusing acquire_reference_state/release_reference
support for existing in-kernel bpf helpers.
We make use of the three kfunc types:
- BTF_KFUNC_TYPE_ACQUIRE
Return true if kfunc_btf_id is an acquire kfunc. This will
acquire_reference_state for the returned PTR_TO_BTF_ID (this is the
only allowed return value). Note that acquire kfunc must always return a
PTR_TO_BTF_ID{_OR_NULL}, otherwise the program is rejected.
- BTF_KFUNC_TYPE_RELEASE
Return true if kfunc_btf_id is a release kfunc. This will release the
reference to the passed in PTR_TO_BTF_ID which has a reference state
(from earlier acquire kfunc).
The btf_check_func_arg_match returns the regno (of argument register,
hence > 0) if the kfunc is a release kfunc, and a proper referenced
PTR_TO_BTF_ID is being passed to it.
This is similar to how helper call check uses bpf_call_arg_meta to
store the ref_obj_id that is later used to release the reference.
Similar to in-kernel helper, we only allow passing one referenced
PTR_TO_BTF_ID as an argument. It can also be passed in to normal
kfunc, but in case of release kfunc there must always be one
PTR_TO_BTF_ID argument that is referenced.
- BTF_KFUNC_TYPE_RET_NULL
For kfunc returning PTR_TO_BTF_ID, tells if it can be NULL, hence
force caller to mark the pointer not null (using check) before
accessing it. Note that taking into account the case fixed by commit
93c230e3f5bd ("bpf: Enforce id generation for all may-be-null register type")
we assign a non-zero id for mark_ptr_or_null_reg logic. Later, if more
return types are supported by kfunc, which have a _OR_NULL variant, it
might be better to move this id generation under a common
reg_type_may_be_null check, similar to the case in the commit.
Referenced PTR_TO_BTF_ID is currently only limited to kfunc, but can be
extended in the future to other BPF helpers as well. For now, we can
rely on the btf_struct_ids_match check to ensure we get the pointer to
the expected struct type. In the future, care needs to be taken to avoid
ambiguity for reference PTR_TO_BTF_ID passed to release function, in
case multiple candidates can release same BTF ID.
e.g. there might be two release kfuncs (or kfunc and helper):
foo(struct abc *p);
bar(struct abc *p);
... such that both release a PTR_TO_BTF_ID with btf_id of struct abc. In
this case we would need to track the acquire function corresponding to
the release function to avoid type confusion, and store this information
in the register state so that an incorrect program can be rejected. This
is not a problem right now, hence it is left as an exercise for the
future patch introducing such a case in the kernel.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-6-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:48 +05:30
}
2022-08-05 14:48:14 -07:00
if ( sleepable & & ! env - > prog - > aux - > sleepable ) {
bpf_log ( log , " kernel function %s is sleepable but the program is not \n " ,
func_name ) ;
return - EINVAL ;
}
2022-09-06 17:13:02 +02:00
if ( kfunc_meta & & ref_obj_id )
kfunc_meta - > ref_obj_id = ref_obj_id ;
bpf: Add reference tracking support to kfunc
This patch adds verifier support for PTR_TO_BTF_ID return type of kfunc
to be a reference, by reusing acquire_reference_state/release_reference
support for existing in-kernel bpf helpers.
We make use of the three kfunc types:
- BTF_KFUNC_TYPE_ACQUIRE
Return true if kfunc_btf_id is an acquire kfunc. This will
acquire_reference_state for the returned PTR_TO_BTF_ID (this is the
only allowed return value). Note that acquire kfunc must always return a
PTR_TO_BTF_ID{_OR_NULL}, otherwise the program is rejected.
- BTF_KFUNC_TYPE_RELEASE
Return true if kfunc_btf_id is a release kfunc. This will release the
reference to the passed in PTR_TO_BTF_ID which has a reference state
(from earlier acquire kfunc).
The btf_check_func_arg_match returns the regno (of argument register,
hence > 0) if the kfunc is a release kfunc, and a proper referenced
PTR_TO_BTF_ID is being passed to it.
This is similar to how helper call check uses bpf_call_arg_meta to
store the ref_obj_id that is later used to release the reference.
Similar to in-kernel helper, we only allow passing one referenced
PTR_TO_BTF_ID as an argument. It can also be passed in to normal
kfunc, but in case of release kfunc there must always be one
PTR_TO_BTF_ID argument that is referenced.
- BTF_KFUNC_TYPE_RET_NULL
For kfunc returning PTR_TO_BTF_ID, tells if it can be NULL, hence
force caller to mark the pointer not null (using check) before
accessing it. Note that taking into account the case fixed by commit
93c230e3f5bd ("bpf: Enforce id generation for all may-be-null register type")
we assign a non-zero id for mark_ptr_or_null_reg logic. Later, if more
return types are supported by kfunc, which have a _OR_NULL variant, it
might be better to move this id generation under a common
reg_type_may_be_null check, similar to the case in the commit.
Referenced PTR_TO_BTF_ID is currently only limited to kfunc, but can be
extended in the future to other BPF helpers as well. For now, we can
rely on the btf_struct_ids_match check to ensure we get the pointer to
the expected struct type. In the future, care needs to be taken to avoid
ambiguity for reference PTR_TO_BTF_ID passed to release function, in
case multiple candidates can release same BTF ID.
e.g. there might be two release kfuncs (or kfunc and helper):
foo(struct abc *p);
bar(struct abc *p);
... such that both release a PTR_TO_BTF_ID with btf_id of struct abc. In
this case we would need to track the acquire function corresponding to
the release function to avoid type confusion, and store this information
in the register state so that an incorrect program can be rejected. This
is not a problem right now, hence it is left as an exercise for the
future patch introducing such a case in the kernel.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-6-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:48 +05:30
/* returns argument register number > 0 in case of reference release kfunc */
return rel ? ref_regno : 0 ;
2021-03-24 18:51:36 -07:00
}
2022-09-06 17:12:58 +02:00
/* Compare BTF of a function declaration with given bpf_reg_state.
2021-03-24 18:51:36 -07:00
* Returns :
* EFAULT - there is a verifier bug . Abort verification .
* EINVAL - there is a type mismatch or BTF is not available .
* 0 - BTF matches with what bpf_reg_state expects .
* Only PTR_TO_CTX and SCALAR_VALUE states are recognized .
*/
int btf_check_subprog_arg_match ( struct bpf_verifier_env * env , int subprog ,
struct bpf_reg_state * regs )
{
struct bpf_prog * prog = env - > prog ;
struct btf * btf = prog - > aux - > btf ;
bool is_global ;
u32 btf_id ;
int err ;
/* Without func_info there is no BTF description of the subprog to compare against. */
if ( ! prog - > aux - > func_info )
return - EINVAL ;
btf_id = prog - > aux - > func_info [ subprog ] . type_id ;
/* A zero type_id is reported as -EFAULT, not -EINVAL. */
if ( ! btf_id )
return - EFAULT ;
/* A subprog already flagged unreliable (set at the bottom of this function on
 * a previous failure) is rejected immediately, without re-running the check.
 */
if ( prog - > aux - > func_info_aux [ subprog ] . unreliable )
return - EINVAL ;
is_global = prog - > aux - > func_info_aux [ subprog ] . linkage = = BTF_FUNC_GLOBAL ;
2022-09-06 17:13:02 +02:00
/* NULL is passed in the slot where btf_check_kfunc_arg_match() passes its kfunc
 * meta pointer. The last argument is false here, whereas
 * btf_check_subprog_call() passes true.
 * NOTE(review): the exact meaning of that last flag is defined by
 * btf_check_func_arg_match(); confirm there.
 */
err = btf_check_func_arg_match ( env , btf , btf_id , regs , is_global , NULL , false ) ;
2022-09-06 17:12:58 +02:00
/* Compiler optimizations can remove arguments from static functions
* or mismatched type can be passed into a global function .
* In such cases mark the function as unreliable from BTF point of view .
*/
/* Any non-zero result, whatever the error code, sets the flag. */
if ( err )
prog - > aux - > func_info_aux [ subprog ] . unreliable = true ;
return err ;
}
/* Compare BTF of a function call with given bpf_reg_state.
* Returns :
* EFAULT - there is a verifier bug . Abort verification .
* EINVAL - there is a type mismatch or BTF is not available .
* 0 - BTF matches with what bpf_reg_state expects .
* Only PTR_TO_CTX and SCALAR_VALUE states are recognized .
*
* NOTE : the code is duplicated from btf_check_subprog_arg_match ( )
* because btf_check_func_arg_match ( ) is still doing both . Once that
* function is split in 2 , we can call from here btf_check_subprog_arg_match ( )
* first , and then treat the calling part in a new code path .
*/
int btf_check_subprog_call ( struct bpf_verifier_env * env , int subprog ,
struct bpf_reg_state * regs )
{
struct bpf_prog * prog = env - > prog ;
struct btf * btf = prog - > aux - > btf ;
bool is_global ;
u32 btf_id ;
int err ;
/* Without func_info there is no BTF description of the subprog to compare against. */
if ( ! prog - > aux - > func_info )
return - EINVAL ;
btf_id = prog - > aux - > func_info [ subprog ] . type_id ;
/* A zero type_id is reported as -EFAULT, not -EINVAL. */
if ( ! btf_id )
return - EFAULT ;
/* A subprog already flagged unreliable (set at the bottom of this function on
 * a previous failure) is rejected immediately, without re-running the check.
 */
if ( prog - > aux - > func_info_aux [ subprog ] . unreliable )
return - EINVAL ;
is_global = prog - > aux - > func_info_aux [ subprog ] . linkage = = BTF_FUNC_GLOBAL ;
2022-09-06 17:13:02 +02:00
/* Same call as in btf_check_subprog_arg_match() except that the last argument
 * is true instead of false; this is the only difference between the two
 * functions. NULL is passed in the slot where btf_check_kfunc_arg_match()
 * passes its kfunc meta pointer.
 * NOTE(review): the exact meaning of the last flag is defined by
 * btf_check_func_arg_match(); confirm there.
 */
err = btf_check_func_arg_match ( env , btf , btf_id , regs , is_global , NULL , true ) ;
2021-03-24 18:51:36 -07:00
2020-01-09 22:41:20 -08:00
/* Compiler optimizations can remove arguments from static functions
* or mismatched type can be passed into a global function .
* In such cases mark the function as unreliable from BTF point of view .
*/
2021-03-24 18:51:36 -07:00
/* Any non-zero result, whatever the error code, sets the flag. */
if ( err )
prog - > aux - > func_info_aux [ subprog ] . unreliable = true ;
return err ;
2020-01-09 22:41:20 -08:00
}
bpf: Support bpf program calling kernel function
This patch adds support to BPF verifier to allow bpf program calling
kernel function directly.
The use case included in this set is to allow bpf-tcp-cc to directly
call some tcp-cc helper functions (e.g. "tcp_cong_avoid_ai()"). Those
functions have already been used by some kernel tcp-cc implementations.
This set will also allow the bpf-tcp-cc program to directly call the
kernel tcp-cc implementation, For example, a bpf_dctcp may only want to
implement its own dctcp_cwnd_event() and reuse other dctcp_*() directly
from the kernel tcp_dctcp.c instead of reimplementing (or
copy-and-pasting) them.
The tcp-cc kernel functions mentioned above will be white listed
for the struct_ops bpf-tcp-cc programs to use in a later patch.
The white listed functions are not bounded to a fixed ABI contract.
Those functions have already been used by the existing kernel tcp-cc.
If any of them has changed, both in-tree and out-of-tree kernel tcp-cc
implementations have to be changed. The same goes for the struct_ops
bpf-tcp-cc programs which have to be adjusted accordingly.
This patch is to make the required changes in the bpf verifier.
First change is in btf.c, it adds a case in "btf_check_func_arg_match()".
When the passed in "btf->kernel_btf == true", it means matching the
verifier regs' states with a kernel function. This will handle the
PTR_TO_BTF_ID reg. It also maps PTR_TO_SOCK_COMMON, PTR_TO_SOCKET,
and PTR_TO_TCP_SOCK to its kernel's btf_id.
In the later libbpf patch, the insn calling a kernel function will
look like:
insn->code == (BPF_JMP | BPF_CALL)
insn->src_reg == BPF_PSEUDO_KFUNC_CALL /* <- new in this patch */
insn->imm == func_btf_id /* btf_id of the running kernel */
[ For the future calling function-in-kernel-module support, an array
of module btf_fds can be passed at the load time and insn->off
can be used to index into this array. ]
At the early stage of verifier, the verifier will collect all kernel
function calls into "struct bpf_kfunc_desc". Those
descriptors are stored in "prog->aux->kfunc_tab" and will
be available to the JIT. Since this "add" operation is similar
to the current "add_subprog()" and looking for the same insn->code,
they are done together in the new "add_subprog_and_kfunc()".
In the "do_check()" stage, the new "check_kfunc_call()" is added
to verify the kernel function call instruction:
1. Ensure the kernel function can be used by a particular BPF_PROG_TYPE.
A new bpf_verifier_ops "check_kfunc_call" is added to do that.
The bpf-tcp-cc struct_ops program will implement this function in
a later patch.
2. Call "btf_check_kfunc_args_match()" to ensure the regs can be
used as the args of a kernel function.
3. Mark the regs' type, subreg_def, and zext_dst.
At the later do_misc_fixups() stage, the new fixup_kfunc_call()
will replace the insn->imm with the function address (relative
to __bpf_call_base). If needed, the jit can find the btf_func_model
by calling the new bpf_jit_find_kfunc_model(prog, insn).
With the imm set to the function address, "bpftool prog dump xlated"
will be able to display the kernel function calls the same way as
it displays other bpf helper calls.
gpl_compatible program is required to call kernel function.
This feature currently requires JIT.
The verifier selftests are adjusted because of the changes in
the verbose log in add_subprog_and_kfunc().
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015142.1544736-1-kafai@fb.com
2021-03-24 18:51:42 -07:00
/* Check the argument registers of a kernel function (kfunc) call against the
 * BTF of the function identified by func_id in btf.
 *
 * Thin wrapper around btf_check_func_arg_match(): env, btf, func_id, regs and
 * meta are forwarded unchanged, and both boolean arguments are fixed to true.
 * The result of btf_check_func_arg_match() is returned as-is.
 * NOTE(review): the commit text below states that for a release kfunc the
 * callee returns the argument register number (> 0) instead of 0; confirm
 * against btf_check_func_arg_match() before relying on "0 means success".
 */
int btf_check_kfunc_arg_match ( struct bpf_verifier_env * env ,
const struct btf * btf , u32 func_id ,
2022-07-21 15:42:35 +02:00
struct bpf_reg_state * regs ,
2022-09-06 17:13:02 +02:00
struct bpf_kfunc_arg_meta * meta )
bpf: Support bpf program calling kernel function
This patch adds support to BPF verifier to allow bpf program calling
kernel function directly.
The use case included in this set is to allow bpf-tcp-cc to directly
call some tcp-cc helper functions (e.g. "tcp_cong_avoid_ai()"). Those
functions have already been used by some kernel tcp-cc implementations.
This set will also allow the bpf-tcp-cc program to directly call the
kernel tcp-cc implementation, For example, a bpf_dctcp may only want to
implement its own dctcp_cwnd_event() and reuse other dctcp_*() directly
from the kernel tcp_dctcp.c instead of reimplementing (or
copy-and-pasting) them.
The tcp-cc kernel functions mentioned above will be white listed
for the struct_ops bpf-tcp-cc programs to use in a later patch.
The white listed functions are not bounded to a fixed ABI contract.
Those functions have already been used by the existing kernel tcp-cc.
If any of them has changed, both in-tree and out-of-tree kernel tcp-cc
implementations have to be changed. The same goes for the struct_ops
bpf-tcp-cc programs which have to be adjusted accordingly.
This patch is to make the required changes in the bpf verifier.
First change is in btf.c, it adds a case in "btf_check_func_arg_match()".
When the passed in "btf->kernel_btf == true", it means matching the
verifier regs' states with a kernel function. This will handle the
PTR_TO_BTF_ID reg. It also maps PTR_TO_SOCK_COMMON, PTR_TO_SOCKET,
and PTR_TO_TCP_SOCK to its kernel's btf_id.
In the later libbpf patch, the insn calling a kernel function will
look like:
insn->code == (BPF_JMP | BPF_CALL)
insn->src_reg == BPF_PSEUDO_KFUNC_CALL /* <- new in this patch */
insn->imm == func_btf_id /* btf_id of the running kernel */
[ For the future calling function-in-kernel-module support, an array
of module btf_fds can be passed at the load time and insn->off
can be used to index into this array. ]
At the early stage of verifier, the verifier will collect all kernel
function calls into "struct bpf_kfunc_desc". Those
descriptors are stored in "prog->aux->kfunc_tab" and will
be available to the JIT. Since this "add" operation is similar
to the current "add_subprog()" and looking for the same insn->code,
they are done together in the new "add_subprog_and_kfunc()".
In the "do_check()" stage, the new "check_kfunc_call()" is added
to verify the kernel function call instruction:
1. Ensure the kernel function can be used by a particular BPF_PROG_TYPE.
A new bpf_verifier_ops "check_kfunc_call" is added to do that.
The bpf-tcp-cc struct_ops program will implement this function in
a later patch.
2. Call "btf_check_kfunc_args_match()" to ensure the regs can be
used as the args of a kernel function.
3. Mark the regs' type, subreg_def, and zext_dst.
At the later do_misc_fixups() stage, the new fixup_kfunc_call()
will replace the insn->imm with the function address (relative
to __bpf_call_base). If needed, the jit can find the btf_func_model
by calling the new bpf_jit_find_kfunc_model(prog, insn).
With the imm set to the function address, "bpftool prog dump xlated"
will be able to display the kernel function calls the same way as
it displays other bpf helper calls.
gpl_compatible program is required to call kernel function.
This feature currently requires JIT.
The verifier selftests are adjusted because of the changes in
the verbose log in add_subprog_and_kfunc().
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015142.1544736-1-kafai@fb.com
2021-03-24 18:51:42 -07:00
{
2022-09-06 17:13:02 +02:00
return btf_check_func_arg_match ( env , btf , func_id , regs , true , meta , true ) ;
bpf: Support bpf program calling kernel function
This patch adds support to BPF verifier to allow bpf program calling
kernel function directly.
The use case included in this set is to allow bpf-tcp-cc to directly
call some tcp-cc helper functions (e.g. "tcp_cong_avoid_ai()"). Those
functions have already been used by some kernel tcp-cc implementations.
This set will also allow the bpf-tcp-cc program to directly call the
kernel tcp-cc implementation, For example, a bpf_dctcp may only want to
implement its own dctcp_cwnd_event() and reuse other dctcp_*() directly
from the kernel tcp_dctcp.c instead of reimplementing (or
copy-and-pasting) them.
The tcp-cc kernel functions mentioned above will be white listed
for the struct_ops bpf-tcp-cc programs to use in a later patch.
The white listed functions are not bounded to a fixed ABI contract.
Those functions have already been used by the existing kernel tcp-cc.
If any of them has changed, both in-tree and out-of-tree kernel tcp-cc
implementations have to be changed. The same goes for the struct_ops
bpf-tcp-cc programs which have to be adjusted accordingly.
This patch is to make the required changes in the bpf verifier.
First change is in btf.c, it adds a case in "btf_check_func_arg_match()".
When the passed in "btf->kernel_btf == true", it means matching the
verifier regs' states with a kernel function. This will handle the
PTR_TO_BTF_ID reg. It also maps PTR_TO_SOCK_COMMON, PTR_TO_SOCKET,
and PTR_TO_TCP_SOCK to its kernel's btf_id.
In the later libbpf patch, the insn calling a kernel function will
look like:
insn->code == (BPF_JMP | BPF_CALL)
insn->src_reg == BPF_PSEUDO_KFUNC_CALL /* <- new in this patch */
insn->imm == func_btf_id /* btf_id of the running kernel */
[ For the future calling function-in-kernel-module support, an array
of module btf_fds can be passed at the load time and insn->off
can be used to index into this array. ]
At the early stage of verifier, the verifier will collect all kernel
function calls into "struct bpf_kfunc_desc". Those
descriptors are stored in "prog->aux->kfunc_tab" and will
be available to the JIT. Since this "add" operation is similar
to the current "add_subprog()" and looking for the same insn->code,
they are done together in the new "add_subprog_and_kfunc()".
In the "do_check()" stage, the new "check_kfunc_call()" is added
to verify the kernel function call instruction:
1. Ensure the kernel function can be used by a particular BPF_PROG_TYPE.
A new bpf_verifier_ops "check_kfunc_call" is added to do that.
The bpf-tcp-cc struct_ops program will implement this function in
a later patch.
2. Call "btf_check_kfunc_args_match()" to ensure the regs can be
used as the args of a kernel function.
3. Mark the regs' type, subreg_def, and zext_dst.
At the later do_misc_fixups() stage, the new fixup_kfunc_call()
will replace the insn->imm with the function address (relative
to __bpf_call_base). If needed, the jit can find the btf_func_model
by calling the new bpf_jit_find_kfunc_model(prog, insn).
With the imm set to the function address, "bpftool prog dump xlated"
will be able to display the kernel function calls the same way as
it displays other bpf helper calls.
gpl_compatible program is required to call kernel function.
This feature currently requires JIT.
The verifier selftests are adjusted because of the changes in
the verbose log in add_subprog_and_kfunc().
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210325015142.1544736-1-kafai@fb.com
2021-03-24 18:51:42 -07:00
}
2020-01-09 22:41:20 -08:00
/* Convert BTF of a function into bpf_reg_state if possible
* Returns :
* EFAULT - there is a verifier bug . Abort verification .
* EINVAL - cannot convert BTF .
* 0 - Successfully converted BTF into bpf_reg_state
* ( either PTR_TO_CTX or SCALAR_VALUE ) .
*/
int btf_prepare_func_args ( struct bpf_verifier_env * env , int subprog ,
2021-02-13 00:56:39 +04:00
struct bpf_reg_state * regs )
2020-01-09 22:41:20 -08:00
{
struct bpf_verifier_log * log = & env - > log ;
struct bpf_prog * prog = env - > prog ;
2020-01-20 16:53:46 -08:00
enum bpf_prog_type prog_type = prog - > type ;
2020-01-09 22:41:20 -08:00
struct btf * btf = prog - > aux - > btf ;
const struct btf_param * args ;
2021-02-13 00:56:41 +04:00
const struct btf_type * t , * ref_t ;
2020-01-09 22:41:20 -08:00
u32 i , nargs , btf_id ;
const char * tname ;
if ( ! prog - > aux - > func_info | |
prog - > aux - > func_info_aux [ subprog ] . linkage ! = BTF_FUNC_GLOBAL ) {
bpf_log ( log , " Verifier bug \n " ) ;
return - EFAULT ;
}
btf_id = prog - > aux - > func_info [ subprog ] . type_id ;
if ( ! btf_id ) {
bpf_log ( log , " Global functions need valid BTF \n " ) ;
return - EFAULT ;
}
t = btf_type_by_id ( btf , btf_id ) ;
if ( ! t | | ! btf_type_is_func ( t ) ) {
/* These checks were already done by the verifier while loading
* struct bpf_func_info
*/
bpf_log ( log , " BTF of func#%d doesn't point to KIND_FUNC \n " ,
subprog ) ;
return - EFAULT ;
}
tname = btf_name_by_offset ( btf , t - > name_off ) ;
if ( log - > level & BPF_LOG_LEVEL )
bpf_log ( log , " Validating %s() func#%d... \n " ,
tname , subprog ) ;
if ( prog - > aux - > func_info_aux [ subprog ] . unreliable ) {
bpf_log ( log , " Verifier bug in function %s() \n " , tname ) ;
return - EFAULT ;
}
2020-01-20 16:53:46 -08:00
if ( prog_type = = BPF_PROG_TYPE_EXT )
2020-09-29 14:45:50 +02:00
prog_type = prog - > aux - > dst_prog - > type ;
2020-01-09 22:41:20 -08:00
t = btf_type_by_id ( btf , t - > type ) ;
if ( ! t | | ! btf_type_is_func_proto ( t ) ) {
bpf_log ( log , " Invalid type of function %s() \n " , tname ) ;
return - EFAULT ;
}
args = ( const struct btf_param * ) ( t + 1 ) ;
nargs = btf_type_vlen ( t ) ;
2021-02-26 00:26:29 +04:00
if ( nargs > MAX_BPF_FUNC_REG_ARGS ) {
bpf_log ( log , " Global function %s() with %d > %d args. Buggy compiler. \n " ,
tname , nargs , MAX_BPF_FUNC_REG_ARGS ) ;
2020-01-09 22:41:20 -08:00
return - EINVAL ;
}
/* check that function returns int */
t = btf_type_by_id ( btf , t - > type ) ;
while ( btf_type_is_modifier ( t ) )
t = btf_type_by_id ( btf , t - > type ) ;
bpf: Add btf enum64 support
Currently, BTF only supports up to 32bit enum value with BTF_KIND_ENUM.
But in kernel, some enum indeed has 64bit values, e.g.,
in uapi bpf.h, we have
enum {
BPF_F_INDEX_MASK = 0xffffffffULL,
BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
};
In this case, BTF_KIND_ENUM will encode the value of BPF_F_CTXLEN_MASK
as 0, which certainly is incorrect.
This patch added a new btf kind, BTF_KIND_ENUM64, which permits
64bit value to cover the above use case. The BTF_KIND_ENUM64 has
the following three fields followed by the common type:
struct btf_enum64 {
__u32 name_off;
__u32 val_lo32;
__u32 val_hi32;
};
Currently, btf type section has an alignment of 4 as all element types
are u32. Representing the value with __u64 will introduce a pad
for bpf_enum64 and may also introduce misalignment for the 64bit value.
Hence, two members of val_hi32 and val_lo32 are chosen to avoid these issues.
The kflag is also introduced for BTF_KIND_ENUM and BTF_KIND_ENUM64
to indicate whether the value is signed or unsigned. The kflag intends
to provide consistent output of BTF C format with the original
source code. For example, the original BTF_KIND_ENUM bit value is 0xffffffff.
The format C has two choices, printing out 0xffffffff or -1 and current libbpf
prints out as unsigned value. But if the signedness is preserved in btf,
the value can be printed the same as the original source code.
The kflag value 0 means unsigned values, which is consistent to the default
by libbpf and should also cover most cases as well.
The new BTF_KIND_ENUM64 is intended to support the enum value represented as
64bit value. But it can represent all BTF_KIND_ENUM values as well.
The compiler ([1]) and pahole will generate BTF_KIND_ENUM64 only if the value has
to be represented with 64 bits.
In addition, a static inline function btf_kind_core_compat() is introduced which
will be used later when libbpf relo_core.c changed. Here the kernel shares the
same relo_core.c with libbpf.
[1] https://reviews.llvm.org/D124641
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220607062600.3716578-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-06-06 23:26:00 -07:00
if ( ! btf_type_is_int ( t ) & & ! btf_is_any_enum ( t ) ) {
2020-01-09 22:41:20 -08:00
bpf_log ( log ,
" Global function %s() doesn't return scalar. Only those are supported. \n " ,
tname ) ;
return - EINVAL ;
}
/* Convert BTF function arguments into verifier types.
* Only PTR_TO_CTX and SCALAR are supported atm .
*/
for ( i = 0 ; i < nargs ; i + + ) {
2021-02-13 00:56:39 +04:00
struct bpf_reg_state * reg = & regs [ i + 1 ] ;
2020-01-09 22:41:20 -08:00
t = btf_type_by_id ( btf , args [ i ] . type ) ;
while ( btf_type_is_modifier ( t ) )
t = btf_type_by_id ( btf , t - > type ) ;
bpf: Add btf enum64 support
Currently, BTF only supports up to 32bit enum value with BTF_KIND_ENUM.
But in kernel, some enum indeed has 64bit values, e.g.,
in uapi bpf.h, we have
enum {
BPF_F_INDEX_MASK = 0xffffffffULL,
BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
};
In this case, BTF_KIND_ENUM will encode the value of BPF_F_CTXLEN_MASK
as 0, which certainly is incorrect.
This patch added a new btf kind, BTF_KIND_ENUM64, which permits
64bit value to cover the above use case. The BTF_KIND_ENUM64 has
the following three fields followed by the common type:
struct btf_enum64 {
__u32 name_off;
__u32 val_lo32;
__u32 val_hi32;
};
Currently, btf type section has an alignment of 4 as all element types
are u32. Representing the value with __u64 will introduce a pad
for bpf_enum64 and may also introduce misalignment for the 64bit value.
Hence, two members of val_hi32 and val_lo32 are chosen to avoid these issues.
The kflag is also introduced for BTF_KIND_ENUM and BTF_KIND_ENUM64
to indicate whether the value is signed or unsigned. The kflag intends
to provide consistent output of BTF C format with the original
source code. For example, the original BTF_KIND_ENUM bit value is 0xffffffff.
The format C has two choices, printing out 0xffffffff or -1 and current libbpf
prints out as unsigned value. But if the signedness is preserved in btf,
the value can be printed the same as the original source code.
The kflag value 0 means unsigned values, which is consistent to the default
by libbpf and should also cover most cases as well.
The new BTF_KIND_ENUM64 is intended to support the enum value represented as
64bit value. But it can represent all BTF_KIND_ENUM values as well.
The compiler ([1]) and pahole will generate BTF_KIND_ENUM64 only if the value has
to be represented with 64 bits.
In addition, a static inline function btf_kind_core_compat() is introduced which
will be used later when libbpf relo_core.c changed. Here the kernel shares the
same relo_core.c with libbpf.
[1] https://reviews.llvm.org/D124641
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220607062600.3716578-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-06-06 23:26:00 -07:00
if ( btf_type_is_int ( t ) | | btf_is_any_enum ( t ) ) {
2021-02-13 00:56:39 +04:00
reg - > type = SCALAR_VALUE ;
2020-01-09 22:41:20 -08:00
continue ;
}
2021-02-13 00:56:41 +04:00
if ( btf_type_is_ptr ( t ) ) {
if ( btf_get_prog_ctx_type ( log , btf , t , prog_type , i ) ) {
reg - > type = PTR_TO_CTX ;
continue ;
}
t = btf_type_skip_modifiers ( btf , t - > type , NULL ) ;
ref_t = btf_resolve_size ( btf , t , & reg - > mem_size ) ;
if ( IS_ERR ( ref_t ) ) {
bpf_log ( log ,
" arg#%d reference type('%s %s') size cannot be determined: %ld \n " ,
i , btf_type_str ( t ) , btf_name_by_offset ( btf , t - > name_off ) ,
PTR_ERR ( ref_t ) ) ;
return - EINVAL ;
}
2021-12-16 16:31:49 -08:00
reg - > type = PTR_TO_MEM | PTR_MAYBE_NULL ;
2021-02-13 00:56:41 +04:00
reg - > id = + + env - > id_gen ;
2020-01-09 22:41:20 -08:00
continue ;
}
bpf_log ( log , " Arg#%d type %s in %s() is not supported yet. \n " ,
2022-09-16 13:28:00 -07:00
i , btf_type_str ( t ) , tname ) ;
2020-01-09 22:41:20 -08:00
return - EINVAL ;
}
2019-11-14 10:57:16 -08:00
return 0 ;
}
2020-09-28 12:31:04 +01:00
/*
 * Common entry point for rendering the object at @obj as BTF type @type_id.
 *
 * Records @btf in @show, clears the per-call 'state' and 'obj' areas of
 * @show, then dispatches to the show callback of the type's kind.
 */
static void btf_type_show(const struct btf *btf, u32 type_id, void *obj,
			  struct btf_show *show)
{
	const struct btf_type *type;

	/* Start every traversal with no leftover state from a previous one. */
	memset(&show->obj, 0, sizeof(show->obj));
	memset(&show->state, 0, sizeof(show->state));
	show->btf = btf;

	type = btf_type_by_id(btf, type_id);
	btf_type_ops(type)->show(btf, type, type_id, obj, 0, show);
}
/* showfn that forwards formatted output to the seq_file kept in show->target. */
static void btf_seq_show(struct btf_show *show, const char *fmt,
			 va_list args)
{
	struct seq_file *seq = (struct seq_file *)show->target;

	seq_vprintf(seq, fmt, args);
}
2020-09-28 12:31:09 +01:00
/*
 * Render @obj as BTF type @type_id into seq_file @m, honouring @flags.
 *
 * Returns the status left in the show state by the traversal.
 */
int btf_type_seq_show_flags(const struct btf *btf, u32 type_id,
			    void *obj, struct seq_file *m, u64 flags)
{
	struct btf_show seq_ctx;

	seq_ctx.flags = flags;
	seq_ctx.showfn = btf_seq_show;
	seq_ctx.target = m;

	btf_type_show(btf, type_id, obj, &seq_ctx);

	return seq_ctx.state.status;
}
2018-04-18 15:56:00 -07:00
/*
 * Convenience wrapper around btf_type_seq_show_flags() using a fixed flag
 * set (NONAME | COMPACT | ZERO | UNSAFE); the resulting status is ignored.
 */
void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
		       struct seq_file *m)
{
	const u64 default_flags = BTF_SHOW_NONAME | BTF_SHOW_COMPACT |
				  BTF_SHOW_ZERO | BTF_SHOW_UNSAFE;

	(void)btf_type_seq_show_flags(btf, type_id, obj, m, default_flags);
}
/*
 * Show context used when rendering into a caller-supplied string buffer.
 * 'show' must remain the first member: btf_snprintf_show() casts the
 * struct btf_show pointer it receives back to this structure.
 */
struct btf_show_snprintf {
struct btf_show show ;
int len_left ; /* space left in string */
int len ; /* length we would have written */
} ;
/*
 * showfn that appends formatted output to the string buffer tracked by the
 * enclosing struct btf_show_snprintf.
 *
 * 'len' accumulates the length that would have been written regardless of
 * the space available; on a vsnprintf() error it is overwritten with the
 * negative return value and further output is suppressed.
 */
static void btf_snprintf_show(struct btf_show *show, const char *fmt,
			      va_list args)
{
	struct btf_show_snprintf *ctx = (struct btf_show_snprintf *)show;
	int written = vsnprintf(show->target, ctx->len_left, fmt, args);

	if (written < 0) {
		ctx->len_left = 0;
		ctx->len = written;
		return;
	}

	/* Both remaining cases account for the full formatted length. */
	ctx->len += written;

	if (written >= ctx->len_left) {
		/* no space, drive on to get length we would have written */
		ctx->len_left = 0;
		return;
	}

	ctx->len_left -= written;
	show->target += written;
}
/*
 * Render @obj as BTF type @type_id into @buf, which holds @len bytes.
 *
 * Returns the show status if the traversal recorded one, otherwise the
 * length that would have been written.
 */
int btf_type_snprintf_show(const struct btf *btf, u32 type_id, void *obj,
			   char *buf, int len, u64 flags)
{
	struct btf_show_snprintf str_ctx;

	str_ctx.len = 0;
	str_ctx.len_left = len;
	str_ctx.show.showfn = btf_snprintf_show;
	str_ctx.show.flags = flags;
	str_ctx.show.target = buf;

	btf_type_show(btf, type_id, obj, (struct btf_show *)&str_ctx);

	/* No error recorded: report the length we would have written. */
	if (!str_ctx.show.state.status)
		return str_ctx.len;

	/* If we encountered an error, return it. */
	return str_ctx.show.state.status;
}
2018-04-18 15:56:01 -07:00
2019-08-20 14:53:46 +01:00
# ifdef CONFIG_PROC_FS
/*
 * show_fdinfo hook for BTF file descriptors: prints the id of the BTF
 * object stored in the file's private_data to seq_file @m.
 */
static void bpf_btf_show_fdinfo ( struct seq_file * m , struct file * filp )
{
const struct btf * btf = filp - > private_data ;
seq_printf ( m , " btf_id: \t %u \n " , btf - > id ) ;
}
# endif
2018-04-18 15:56:01 -07:00
static int btf_release ( struct inode * inode , struct file * filp )
{
btf_put ( filp - > private_data ) ;
return 0 ;
}
2018-04-18 15:56:02 -07:00
const struct file_operations btf_fops = {
2019-08-20 14:53:46 +01:00
# ifdef CONFIG_PROC_FS
. show_fdinfo = bpf_btf_show_fdinfo ,
# endif
2018-04-18 15:56:01 -07:00
. release = btf_release ,
} ;
2018-05-04 14:49:51 -07:00
/*
 * Create an anonymous-inode file descriptor for @btf using btf_fops,
 * opened O_RDONLY | O_CLOEXEC. Returns the result of anon_inode_getfd();
 * callers in this file treat a negative value as failure.
 */
static int __btf_new_fd ( struct btf * btf )
{
return anon_inode_getfd ( " btf " , & btf_fops , btf , O_RDONLY | O_CLOEXEC ) ;
}
2021-05-13 17:36:08 -07:00
/*
 * Parse the BTF blob described by @attr, allocate an id for it and return
 * a new file descriptor referring to it.
 *
 * Returns the new fd on success or a negative errno from parsing, id
 * allocation or fd creation.
 */
int btf_new_fd(const union bpf_attr *attr, bpfptr_t uattr)
{
	bpfptr_t btf_data = make_bpfptr(attr->btf, uattr.is_kernel);
	void __user *log_ubuf = u64_to_user_ptr(attr->btf_log_buf);
	struct btf *btf;
	int fd, err;

	btf = btf_parse(btf_data, attr->btf_size, attr->btf_log_level,
			log_ubuf, attr->btf_log_size);
	if (IS_ERR(btf))
		return PTR_ERR(btf);

	err = btf_alloc_id(btf);
	if (err) {
		/* The id was never published, so a direct free is still fine. */
		btf_free(btf);
		return err;
	}

	/*
	 * The BTF ID is published to the userspace.
	 * All BTF free must go through call_rcu() from
	 * now on (i.e. free by calling btf_put()).
	 */
	fd = __btf_new_fd(btf);
	if (fd < 0)
		btf_put(btf);

	return fd;
}
struct btf * btf_get_by_fd ( int fd )
{
struct btf * btf ;
struct fd f ;
f = fdget ( fd ) ;
if ( ! f . file )
return ERR_PTR ( - EBADF ) ;
if ( f . file - > f_op ! = & btf_fops ) {
fdput ( f ) ;
return ERR_PTR ( - EINVAL ) ;
}
btf = f . file - > private_data ;
2018-05-04 14:49:51 -07:00
refcount_inc ( & btf - > refcnt ) ;
2018-04-18 15:56:01 -07:00
fdput ( f ) ;
return btf ;
}
2018-04-18 15:56:02 -07:00
/*
 * Fill in the struct bpf_btf_info that user space points to through
 * attr->info.info for the BTF object @btf.
 *
 * The user-supplied structure is read first because it carries the
 * addresses and sizes of the buffers for the raw BTF data and the name.
 *
 * Returns 0 on success, -EFAULT when a user copy fails, -EINVAL when only
 * one of the name pointer and name length is set, and -ENOSPC when the
 * name buffer is too short (a truncated, NUL-terminated name is still
 * written and the info structure is still copied back in that case).
 */
int btf_get_info_by_fd ( const struct btf * btf ,
const union bpf_attr * attr ,
union bpf_attr __user * uattr )
{
2018-05-04 14:49:52 -07:00
struct bpf_btf_info __user * uinfo ;
2020-03-20 17:22:58 +01:00
struct bpf_btf_info info ;
2018-05-04 14:49:52 -07:00
u32 info_copy , btf_copy ;
void __user * ubtf ;
2020-11-09 17:19:29 -08:00
char __user * uname ;
u32 uinfo_len , uname_len , name_len ;
int ret = 0 ;
2018-04-18 15:56:02 -07:00
2018-05-04 14:49:52 -07:00
uinfo = u64_to_user_ptr ( attr - > info . info ) ;
uinfo_len = attr - > info . info_len ;
/* Never copy more than the smaller of the user's and the kernel's structure size. */
info_copy = min_t ( u32 , uinfo_len , sizeof ( info ) ) ;
2020-03-20 17:22:58 +01:00
/* Zero first so that fields beyond info_copy read as 0 below. */
memset ( & info , 0 , sizeof ( info ) ) ;
2018-05-04 14:49:52 -07:00
if ( copy_from_user ( & info , uinfo , info_copy ) )
return - EFAULT ;
info . id = btf - > id ;
ubtf = u64_to_user_ptr ( info . btf ) ;
/* Copy out at most as much raw BTF as the user buffer can hold ... */
btf_copy = min_t ( u32 , btf - > data_size , info . btf_size ) ;
if ( copy_to_user ( ubtf , btf - > data , btf_copy ) )
return - EFAULT ;
/* ... but always report the full size of the BTF data. */
info . btf_size = btf - > data_size ;
2020-11-09 17:19:29 -08:00
info . kernel_btf = btf - > kernel_btf ;
uname = u64_to_user_ptr ( info . name ) ;
uname_len = info . name_len ;
/* Name pointer and name length must be either both set or both zero. */
if ( ! uname ^ ! uname_len )
return - EINVAL ;
/* Reported name length is strlen(), i.e. without the terminating NUL. */
name_len = strlen ( btf - > name ) ;
info . name_len = name_len ;
if ( uname ) {
if ( uname_len > = name_len + 1 ) {
if ( copy_to_user ( uname , btf - > name , name_len + 1 ) )
return - EFAULT ;
} else {
char zero = ' \0 ' ;
/* Buffer too small: copy what fits and terminate it explicitly. */
if ( copy_to_user ( uname , btf - > name , uname_len - 1 ) )
return - EFAULT ;
if ( put_user ( zero , uname + uname_len - 1 ) )
return - EFAULT ;
/* let user-space know about too short buffer */
ret = - ENOSPC ;
}
}
2018-05-04 14:49:52 -07:00
/* Write back the filled-in info and the number of bytes actually copied. */
if ( copy_to_user ( uinfo , & info , info_copy ) | |
put_user ( info_copy , & uattr - > info . info_len ) )
2018-04-18 15:56:02 -07:00
return - EFAULT ;
2020-11-09 17:19:29 -08:00
return ret ;
2018-04-18 15:56:02 -07:00
}
2018-05-04 14:49:51 -07:00
/*
 * Open a new file descriptor for the BTF object registered under @id.
 *
 * Returns the fd, -ENOENT if no object with that id exists or its
 * refcount has already dropped to zero, or the error from fd creation.
 */
int btf_get_fd_by_id(u32 id)
{
	struct btf *btf;
	bool pinned;
	int fd;

	/* The lookup and the reference grab happen inside one RCU section. */
	rcu_read_lock();
	btf = idr_find(&btf_idr, id);
	pinned = btf && refcount_inc_not_zero(&btf->refcnt);
	rcu_read_unlock();

	if (!pinned)
		return -ENOENT;

	fd = __btf_new_fd(btf);
	if (fd < 0)
		btf_put(btf);

	return fd;
}
bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
Remove a permeating assumption throughout BPF verifier of vmlinux BTF. Instead,
wherever BTF type IDs are involved, also track the instance of struct btf that
goes along with the type ID. This allows to gradually add support for kernel
module BTFs and using/tracking module types across BPF helper calls and
registers.
This patch also renames btf_id() function to btf_obj_id() to minimize naming
clash with using btf_id to denote BTF *type* ID, rather than BTF *object*'s ID.
Also, although btf_vmlinux can't get destructed and thus doesn't need
refcounting, module BTFs need that, so apply BTF refcounting universally when
BPF program is using BTF-powered attachment (tp_btf, fentry/fexit, etc). This
makes for simpler clean up code.
Now that BTF type ID is not enough to uniquely identify a BTF type, extend BPF
trampoline key to include BTF object ID. To differentiate that from target
program BPF ID, set 31st bit of type ID. BTF type IDs (at least currently) are
not allowed to take full 32 bits, so there is no danger of confusing that bit
with a valid BTF type ID.
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20201203204634.1325171-10-andrii@kernel.org
2020-12-03 12:46:29 -08:00
u32 btf_obj_id ( const struct btf * btf )
2018-05-04 14:49:51 -07:00
{
return btf - > id ;
}
2020-08-25 21:21:19 +02:00
2020-12-03 12:46:30 -08:00
bool btf_is_kernel ( const struct btf * btf )
{
return btf - > kernel_btf ;
}
2021-01-11 23:55:18 -08:00
/*
 * True when @btf is kernel BTF whose name differs from the vmlinux name
 * literal compared against below.
 */
bool btf_is_module ( const struct btf * btf )
{
return btf - > kernel_btf & & strcmp ( btf - > name , " vmlinux " ) ! = 0 ;
}
bpf: Fix UAF due to race between btf_try_get_module and load_module
While working on code to populate kfunc BTF ID sets for module BTF from
its initcall, I noticed that by the time the initcall is invoked, the
module BTF can already be seen by userspace (and the BPF verifier). The
existing btf_try_get_module calls try_module_get which only fails if
mod->state == MODULE_STATE_GOING, i.e. it can increment module reference
when module initcall is happening in parallel.
Currently, BTF parsing happens from MODULE_STATE_COMING notifier
callback. At this point, the module initcalls have not been invoked.
The notifier callback parses and prepares the module BTF, allocates an
ID, which publishes it to userspace, and then adds it to the btf_modules
list allowing the kernel to invoke btf_try_get_module for the BTF.
However, at this point, the module has not been fully initialized (i.e.
its initcalls have not finished). The code in module.c can still fail
and free the module, without caring for other users. However, nothing
stops btf_try_get_module from succeeding between the state transition
from MODULE_STATE_COMING to MODULE_STATE_LIVE.
This leads to a use-after-free issue when BPF program loads
successfully in the state transition, load_module's do_init_module call
fails and frees the module, and BPF program fd on close calls module_put
for the freed module. Future patch has test case to verify we don't
regress in this area in future.
There are multiple points after prepare_coming_module (in load_module)
where failure can occur and module loading can return error. We
illustrate and test for the race using the last point where it can
practically occur (in module __init function).
An illustration of the race:
CPU 0 CPU 1
load_module
notifier_call(MODULE_STATE_COMING)
btf_parse_module
btf_alloc_id // Published to userspace
list_add(&btf_mod->list, btf_modules)
mod->init(...)
... ^
bpf_check |
check_pseudo_btf_id |
btf_try_get_module |
returns true | ...
... | module __init in progress
return prog_fd | ...
... V
if (ret < 0)
free_module(mod)
...
close(prog_fd)
...
bpf_prog_free_deferred
module_put(used_btf.mod) // use-after-free
We fix this issue by setting a flag BTF_MODULE_F_LIVE, from the notifier
callback when MODULE_STATE_LIVE state is reached for the module, so that
we return NULL from btf_try_get_module for modules that are not fully
formed. Since try_module_get already checks that module is not in
MODULE_STATE_GOING state, and that is the only transition a live module
can make before being removed from btf_modules list, this is enough to
close the race and prevent the bug.
A later selftest patch crafts the race condition artificially to verify
that it has been fixed, and that verifier fails to load program (with
ENXIO).
Lastly, a couple of comments:
1. Even if this race didn't exist, it seems more appropriate to only
access resources (ksyms and kfuncs) of a fully formed module which
has been initialized completely.
2. This patch was born out of need for synchronization against module
initcall for the next patch, so it is needed for correctness even
without the aforementioned race condition. The BTF resources
initialized by module initcall are set up once and then only looked
up, so just waiting until the initcall has finished ensures correct
behavior.
Fixes: 541c3bad8dc5 ("bpf: Support BPF ksym variables in kernel modules")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:44 +05:30
/* Flag bits kept in struct btf_module's 'flags' member. */
enum {
/* Set by btf_module_notify() when the module reaches MODULE_STATE_LIVE. */
BTF_MODULE_F_LIVE = ( 1 < < 0 ) ,
} ;
2020-11-09 17:19:31 -08:00
# ifdef CONFIG_DEBUG_INFO_BTF_MODULES
/* One entry per loaded kernel module that carries BTF data. */
struct btf_module {
	/* Link in the btf_modules list. */
	struct list_head list;
	/* Module this entry belongs to. */
	struct module *module;
	/* BTF object parsed from the module's BTF data. */
	struct btf *btf;
	/* sysfs binary attribute exposing the BTF; NULL if none was created. */
	struct bin_attribute *sysfs_attr;
	/*
	 * BTF_MODULE_F_* bits. BTF_MODULE_F_LIVE is set once the module has
	 * reached MODULE_STATE_LIVE; it exists so that btf_try_get_module()
	 * can refuse modules whose initcalls have not finished, closing a
	 * use-after-free race against a failing load_module().
	 */
	int flags;
};
/*
 * List of struct btf_module entries. The code in this file adds to, walks
 * and removes from it with btf_module_mutex held.
 */
static LIST_HEAD ( btf_modules ) ;
static DEFINE_MUTEX ( btf_module_mutex ) ;
/*
 * read handler of the per-module sysfs binary attribute: copies @len bytes
 * of the raw BTF data, starting at offset @off, into @buf.
 *
 * No bounds check is done here. NOTE(review): presumably the sysfs core
 * clamps @off/@len to the attribute's 'size' before calling - confirm.
 */
static ssize_t
btf_module_read(struct file *file, struct kobject *kobj,
		struct bin_attribute *bin_attr,
		char *buf, loff_t off, size_t len)
{
	const struct btf *btf = bin_attr->private;
	const void *src = btf->data + off;

	memcpy(buf, src, len);
	return len;
}
2021-12-01 10:10:31 -08:00
/* Forward declaration so that btf_module_notify() can call it. */
static void purge_cand_cache ( struct btf * btf ) ;
2020-11-09 17:19:31 -08:00
/*
 * Module notifier callback that keeps btf_modules in sync with module
 * state changes.
 *
 * MODULE_STATE_COMING: parse the module's BTF, allocate an id for it, add
 *   a btf_module entry to the list and, if sysfs is enabled, try to expose
 *   the raw BTF as a sysfs binary file.
 * MODULE_STATE_LIVE:   mark the module's entry with BTF_MODULE_F_LIVE.
 * MODULE_STATE_GOING:  unlink the entry and release what it owns.
 *
 * Modules without BTF data and any other state are ignored. The return
 * value is notifier_from_errno() of the recorded error (0 when none).
 */
static int btf_module_notify ( struct notifier_block * nb , unsigned long op ,
void * module )
{
struct btf_module * btf_mod , * tmp ;
struct module * mod = module ;
struct btf * btf ;
int err = 0 ;
/* Nothing to do without BTF data or for states not handled below. */
if ( mod - > btf_data_size = = 0 | |
bpf: Fix UAF due to race between btf_try_get_module and load_module
While working on code to populate kfunc BTF ID sets for module BTF from
its initcall, I noticed that by the time the initcall is invoked, the
module BTF can already be seen by userspace (and the BPF verifier). The
existing btf_try_get_module calls try_module_get which only fails if
mod->state == MODULE_STATE_GOING, i.e. it can increment module reference
when module initcall is happening in parallel.
Currently, BTF parsing happens from MODULE_STATE_COMING notifier
callback. At this point, the module initcalls have not been invoked.
The notifier callback parses and prepares the module BTF, allocates an
ID, which publishes it to userspace, and then adds it to the btf_modules
list allowing the kernel to invoke btf_try_get_module for the BTF.
However, at this point, the module has not been fully initialized (i.e.
its initcalls have not finished). The code in module.c can still fail
and free the module, without caring for other users. However, nothing
stops btf_try_get_module from succeeding between the state transition
from MODULE_STATE_COMING to MODULE_STATE_LIVE.
This leads to a use-after-free issue when BPF program loads
successfully in the state transition, load_module's do_init_module call
fails and frees the module, and BPF program fd on close calls module_put
for the freed module. Future patch has test case to verify we don't
regress in this area in future.
There are multiple points after prepare_coming_module (in load_module)
where failure can occur and module loading can return error. We
illustrate and test for the race using the last point where it can
practically occur (in module __init function).
An illustration of the race:
CPU 0 CPU 1
load_module
notifier_call(MODULE_STATE_COMING)
btf_parse_module
btf_alloc_id // Published to userspace
list_add(&btf_mod->list, btf_modules)
mod->init(...)
... ^
bpf_check |
check_pseudo_btf_id |
btf_try_get_module |
returns true | ...
... | module __init in progress
return prog_fd | ...
... V
if (ret < 0)
free_module(mod)
...
close(prog_fd)
...
bpf_prog_free_deferred
module_put(used_btf.mod) // use-after-free
We fix this issue by setting a flag BTF_MODULE_F_LIVE, from the notifier
callback when MODULE_STATE_LIVE state is reached for the module, so that
we return NULL from btf_try_get_module for modules that are not fully
formed. Since try_module_get already checks that module is not in
MODULE_STATE_GOING state, and that is the only transition a live module
can make before being removed from btf_modules list, this is enough to
close the race and prevent the bug.
A later selftest patch crafts the race condition artifically to verify
that it has been fixed, and that verifier fails to load program (with
ENXIO).
Lastly, a couple of comments:
1. Even if this race didn't exist, it seems more appropriate to only
access resources (ksyms and kfuncs) of a fully formed module which
has been initialized completely.
2. This patch was born out of need for synchronization against module
initcall for the next patch, so it is needed for correctness even
without the aforementioned race condition. The BTF resources
initialized by module initcall are set up once and then only looked
up, so just waiting until the initcall has finished ensures correct
behavior.
Fixes: 541c3bad8dc5 ("bpf: Support BPF ksym variables in kernel modules")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:44 +05:30
( op ! = MODULE_STATE_COMING & & op ! = MODULE_STATE_LIVE & &
op ! = MODULE_STATE_GOING ) )
2020-11-09 17:19:31 -08:00
goto out ;
switch ( op ) {
case MODULE_STATE_COMING :
btf_mod = kzalloc ( sizeof ( * btf_mod ) , GFP_KERNEL ) ;
if ( ! btf_mod ) {
err = - ENOMEM ;
goto out ;
}
btf = btf_parse_module ( mod - > name , mod - > btf_data , mod - > btf_data_size ) ;
if ( IS_ERR ( btf ) ) {
pr_warn ( " failed to validate module [%s] BTF: %ld \n " ,
mod - > name , PTR_ERR ( btf ) ) ;
kfree ( btf_mod ) ;
2022-02-23 01:28:14 +00:00
/* The parse error is only propagated when BTF mismatch is not allowed by config; otherwise err stays 0. */
if ( ! IS_ENABLED ( CONFIG_MODULE_ALLOW_BTF_MISMATCH ) )
err = PTR_ERR ( btf ) ;
2020-11-09 17:19:31 -08:00
goto out ;
}
err = btf_alloc_id ( btf ) ;
if ( err ) {
btf_free ( btf ) ;
kfree ( btf_mod ) ;
goto out ;
}
2021-12-01 10:10:31 -08:00
purge_cand_cache ( NULL ) ;
2020-11-09 17:19:31 -08:00
/* Make the new entry visible to list walkers. */
mutex_lock ( & btf_module_mutex ) ;
btf_mod - > module = module ;
btf_mod - > btf = btf ;
list_add ( & btf_mod - > list , & btf_modules ) ;
mutex_unlock ( & btf_module_mutex ) ;
/*
 * sysfs exposure is best effort: err is 0 when this point is reached, so
 * an allocation failure below returns success, and a registration
 * failure is logged and then reset to 0.
 */
if ( IS_ENABLED ( CONFIG_SYSFS ) ) {
struct bin_attribute * attr ;
attr = kzalloc ( sizeof ( * attr ) , GFP_KERNEL ) ;
if ( ! attr )
goto out ;
sysfs_bin_attr_init ( attr ) ;
attr - > attr . name = btf - > name ;
attr - > attr . mode = 0444 ;
attr - > size = btf - > data_size ;
attr - > private = btf ;
attr - > read = btf_module_read ;
err = sysfs_create_bin_file ( btf_kobj , attr ) ;
if ( err ) {
pr_warn ( " failed to register module [%s] BTF in sysfs: %d \n " ,
mod - > name , err ) ;
kfree ( attr ) ;
err = 0 ;
goto out ;
}
btf_mod - > sysfs_attr = attr ;
}
bpf: Fix UAF due to race between btf_try_get_module and load_module
While working on code to populate kfunc BTF ID sets for module BTF from
its initcall, I noticed that by the time the initcall is invoked, the
module BTF can already be seen by userspace (and the BPF verifier). The
existing btf_try_get_module calls try_module_get which only fails if
mod->state == MODULE_STATE_GOING, i.e. it can increment module reference
when module initcall is happening in parallel.
Currently, BTF parsing happens from MODULE_STATE_COMING notifier
callback. At this point, the module initcalls have not been invoked.
The notifier callback parses and prepares the module BTF, allocates an
ID, which publishes it to userspace, and then adds it to the btf_modules
list allowing the kernel to invoke btf_try_get_module for the BTF.
However, at this point, the module has not been fully initialized (i.e.
its initcalls have not finished). The code in module.c can still fail
and free the module, without caring for other users. However, nothing
stops btf_try_get_module from succeeding between the state transition
from MODULE_STATE_COMING to MODULE_STATE_LIVE.
This leads to a use-after-free issue when BPF program loads
successfully in the state transition, load_module's do_init_module call
fails and frees the module, and BPF program fd on close calls module_put
for the freed module. Future patch has test case to verify we don't
regress in this area in future.
There are multiple points after prepare_coming_module (in load_module)
where failure can occur and module loading can return error. We
illustrate and test for the race using the last point where it can
practically occur (in module __init function).
An illustration of the race:
CPU 0 CPU 1
load_module
notifier_call(MODULE_STATE_COMING)
btf_parse_module
btf_alloc_id // Published to userspace
list_add(&btf_mod->list, btf_modules)
mod->init(...)
... ^
bpf_check |
check_pseudo_btf_id |
btf_try_get_module |
returns true | ...
... | module __init in progress
return prog_fd | ...
... V
if (ret < 0)
free_module(mod)
...
close(prog_fd)
...
bpf_prog_free_deferred
module_put(used_btf.mod) // use-after-free
We fix this issue by setting a flag BTF_MODULE_F_LIVE, from the notifier
callback when MODULE_STATE_LIVE state is reached for the module, so that
we return NULL from btf_try_get_module for modules that are not fully
formed. Since try_module_get already checks that module is not in
MODULE_STATE_GOING state, and that is the only transition a live module
can make before being removed from btf_modules list, this is enough to
close the race and prevent the bug.
A later selftest patch crafts the race condition artifically to verify
that it has been fixed, and that verifier fails to load program (with
ENXIO).
Lastly, a couple of comments:
1. Even if this race didn't exist, it seems more appropriate to only
access resources (ksyms and kfuncs) of a fully formed module which
has been initialized completely.
2. This patch was born out of need for synchronization against module
initcall for the next patch, so it is needed for correctness even
without the aforementioned race condition. The BTF resources
initialized by module initcall are set up once and then only looked
up, so just waiting until the initcall has finished ensures correct
behavior.
Fixes: 541c3bad8dc5 ("bpf: Support BPF ksym variables in kernel modules")
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-2-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:44 +05:30
break ;
case MODULE_STATE_LIVE :
/* Find this module's entry and mark it live. */
mutex_lock ( & btf_module_mutex ) ;
list_for_each_entry_safe ( btf_mod , tmp , & btf_modules , list ) {
if ( btf_mod - > module ! = module )
continue ;
btf_mod - > flags | = BTF_MODULE_F_LIVE ;
break ;
}
mutex_unlock ( & btf_module_mutex ) ;
2020-11-09 17:19:31 -08:00
break ;
case MODULE_STATE_GOING :
/* Find this module's entry, unlink it and release what it owns. */
mutex_lock ( & btf_module_mutex ) ;
list_for_each_entry_safe ( btf_mod , tmp , & btf_modules , list ) {
if ( btf_mod - > module ! = module )
continue ;
list_del ( & btf_mod - > list ) ;
if ( btf_mod - > sysfs_attr )
sysfs_remove_bin_file ( btf_kobj , btf_mod - > sysfs_attr ) ;
2021-12-01 10:10:31 -08:00
purge_cand_cache ( btf_mod - > btf ) ;
2020-11-09 17:19:31 -08:00
btf_put ( btf_mod - > btf ) ;
kfree ( btf_mod - > sysfs_attr ) ;
kfree ( btf_mod ) ;
break ;
}
mutex_unlock ( & btf_module_mutex ) ;
break ;
}
out :
return notifier_from_errno ( err ) ;
}
/* Notifier block that routes module state changes to btf_module_notify(). */
static struct notifier_block btf_module_nb = {
. notifier_call = btf_module_notify ,
} ;
/*
 * Register the module notifier at fs_initcall time. The registration
 * result is deliberately not propagated; the initcall always reports
 * success.
 */
static int __init btf_module_init(void)
{
	(void)register_module_notifier(&btf_module_nb);

	return 0;
}

fs_initcall(btf_module_init);
# endif /* CONFIG_DEBUG_INFO_BTF_MODULES */
2021-01-11 23:55:18 -08:00
/* Try to pin the kernel module that owns @btf.
 *
 * btf_modules is walked under btf_module_mutex for the entry whose BTF is
 * @btf. A reference is taken with try_module_get() only when that entry
 * carries BTF_MODULE_F_LIVE, which is set from the notifier callback for
 * MODULE_STATE_LIVE, i.e. once the module's __init routine has finished.
 *
 * Why the flag matters: module BTF is parsed, given an ID and added to
 * btf_modules from the MODULE_STATE_COMING notifier, before the module's
 * initcalls have run. load_module() can still fail after that point and
 * free the module, while try_module_get() on its own only refuses modules
 * in MODULE_STATE_GOING. Without the flag check a BPF program could take a
 * reference in that window and later call module_put() on a freed module.
 *
 * Returns the module with its reference count raised, or NULL when no
 * btf_modules entry matches @btf, the module is not live yet, the reference
 * could not be taken, or CONFIG_DEBUG_INFO_BTF_MODULES is disabled.
 */
struct module *btf_try_get_module(const struct btf *btf)
{
	struct module *owner = NULL;
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES
	struct btf_module *entry, *next;

	mutex_lock(&btf_module_mutex);
	list_for_each_entry_safe(entry, next, &btf_modules, list) {
		if (entry->btf == btf) {
			/* Only the first matching entry is ever considered. */
			if (!(entry->flags & BTF_MODULE_F_LIVE))
				break;
			if (try_module_get(entry->module))
				owner = entry->module;
			break;
		}
	}
	mutex_unlock(&btf_module_mutex);
#endif
	return owner;
}
2021-05-13 17:36:11 -07:00
2022-03-17 17:29:51 +05:30
/* Returns struct btf corresponding to the struct module.
* This function can return NULL or ERR_PTR .
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still intializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
*/
static struct btf * btf_get_module_btf ( const struct module * module )
{
# ifdef CONFIG_DEBUG_INFO_BTF_MODULES
struct btf_module * btf_mod , * tmp ;
# endif
2022-03-17 17:29:51 +05:30
struct btf * btf = NULL ;
if ( ! module ) {
btf = bpf_get_btf_vmlinux ( ) ;
2022-03-20 20:00:03 +05:30
if ( ! IS_ERR_OR_NULL ( btf ) )
2022-03-17 17:29:51 +05:30
btf_get ( btf ) ;
return btf ;
}
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still intializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
# ifdef CONFIG_DEBUG_INFO_BTF_MODULES
mutex_lock ( & btf_module_mutex ) ;
list_for_each_entry_safe ( btf_mod , tmp , & btf_modules , list ) {
if ( btf_mod - > module ! = module )
continue ;
btf_get ( btf_mod - > btf ) ;
btf = btf_mod - > btf ;
break ;
}
mutex_unlock ( & btf_module_mutex ) ;
# endif
return btf ;
}
2021-05-13 17:36:11 -07:00
/* BPF helper: look up a BTF type ID by @name and @kind.
 *
 * @flags must be zero, and @name must be a NUL-terminated buffer of
 * @name_sz bytes including the terminator (so @name_sz is at least 2);
 * anything else yields -EINVAL.
 *
 * A non-positive result from bpf_find_btf_id() is returned unchanged. A
 * positive type ID found in non-module BTF is returned as is. For module
 * BTF a new fd for the BTF object is created with __btf_new_fd() and packed
 * into the upper 32 bits of the return value, with the type ID in the lower
 * bits; if fd creation fails, its negative error code is returned instead.
 *
 * The btf_put() calls show that a positive lookup result comes with a
 * reference on the BTF object. It is dropped on every path except
 * successful fd creation.
 * NOTE(review): presumably the new fd then owns that reference -- confirm
 * in __btf_new_fd().
 */
BPF_CALL_4(bpf_btf_find_by_name_kind, char *, name, int, name_sz, u32, kind, int, flags)
{
	struct btf *owner = NULL;
	long id;
	int fd;

	if (flags || name_sz <= 1 || name[name_sz - 1])
		return -EINVAL;

	id = bpf_find_btf_id(name, kind, &owner);
	if (id <= 0)
		return id;

	if (!btf_is_module(owner)) {
		btf_put(owner);
		return id;
	}

	fd = __btf_new_fd(owner);
	if (fd < 0) {
		btf_put(owner);
		return fd;
	}

	return id | (((u64)fd) << 32);
}
const struct bpf_func_proto bpf_btf_find_by_name_kind_proto = {
. func = bpf_btf_find_by_name_kind ,
. gpl_only = false ,
. ret_type = RET_INTEGER ,
2021-12-16 16:31:51 -08:00
. arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY ,
2021-05-13 17:36:11 -07:00
. arg2_type = ARG_CONST_SIZE ,
. arg3_type = ARG_ANYTHING ,
. arg4_type = ARG_ANYTHING ,
} ;
2021-08-25 18:48:31 -07:00
2021-11-12 07:02:43 -08:00
/* Emit the global BTF ID list btf_tracing_ids; MAX_BTF_TRACING_TYPE is
 * passed as the second argument (presumably the number of entries).
 * BTF_TRACING_TYPE is defined only for the duration of the
 * BTF_TRACING_TYPE_xxx expansion and maps each (name, type) pair to
 * BTF_ID(struct, type); the name argument is not used here.
 * NOTE(review): BTF_TRACING_TYPE_xxx is defined outside this chunk,
 * presumably as a list of BTF_TRACING_TYPE(...) entries -- confirm.
 */
BTF_ID_LIST_GLOBAL ( btf_tracing_ids , MAX_BTF_TRACING_TYPE )
# define BTF_TRACING_TYPE(name, type) BTF_ID(struct, type)
BTF_TRACING_TYPE_xxx
# undef BTF_TRACING_TYPE
2021-10-02 06:47:51 +05:30
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_F_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
/* Kernel Function (kfunc) BTF ID set registration API */
2021-10-02 06:47:51 +05:30
2022-07-21 15:42:35 +02:00
static int btf_populate_kfunc_set ( struct btf * btf , enum btf_kfunc_hook hook ,
struct btf_id_set8 * add_set )
2021-10-02 06:47:51 +05:30
{
2022-07-21 15:42:35 +02:00
bool vmlinux_set = ! btf_is_module ( btf ) ;
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_F_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
struct btf_kfunc_set_tab * tab ;
2022-07-21 15:42:35 +02:00
struct btf_id_set8 * set ;
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_F_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
u32 set_cnt ;
int ret ;
2022-07-21 15:42:35 +02:00
if ( hook > = BTF_KFUNC_HOOK_MAX ) {
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_F_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
ret = - EINVAL ;
goto end ;
}
if ( ! add_set - > cnt )
return 0 ;
tab = btf - > kfunc_set_tab ;
if ( ! tab ) {
tab = kzalloc ( sizeof ( * tab ) , GFP_KERNEL | __GFP_NOWARN ) ;
if ( ! tab )
return - ENOMEM ;
btf - > kfunc_set_tab = tab ;
}
2022-07-21 15:42:35 +02:00
set = tab - > sets [ hook ] ;
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
look up the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or wait for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
/* Warn when register_btf_kfunc_id_set is called twice for the same hook
* for module sets .
*/
if ( WARN_ON_ONCE ( set & & ! vmlinux_set ) ) {
ret = - EINVAL ;
goto end ;
}
/* We don't need to allocate, concatenate, and sort module sets, because
* only one is allowed per hook . Hence , we can directly assign the
* pointer and return .
*/
if ( ! vmlinux_set ) {
2022-07-21 15:42:35 +02:00
tab - > sets [ hook ] = add_set ;
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
look up the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or wait for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
return 0 ;
}
/* In case of vmlinux sets, there may be more than one set being
* registered per hook . To create a unified set , we allocate a new set
* and concatenate all individual sets being registered . While each set
* is individually sorted , they may become unsorted when concatenated ,
* hence re - sorting the final set again is required to make binary
2022-07-21 15:42:35 +02:00
* searching the set using btf_id_set8_contains function work .
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
look up the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or wait for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
*/
set_cnt = set ? set - > cnt : 0 ;
if ( set_cnt > U32_MAX - add_set - > cnt ) {
ret = - EOVERFLOW ;
goto end ;
}
if ( set_cnt + add_set - > cnt > BTF_KFUNC_SET_MAX_CNT ) {
ret = - E2BIG ;
goto end ;
}
/* Grow set */
2022-07-21 15:42:35 +02:00
set = krealloc ( tab - > sets [ hook ] ,
offsetof ( struct btf_id_set8 , pairs [ set_cnt + add_set - > cnt ] ) ,
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
look up the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or wait for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
GFP_KERNEL | __GFP_NOWARN ) ;
if ( ! set ) {
ret = - ENOMEM ;
goto end ;
}
/* For newly allocated set, initialize set->cnt to 0 */
2022-07-21 15:42:35 +02:00
if ( ! tab - > sets [ hook ] )
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
look up the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or wait for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
set - > cnt = 0 ;
2022-07-21 15:42:35 +02:00
tab - > sets [ hook ] = set ;
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
look up the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or wait for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
/* Concatenate the two sets */
2022-07-21 15:42:35 +02:00
memcpy ( set - > pairs + set - > cnt , add_set - > pairs , add_set - > cnt * sizeof ( set - > pairs [ 0 ] ) ) ;
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
look up the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or wait for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
set - > cnt + = add_set - > cnt ;
2022-07-21 15:42:35 +02:00
sort ( set - > pairs , set - > cnt , sizeof ( set - > pairs [ 0 ] ) , btf_id_cmp_func , NULL ) ;
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
look up the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or wait for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
return 0 ;
end :
btf_free_kfunc_set_tab ( btf ) ;
return ret ;
2021-10-02 06:47:51 +05:30
}
2022-07-21 15:42:35 +02:00
/*
 * Look up kfunc_btf_id in the ID set stored at
 * btf->kfunc_set_tab->sets[hook].
 *
 * Returns NULL when hook is out of range (>= BTF_KFUNC_HOOK_MAX), when btf
 * has no kfunc_set_tab, when the slot for hook is NULL, or when
 * btf_id_set8_contains() does not find kfunc_btf_id in that set.
 * Otherwise returns the address of the u32 that immediately follows the
 * matched ID entry, which holds the flags for that ID.
 */
static u32 * __btf_kfunc_id_set_contains ( const struct btf * btf ,
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still intializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
enum btf_kfunc_hook hook ,
u32 kfunc_btf_id )
2021-10-02 06:47:51 +05:30
{
2022-07-21 15:42:35 +02:00
struct btf_id_set8 * set ;
u32 * id ;
2021-10-02 06:47:51 +05:30
2022-07-21 15:42:35 +02:00
if ( hook > = BTF_KFUNC_HOOK_MAX )
return NULL ;
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still intializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
if ( ! btf - > kfunc_set_tab )
2022-07-21 15:42:35 +02:00
return NULL ;
set = btf - > kfunc_set_tab - > sets [ hook ] ;
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still intializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
if ( ! set )
2022-07-21 15:42:35 +02:00
return NULL ;
id = btf_id_set8_contains ( set , kfunc_btf_id ) ;
if ( ! id )
return NULL ;
/* The flags for BTF ID are located next to it */
return id + 1 ;
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still intializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
}
/*
 * Map a BPF program type to a BTF_KFUNC_HOOK_* value.
 *
 * XDP, SCHED_CLS, STRUCT_OPS and SYSCALL each map to their own hook
 * (SCHED_CLS maps to BTF_KFUNC_HOOK_TC); TRACING and LSM both map to
 * BTF_KFUNC_HOOK_TRACING. Any other program type yields
 * BTF_KFUNC_HOOK_MAX, the value that __btf_kfunc_id_set_contains()
 * treats as out of range and answers with NULL.
 */
static int bpf_prog_type_to_kfunc_hook ( enum bpf_prog_type prog_type )
{
switch ( prog_type ) {
case BPF_PROG_TYPE_XDP :
return BTF_KFUNC_HOOK_XDP ;
case BPF_PROG_TYPE_SCHED_CLS :
return BTF_KFUNC_HOOK_TC ;
case BPF_PROG_TYPE_STRUCT_OPS :
return BTF_KFUNC_HOOK_STRUCT_OPS ;
2022-05-18 22:59:08 +02:00
case BPF_PROG_TYPE_TRACING :
2022-09-20 09:59:39 +02:00
case BPF_PROG_TYPE_LSM :
2022-05-18 22:59:08 +02:00
return BTF_KFUNC_HOOK_TRACING ;
case BPF_PROG_TYPE_SYSCALL :
return BTF_KFUNC_HOOK_SYSCALL ;
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still intializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
default :
return BTF_KFUNC_HOOK_MAX ;
2021-10-02 06:47:51 +05:30
}
}
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
look up the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or wait for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
/* Caution:
* Reference to the module ( obtained using btf_try_get_module ) corresponding to
* the struct btf * MUST * be held when calling this function from verifier
* context . This is usually true as we stash references in prog ' s kfunc_btf_tab ;
* keeping the reference for the duration of the call provides the necessary
* protection for looking up a well - formed btf - > kfunc_set_tab .
*/
2022-07-21 15:42:35 +02:00
u32 * btf_kfunc_id_set_contains ( const struct btf * btf ,
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
look up the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or wait for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
enum bpf_prog_type prog_type ,
2022-07-21 15:42:35 +02:00
u32 kfunc_btf_id )
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
{
enum btf_kfunc_hook hook ;
2021-10-02 06:47:53 +05:30
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
hook = bpf_prog_type_to_kfunc_hook ( prog_type ) ;
2022-07-21 15:42:35 +02:00
return __btf_kfunc_id_set_contains ( btf , hook , kfunc_btf_id ) ;
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
}
2021-11-22 20:17:40 +05:30
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still initializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
/* This function must be invoked only from initcalls/module init functions */
int register_btf_kfunc_id_set ( enum bpf_prog_type prog_type ,
const struct btf_kfunc_id_set * kset )
{
enum btf_kfunc_hook hook ;
struct btf * btf ;
int ret ;
btf = btf_get_module_btf ( kset - > owner ) ;
2022-01-25 16:13:40 -08:00
if ( ! btf ) {
if ( ! kset - > owner & & IS_ENABLED ( CONFIG_DEBUG_INFO_BTF ) ) {
pr_err ( " missing vmlinux BTF, cannot register kfuncs \n " ) ;
return - ENOENT ;
}
if ( kset - > owner & & IS_ENABLED ( CONFIG_DEBUG_INFO_BTF_MODULES ) ) {
pr_err ( " missing module BTF, cannot register kfuncs \n " ) ;
return - ENOENT ;
}
return 0 ;
}
if ( IS_ERR ( btf ) )
return PTR_ERR ( btf ) ;
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still intializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
hook = bpf_prog_type_to_kfunc_hook ( prog_type ) ;
2022-07-21 15:42:35 +02:00
ret = btf_populate_kfunc_set ( btf , hook , kset - > set ) ;
2022-03-17 17:29:51 +05:30
btf_put ( btf ) ;
bpf: Populate kfunc BTF ID sets in struct btf
This patch prepares the kernel to support putting all kinds of kfunc BTF
ID sets in the struct btf itself. The various kernel subsystems will
make register_btf_kfunc_id_set call in the initcalls (for built-in code
and modules).
The 'hook' is one of the many program types, e.g. XDP and TC/SCHED_CLS,
STRUCT_OPS, and 'types' are check (allowed or not), acquire, release,
and ret_null (with PTR_TO_BTF_ID_OR_NULL return type).
A maximum of BTF_KFUNC_SET_MAX_CNT (32) kfunc BTF IDs are permitted in a
set of certain hook and type for vmlinux sets, since they are allocated
on demand, and otherwise set as NULL. Module sets can only be registered
once per hook and type, hence they are directly assigned.
A new btf_kfunc_id_set_contains function is exposed for use in verifier,
this new method is faster than the existing list searching method, and
is also automatic. It also lets other code not care whether the set is
unallocated or not.
Note that module code can only do single register_btf_kfunc_id_set call
per hook. This is why sorting is only done for in-kernel vmlinux sets,
because there might be multiple sets for the same hook and type that
must be concatenated, hence sorting them is required to ensure bsearch
in btf_id_set_contains continues to work correctly.
Next commit will update the kernel users to make use of this
infrastructure.
Finally, add __maybe_unused annotation for BTF ID macros for the
!CONFIG_DEBUG_INFO_BTF case, so that they don't produce warnings during
build time.
The previous patch is also needed to provide synchronization against
initialization for module BTF's kfunc_set_tab introduced here, as
described below:
The kfunc_set_tab pointer in struct btf is write-once (if we consider
the registration phase (comprised of multiple register_btf_kfunc_id_set
calls) as a single operation). In this sense, once it has been fully
prepared, it isn't modified, only used for lookup (from the verifier
context).
For btf_vmlinux, it is initialized fully during the do_initcalls phase,
which happens fairly early in the boot process, before any processes are
present. This also eliminates the possibility of bpf_check being called
at that point, thus relieving us of ensuring any synchronization between
the registration and lookup function (btf_kfunc_id_set_contains).
However, the case for module BTF is a bit tricky. The BTF is parsed,
prepared, and published from the MODULE_STATE_COMING notifier callback.
After this, the module initcalls are invoked, where our registration
function will be called to populate the kfunc_set_tab for module BTF.
At this point, BTF may be available to userspace while its corresponding
module is still intializing. A BTF fd can then be passed to verifier
using bpf syscall (e.g. for kfunc call insn).
Hence, there is a race window where verifier may concurrently try to
lookup the kfunc_set_tab. To prevent this race, we must ensure the
operations are serialized, or waiting for the __init functions to
complete.
In the earlier registration API, this race was alleviated as verifier
bpf_check_mod_kfunc_call didn't find the kfunc BTF ID until it was added
by the registration function (called usually at the end of module __init
function after all module resources have been initialized). If the
verifier made the check_kfunc_call before kfunc BTF ID was added to the
list, it would fail verification (saying call isn't allowed). The
access to list was protected using a mutex.
Now, it would still fail verification, but for a different reason
(returning ENXIO due to the failed btf_try_get_module call in
add_kfunc_call), because if the __init call is in progress the module
will be in the middle of MODULE_STATE_COMING -> MODULE_STATE_LIVE
transition, and the BTF_MODULE_LIVE flag for btf_module instance will
not be set, so the btf_try_get_module call will fail.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220114163953.1455836-3-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-01-14 22:09:45 +05:30
return ret ;
}
EXPORT_SYMBOL_GPL ( register_btf_kfunc_id_set ) ;
Merge https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Andrii Nakryiko says:
====================
bpf-next 2021-12-10 v2
We've added 115 non-merge commits during the last 26 day(s) which contain
a total of 182 files changed, 5747 insertions(+), 2564 deletions(-).
The main changes are:
1) Various samples fixes, from Alexander Lobakin.
2) BPF CO-RE support in kernel and light skeleton, from Alexei Starovoitov.
3) A batch of new unified APIs for libbpf, logging improvements, version
querying, etc. Also a batch of old deprecations for old APIs and various
bug fixes, in preparation for libbpf 1.0, from Andrii Nakryiko.
4) BPF documentation reorganization and improvements, from Christoph Hellwig
and Dave Tucker.
5) Support for declarative initialization of BPF_MAP_TYPE_PROG_ARRAY in
libbpf, from Hengqi Chen.
6) Verifier log fixes, from Hou Tao.
7) Runtime-bounded loops support with bpf_loop() helper, from Joanne Koong.
8) Extend branch record capturing to all platforms that support it,
from Kajol Jain.
9) Light skeleton codegen improvements, from Kumar Kartikeya Dwivedi.
10) bpftool doc-generating script improvements, from Quentin Monnet.
11) Two libbpf v0.6 bug fixes, from Shuyi Cheng and Vincent Minet.
12) Deprecation warning fix for perf/bpf_counter, from Song Liu.
13) MAX_TAIL_CALL_CNT unification and MIPS build fix for libbpf,
from Tiezhu Yang.
14) BTF_KIND_TYPE_TAG follow-up fixes, from Yonghong Song.
15) Selftests fixes and improvements, from Ilya Leoshkevich, Jean-Philippe
Brucker, Jiri Olsa, Maxim Mikityanskiy, Tirthendu Sarkar, Yucong Sun,
and others.
* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (115 commits)
libbpf: Add "bool skipped" to struct bpf_map
libbpf: Fix typo in btf__dedup@LIBBPF_0.0.2 definition
bpftool: Switch bpf_object__load_xattr() to bpf_object__load()
selftests/bpf: Remove the only use of deprecated bpf_object__load_xattr()
selftests/bpf: Add test for libbpf's custom log_buf behavior
selftests/bpf: Replace all uses of bpf_load_btf() with bpf_btf_load()
libbpf: Deprecate bpf_object__load_xattr()
libbpf: Add per-program log buffer setter and getter
libbpf: Preserve kernel error code and remove kprobe prog type guessing
libbpf: Improve logging around BPF program loading
libbpf: Allow passing user log setting through bpf_object_open_opts
libbpf: Allow passing preallocated log_buf when loading BTF into kernel
libbpf: Add OPTS-based bpf_btf_load() API
libbpf: Fix bpf_prog_load() log_buf logic for log_level 0
samples/bpf: Remove unneeded variable
bpf: Remove redundant assignment to pointer t
selftests/bpf: Fix a compilation warning
perf/bpf_counter: Use bpf_map_create instead of bpf_create_map
samples: bpf: Fix 'unknown warning group' build warning on Clang
samples: bpf: Fix xdp_sample_user.o linking with Clang
...
====================
Link: https://lore.kernel.org/r/20211210234746.2100561-1-andrii@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-12-10 15:56:10 -08:00
2022-04-25 03:18:54 +05:30
/* Look up the destructor kfunc registered in @btf for the type @btf_id.
 *
 * Returns the kfunc_btf_id of the matching entry, or -ENOENT when @btf has
 * no destructor table or the table holds no entry for @btf_id.
 */
s32 btf_find_dtor_kfunc(struct btf *btf, u32 btf_id)
{
	struct btf_id_dtor_kfunc_tab *dtor_tab = btf->dtor_kfunc_tab;
	struct btf_id_dtor_kfunc *match;

	/* Entries are wider than a u32, but btf_id is their first member, so
	 * comparing just the leading u32 lets us reuse btf_id_cmp_func as the
	 * bsearch comparator.
	 */
	BUILD_BUG_ON(offsetof(struct btf_id_dtor_kfunc, btf_id) != 0);

	if (!dtor_tab)
		return -ENOENT;

	match = bsearch(&btf_id, dtor_tab->dtors, dtor_tab->cnt,
			sizeof(dtor_tab->dtors[0]), btf_id_cmp_func);
	if (match)
		return match->kfunc_btf_id;

	return -ENOENT;
}
bpf: Wire up freeing of referenced kptr
A destructor kfunc can be defined as void func(type *), where type may
be void or any other pointer type as per convenience.
In this patch, we ensure that the type is sane and capture the function
pointer into off_desc of ptr_off_tab for the specific pointer offset,
with the invariant that the dtor pointer is always set when 'kptr_ref'
tag is applied to the pointer's pointee type, which is indicated by the
flag BPF_MAP_VALUE_OFF_F_REF.
Note that only BTF IDs whose destructor kfunc is registered, thus become
the allowed BTF IDs for embedding as referenced kptr. Hence it serves
the purpose of finding dtor kfunc BTF ID, as well as acting as a check
against the whitelist of allowed BTF IDs for this purpose.
Finally, wire up the actual freeing of the referenced pointer if any at
all available offsets, so that no references are leaked after the BPF
map goes away and the BPF program previously moved the ownership of a
referenced pointer into it.
The behavior is similar to BPF timers, where bpf_map_{update,delete}_elem
will free any existing referenced kptr. The same case is with LRU map's
bpf_lru_push_free/htab_lru_push_free functions, which are extended to
reset unreferenced and free referenced kptr.
Note that unlike BPF timers, kptr is not reset or freed when map uref
drops to zero.
Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20220424214901.2743946-8-memxor@gmail.com
2022-04-25 03:18:55 +05:30
/* Verify that each of the @cnt entries in @dtors names a destructor kfunc
 * whose BTF prototype is 'void func(type *)'.
 *
 * Returns 0 when every entry passes, or -EINVAL at the first entry whose
 * kfunc_btf_id is not a function, whose function type is not a function
 * prototype, whose return type is not void, or which does not take exactly
 * one pointer argument.
 */
static int btf_check_dtor_kfuncs(struct btf *btf, const struct btf_id_dtor_kfunc *dtors, u32 cnt)
{
	const struct btf_id_dtor_kfunc *cur;
	const struct btf_id_dtor_kfunc *last = dtors + cnt;

	for (cur = dtors; cur != last; cur++) {
		const struct btf_type *func, *proto, *ret_type, *arg_type;

		func = btf_type_by_id(btf, cur->kfunc_btf_id);
		if (!func || !btf_type_is_func(func))
			return -EINVAL;

		proto = btf_type_by_id(btf, func->type);
		if (!proto || !btf_type_is_func_proto(proto))
			return -EINVAL;

		/* The destructor must return void ... */
		ret_type = btf_type_by_id(btf, proto->type);
		if (!ret_type || !btf_type_is_void(ret_type))
			return -EINVAL;

		/* ... and take exactly one argument ... */
		if (btf_type_vlen(proto) != 1)
			return -EINVAL;

		/* ... which may be any pointer type, as width on targets Linux
		 * supports will be same for all pointer types (i.e.
		 * sizeof(void *)).
		 */
		arg_type = btf_type_by_id(btf, btf_params(proto)[0].type);
		if (!arg_type || !btf_type_is_ptr(arg_type))
			return -EINVAL;
	}

	return 0;
}
2022-04-25 03:18:54 +05:30
/* Register @add_cnt destructor kfuncs from @dtors in the BTF object that
 * belongs to @owner (looked up via btf_get_module_btf()). This function must
 * be invoked only from initcalls/module init functions.
 *
 * The new entries are appended to btf->dtor_kfunc_tab and the table is then
 * re-sorted with btf_id_cmp_func, the comparator btf_find_dtor_kfunc() uses
 * for its bsearch().
 *
 * Returns 0 on success, and also when no BTF object exists and the matching
 * BTF config option is disabled. Otherwise returns a negative error:
 *  -ENOENT    no BTF object although the matching config option is enabled
 *  -E2BIG     @add_cnt, or the resulting table size, reaches
 *             BTF_DTOR_KFUNC_MAX_CNT
 *  -EINVAL    a destructor prototype is not 'void func(type *)', or a module
 *             BTF already has a destructor table
 *  -EOVERFLOW the table count plus @add_cnt would overflow a u32
 *  -ENOMEM    the table could not be (re)allocated
 * or the error encoded in an ERR_PTR returned by btf_get_module_btf().
 *
 * On any failure after the BTF object was obtained, the destructor table of
 * that BTF object is freed via btf_free_dtor_kfunc_tab().
 */
int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt,
				struct module *owner)
{
	struct btf_id_dtor_kfunc_tab *tab;
	struct btf *btf;
	u32 tab_cnt;
	int ret;

	btf = btf_get_module_btf(owner);
	if (!btf) {
		if (!owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
			pr_err("missing vmlinux BTF, cannot register dtor kfuncs\n");
			return -ENOENT;
		}
		if (owner && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) {
			pr_err("missing module BTF, cannot register dtor kfuncs\n");
			return -ENOENT;
		}
		return 0;
	}
	if (IS_ERR(btf))
		return PTR_ERR(btf);

	if (add_cnt >= BTF_DTOR_KFUNC_MAX_CNT) {
		pr_err("cannot register more than %d kfunc destructors\n", BTF_DTOR_KFUNC_MAX_CNT);
		ret = -E2BIG;
		goto end;
	}

	/* Ensure that the prototype of dtor kfuncs being registered is sane */
	ret = btf_check_dtor_kfuncs(btf, dtors, add_cnt);
	if (ret < 0)
		goto end;

	tab = btf->dtor_kfunc_tab;
	/* Only one call allowed for modules */
	if (WARN_ON_ONCE(tab && btf_is_module(btf))) {
		ret = -EINVAL;
		goto end;
	}

	tab_cnt = tab ? tab->cnt : 0;
	if (tab_cnt > U32_MAX - add_cnt) {
		ret = -EOVERFLOW;
		goto end;
	}
	if (tab_cnt + add_cnt >= BTF_DTOR_KFUNC_MAX_CNT) {
		pr_err("cannot register more than %d kfunc destructors\n", BTF_DTOR_KFUNC_MAX_CNT);
		ret = -E2BIG;
		goto end;
	}

	/* Keep the result in a temporary so that btf->dtor_kfunc_tab stays
	 * valid (and can be freed at 'end') if the reallocation fails.
	 */
	tab = krealloc(btf->dtor_kfunc_tab,
		       offsetof(struct btf_id_dtor_kfunc_tab, dtors[tab_cnt + add_cnt]),
		       GFP_KERNEL | __GFP_NOWARN);
	if (!tab) {
		ret = -ENOMEM;
		goto end;
	}

	/* A freshly allocated table has an uninitialized count */
	if (!btf->dtor_kfunc_tab)
		tab->cnt = 0;
	btf->dtor_kfunc_tab = tab;

	memcpy(tab->dtors + tab->cnt, dtors, add_cnt * sizeof(tab->dtors[0]));
	tab->cnt += add_cnt;

	/* Keep the table ordered for the bsearch() in btf_find_dtor_kfunc() */
	sort(tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func, NULL);

	ret = 0;
end:
	/* Success and failure share this exit so that btf_put() runs on both;
	 * previously the success path returned early and never dropped the
	 * BTF object it had obtained. The table is torn down only on failure.
	 */
	if (ret)
		btf_free_dtor_kfunc_tab(btf);
	btf_put(btf);
	return ret;
}
EXPORT_SYMBOL_GPL(register_btf_id_dtor_kfuncs);
2022-02-04 01:55:18 +01:00
/* NOTE(review): presumably bounds the nesting depth of the type
 * compatibility check described in the comment that follows; its users are
 * not visible here, so confirm before relying on this.
 */
# define MAX_TYPES_ARE_COMPAT_DEPTH 2
/* Check local and target types for compatibility. This check is used for
* type - based CO - RE relocations and follow slightly different rules than
* field - based relocations . This function assumes that root types were already
* checked for name match . Beyond that initial root - level name check , names
* are completely ignored . Compatibility rules are as follows :
bpf: Add btf enum64 support
Currently, BTF only supports up to 32bit enum values with BTF_KIND_ENUM.
But in kernel, some enum indeed has 64bit values, e.g.,
in uapi bpf.h, we have
enum {
BPF_F_INDEX_MASK = 0xffffffffULL,
BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
};
In this case, BTF_KIND_ENUM will encode the value of BPF_F_CTXLEN_MASK
as 0, which certainly is incorrect.
This patch added a new btf kind, BTF_KIND_ENUM64, which permits
64bit value to cover the above use case. The BTF_KIND_ENUM64 has
the following three fields followed by the common type:
struct bpf_enum64 {
__u32 nume_off;
__u32 val_lo32;
__u32 val_hi32;
};
Currently, btf type section has an alignment of 4 as all element types
are u32. Representing the value with __u64 will introduce a pad
for bpf_enum64 and may also introduce misalignment for the 64bit value.
Hence, two members of val_hi32 and val_lo32 are chosen to avoid these issues.
The kflag is also introduced for BTF_KIND_ENUM and BTF_KIND_ENUM64
to indicate whether the value is signed or unsigned. The kflag intends
to provide consistent output of BTF C format with the original
source code. For example, the original BTF_KIND_ENUM bit value is 0xffffffff.
The format C has two choices, printing out 0xffffffff or -1 and current libbpf
prints out as unsigned value. But if the signedness is preserved in btf,
the value can be printed the same as the original source code.
The kflag value 0 means unsigned values, which is consistent to the default
by libbpf and should also cover most cases as well.
The new BTF_KIND_ENUM64 is intended to support the enum value represented as
64bit value. But it can represent all BTF_KIND_ENUM values as well.
The compiler ([1]) and pahole will generate BTF_KIND_ENUM64 only if the value has
to be represented with 64 bits.
In addition, a static inline function btf_kind_core_compat() is introduced which
will be used later when libbpf relo_core.c changed. Here the kernel shares the
same relo_core.c with libbpf.
[1] https://reviews.llvm.org/D124641
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220607062600.3716578-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-06-06 23:26:00 -07:00
* - any two STRUCTs / UNIONs / FWDs / ENUMs / INTs / ENUM64s are considered compatible , but
2022-02-04 01:55:18 +01:00
* kind should match for local and target types ( i . e . , STRUCT is not
* compatible with UNION ) ;
bpf: Add btf enum64 support
Currently, BTF only supports up to 32bit enum value with BTF_KIND_ENUM.
But in kernel, some enum indeed has 64bit values, e.g.,
in uapi bpf.h, we have
enum {
BPF_F_INDEX_MASK = 0xffffffffULL,
BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
};
In this case, BTF_KIND_ENUM will encode the value of BPF_F_CTXLEN_MASK
as 0, which certainly is incorrect.
This patch added a new btf kind, BTF_KIND_ENUM64, which permits
64bit value to cover the above use case. The BTF_KIND_ENUM64 has
the following three fields followed by the common type:
struct bpf_enum64 {
__u32 nume_off;
__u32 val_lo32;
__u32 val_hi32;
};
Currently, btf type section has an alignment of 4 as all element types
are u32. Representing the value with __u64 will introduce a pad
for bpf_enum64 and may also introduce misalignment for the 64bit value.
Hence, two members of val_hi32 and val_lo32 are chosen to avoid these issues.
The kflag is also introduced for BTF_KIND_ENUM and BTF_KIND_ENUM64
to indicate whether the value is signed or unsigned. The kflag intends
to provide consistent output of BTF C format with the original
source code. For example, the original BTF_KIND_ENUM bit value is 0xffffffff.
The format C has two choices, printing out 0xffffffff or -1 and current libbpf
prints out as unsigned value. But if the signedness is preserved in btf,
the value can be printed the same as the original source code.
The kflag value 0 means unsigned values, which is consistent to the default
by libbpf and should also cover most cases as well.
The new BTF_KIND_ENUM64 is intended to support the enum value represented as
64bit value. But it can represent all BTF_KIND_ENUM values as well.
The compiler ([1]) and pahole will generate BTF_KIND_ENUM64 only if the value has
to be represented with 64 bits.
In addition, a static inline function btf_kind_core_compat() is introduced which
will be used later when libbpf relo_core.c changed. Here the kernel shares the
same relo_core.c with libbpf.
[1] https://reviews.llvm.org/D124641
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220607062600.3716578-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2022-06-06 23:26:00 -07:00
* - for ENUMs / ENUM64s , the size is ignored ;
2022-02-04 01:55:18 +01:00
* - for INT , size and signedness are ignored ;
* - for ARRAY , dimensionality is ignored , element types are checked for
* compatibility recursively ;
* - CONST / VOLATILE / RESTRICT modifiers are ignored ;
* - TYPEDEFs/PTRs are compatible if the types they point to are compatible;
* - FUNC_PROTOs are compatible if they have compatible signature : same
* number of input args and compatible return and argument types .
* These rules are not set in stone and probably will be adjusted as we get
* more experience with using BPF CO - RE relocations .
*/
2021-12-01 10:10:26 -08:00
/* Public entry point for the type-compatibility check: forwards all four
 * arguments unchanged to __bpf_core_types_are_compat() and passes
 * MAX_TYPES_ARE_COMPAT_DEPTH as the final argument (presumably the
 * recursion depth limit of the helper -- confirm against its definition).
 * The helper's return value is returned as-is.
 */
int bpf_core_types_are_compat ( const struct btf * local_btf , __u32 local_id ,
const struct btf * targ_btf , __u32 targ_id )
{
2022-06-23 18:29:34 +00:00
return __bpf_core_types_are_compat ( local_btf , local_id , targ_btf , targ_id ,
2022-02-04 01:55:18 +01:00
MAX_TYPES_ARE_COMPAT_DEPTH ) ;
2021-12-01 10:10:26 -08:00
}
2022-06-28 16:01:21 +00:00
# define MAX_TYPES_MATCH_DEPTH 2
/* Public entry point for the type-match check: hands the local/target BTF
 * and type ids to __bpf_core_types_match() together with 'false' and
 * MAX_TYPES_MATCH_DEPTH, and returns whatever the helper returns.
 */
int bpf_core_types_match(const struct btf *local_btf, u32 local_id,
			 const struct btf *targ_btf, u32 targ_id)
{
	int ret;

	ret = __bpf_core_types_match(local_btf, local_id, targ_btf, targ_id,
				     false, MAX_TYPES_MATCH_DEPTH);
	return ret;
}
2021-12-01 10:10:26 -08:00
/* Return true if @s begins an "X___Y" flavor separator: one non-underscore
 * character, exactly three underscores, then a non-underscore character.
 * Up to five characters (s[0]..s[4]) are inspected, so the caller must
 * guarantee that at least five characters are readable at @s.
 */
static bool bpf_core_is_flavor_sep(const char *s)
{
	/* check X___Y name pattern, where X and Y are not underscores */
	return s[0] != '_' &&				      /* X */
	       s[1] == '_' && s[2] == '_' && s[3] == '_' &&   /* ___ */
	       s[4] != '_';				      /* Y */
}

/* Return the length of @name with its "___flavor" suffix (if any) removed.
 *
 * The name is scanned from the end, so the right-most separator decides
 * where the essential name stops. If no separator is present the full
 * strlen(@name) is returned.
 */
size_t bpf_core_essential_name_len(const char *name)
{
	size_t n = strlen(name);
	size_t i;

	/* A separator needs five characters ("X___Y"). Bail out early rather
	 * than computing n - 5, which would wrap around for short names.
	 */
	if (n < 5)
		return n;

	/* visit i = n - 5, n - 6, ..., 0 without a signed loop index */
	for (i = n - 4; i-- > 0; ) {
		if (bpf_core_is_flavor_sep(name + i))
			return i + 1;
	}
	return n;
}
2021-12-01 10:10:28 -08:00
2021-12-01 10:10:31 -08:00
/* A list of CO-RE relocation target candidates for one (kind, name) pair.
 * Instances are used both as temporary search results and as entries of
 * vmlinux_cand_cache / module_cand_cache.
 */
struct bpf_cand_cache {
/* name being searched for; entries stored in a cache hold their own
 * kmemdup_nul()'ed copy (see populate_cand_cache()), temporary lists
 * point into the program's BTF strings
 */
const char * name ;
/* number of leading bytes of name that are significant; set from
 * bpf_core_essential_name_len() by bpf_core_find_cands()
 */
u32 name_len ;
/* BTF kind a target type must have to be a candidate */
u16 kind ;
/* number of valid entries in cands[]; bpf_free_cands() treats cnt == 0
 * as "this list lives on the stack" and does not free it
 */
u16 cnt ;
struct {
/* BTF object the candidate type belongs to */
const struct btf * btf ;
/* type id of the candidate inside that BTF object */
u32 id ;
} cands [ ] ;
} ;
static void bpf_free_cands ( struct bpf_cand_cache * cands )
{
if ( ! cands - > cnt )
/* empty candidate array was allocated on stack */
return ;
kfree ( cands ) ;
}
static void bpf_free_cands_from_cache ( struct bpf_cand_cache * cands )
{
kfree ( cands - > name ) ;
kfree ( cands ) ;
}
/* Candidate caches: fixed-size tables holding at most one entry per slot.
 * The slot is chosen as hash_cands() % size (see check_cand_cache()), and
 * populate_cand_cache() frees whatever entry already occupies the slot
 * before installing a new one.
 */
# define VMLINUX_CAND_CACHE_SIZE 31
static struct bpf_cand_cache * vmlinux_cand_cache [ VMLINUX_CAND_CACHE_SIZE ] ;
# define MODULE_CAND_CACHE_SIZE 31
static struct bpf_cand_cache * module_cand_cache [ MODULE_CAND_CACHE_SIZE ] ;
/* Taken by print_cand_cache(), purge_cand_cache() and bpf_core_apply()
 * around their accesses to the two caches above.
 */
static DEFINE_MUTEX ( cand_cache_mutex ) ;
static void __print_cand_cache ( struct bpf_verifier_log * log ,
struct bpf_cand_cache * * cache ,
int cache_size )
{
struct bpf_cand_cache * cc ;
int i , j ;
for ( i = 0 ; i < cache_size ; i + + ) {
cc = cache [ i ] ;
if ( ! cc )
continue ;
bpf_log ( log , " [%d]%s( " , i , cc - > name ) ;
for ( j = 0 ; j < cc - > cnt ; j + + ) {
bpf_log ( log , " %d " , cc - > cands [ j ] . id ) ;
if ( j < cc - > cnt - 1 )
bpf_log ( log , " " ) ;
}
bpf_log ( log , " ), " ) ;
}
}
/* Dump both candidate caches (vmlinux first, then modules) to @log while
 * holding cand_cache_mutex for the whole dump.
 */
static void print_cand_cache(struct bpf_verifier_log *log)
{
	mutex_lock(&cand_cache_mutex);

	/* vmlinux candidates */
	bpf_log(log, " vmlinux_cand_cache: ");
	__print_cand_cache(log, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE);

	/* module candidates */
	bpf_log(log, " \n module_cand_cache: ");
	__print_cand_cache(log, module_cand_cache, MODULE_CAND_CACHE_SIZE);

	bpf_log(log, " \n ");

	mutex_unlock(&cand_cache_mutex);
}
static u32 hash_cands ( struct bpf_cand_cache * cands )
{
return jhash ( cands - > name , cands - > name_len , 0 ) ;
}
static struct bpf_cand_cache * check_cand_cache ( struct bpf_cand_cache * cands ,
struct bpf_cand_cache * * cache ,
int cache_size )
{
struct bpf_cand_cache * cc = cache [ hash_cands ( cands ) % cache_size ] ;
if ( cc & & cc - > name_len = = cands - > name_len & &
! strncmp ( cc - > name , cands - > name , cands - > name_len ) )
return cc ;
return NULL ;
}
/* Size in bytes of a struct bpf_cand_cache that carries @cnt entries in
 * its trailing cands[] array.
 */
static size_t sizeof_cands(int cnt)
{
	size_t nbytes = offsetof(struct bpf_cand_cache, cands[cnt]);

	return nbytes;
}
/* Store a copy of @cands in @cache and return the cached copy.
 *
 * The slot is hash_cands(@cands) % @cache_size; an entry already present
 * in that slot is freed first. @cands itself is always released through
 * bpf_free_cands() before returning, on success and on failure alike, so
 * the caller must not use it afterwards unless it is the on-stack,
 * zero-count list (which bpf_free_cands() leaves untouched).
 *
 * Returns the new cache entry, or ERR_PTR(-ENOMEM) if either the entry or
 * its name copy cannot be allocated. On failure the slot is left empty.
 */
static struct bpf_cand_cache * populate_cand_cache ( struct bpf_cand_cache * cands ,
struct bpf_cand_cache * * cache ,
int cache_size )
{
struct bpf_cand_cache * * cc = & cache [ hash_cands ( cands ) % cache_size ] , * new_cands ;
/* evict whatever currently occupies the slot */
if ( * cc ) {
bpf_free_cands_from_cache ( * cc ) ;
* cc = NULL ;
}
2021-12-09 14:21:22 +08:00
new_cands = kmemdup ( cands , sizeof_cands ( cands - > cnt ) , GFP_KERNEL ) ;
2021-12-01 10:10:31 -08:00
if ( ! new_cands ) {
bpf_free_cands ( cands ) ;
return ERR_PTR ( - ENOMEM ) ;
}
/* strdup the name, since it will stay in cache.
* the cands - > name points to strings in prog ' s BTF and the prog can be unloaded .
*/
new_cands - > name = kmemdup_nul ( cands - > name , cands - > name_len , GFP_KERNEL ) ;
/* cands->name has been read above, so the source list can go now */
bpf_free_cands ( cands ) ;
if ( ! new_cands - > name ) {
kfree ( new_cands ) ;
return ERR_PTR ( - ENOMEM ) ;
}
* cc = new_cands ;
return new_cands ;
}
2021-12-06 17:48:39 -08:00
# ifdef CONFIG_DEBUG_INFO_BTF_MODULES
2021-12-01 10:10:31 -08:00
static void __purge_cand_cache ( struct btf * btf , struct bpf_cand_cache * * cache ,
int cache_size )
{
struct bpf_cand_cache * cc ;
int i , j ;
for ( i = 0 ; i < cache_size ; i + + ) {
cc = cache [ i ] ;
if ( ! cc )
continue ;
if ( ! btf ) {
/* when new module is loaded purge all of module_cand_cache,
* since new module might have candidates with the name
* that matches cached cands .
*/
bpf_free_cands_from_cache ( cc ) ;
cache [ i ] = NULL ;
continue ;
}
/* when module is unloaded purge cache entries
* that match module ' s btf
*/
for ( j = 0 ; j < cc - > cnt ; j + + )
if ( cc - > cands [ j ] . btf = = btf ) {
bpf_free_cands_from_cache ( cc ) ;
cache [ i ] = NULL ;
break ;
}
}
}
/* Purge module_cand_cache under cand_cache_mutex; see
 * __purge_cand_cache() for the meaning of a NULL vs non-NULL @btf.
 * The vmlinux cache is not touched.
 */
static void purge_cand_cache(struct btf *btf)
{
	struct bpf_cand_cache **cache = module_cand_cache;

	mutex_lock(&cand_cache_mutex);
	__purge_cand_cache(btf, cache, MODULE_CAND_CACHE_SIZE);
	mutex_unlock(&cand_cache_mutex);
}
2021-12-06 17:48:39 -08:00
# endif
2021-12-01 10:10:31 -08:00
static struct bpf_cand_cache *
bpf_core_add_cands ( struct bpf_cand_cache * cands , const struct btf * targ_btf ,
int targ_start_id )
{
struct bpf_cand_cache * new_cands ;
const struct btf_type * t ;
const char * targ_name ;
size_t targ_essent_len ;
int n , i ;
n = btf_nr_types ( targ_btf ) ;
for ( i = targ_start_id ; i < n ; i + + ) {
t = btf_type_by_id ( targ_btf , i ) ;
if ( btf_kind ( t ) ! = cands - > kind )
continue ;
targ_name = btf_name_by_offset ( targ_btf , t - > name_off ) ;
if ( ! targ_name )
continue ;
/* the resched point is before strncmp to make sure that search
* for non - existing name will have a chance to schedule ( ) .
*/
cond_resched ( ) ;
if ( strncmp ( cands - > name , targ_name , cands - > name_len ) ! = 0 )
continue ;
targ_essent_len = bpf_core_essential_name_len ( targ_name ) ;
if ( targ_essent_len ! = cands - > name_len )
continue ;
/* most of the time there is only one candidate for a given kind+name pair */
new_cands = kmalloc ( sizeof_cands ( cands - > cnt + 1 ) , GFP_KERNEL ) ;
if ( ! new_cands ) {
bpf_free_cands ( cands ) ;
return ERR_PTR ( - ENOMEM ) ;
}
memcpy ( new_cands , cands , sizeof_cands ( cands - > cnt ) ) ;
bpf_free_cands ( cands ) ;
cands = new_cands ;
cands - > cands [ cands - > cnt ] . btf = targ_btf ;
cands - > cands [ cands - > cnt ] . id = i ;
cands - > cnt + + ;
}
return cands ;
}
/* Find the CO-RE target candidates for local type @local_type_id of
 * ctx->btf.
 *
 * Lookup order:
 *   1. vmlinux_cand_cache; a hit with candidates is returned directly,
 *      a hit without candidates falls through to the module lookup;
 *   2. a scan of vmlinux BTF, whose result (even when empty) is cached;
 *   3. module_cand_cache; any hit is returned, even an empty one;
 *   4. a scan of every module BTF registered in btf_idr, whose combined
 *      result (even when empty) is cached.
 *
 * The returned list is owned by one of the caches and must not be freed by
 * the caller. bpf_core_apply() calls this with cand_cache_mutex held.
 *
 * Returns a cache entry (possibly with cnt == 0), or an ERR_PTR():
 * -EINVAL when vmlinux BTF is unavailable, the local type id is invalid or
 * the local type is anonymous; -ENOMEM on allocation failure; or whatever
 * error bpf_get_btf_vmlinux() reported.
 */
static struct bpf_cand_cache *
bpf_core_find_cands(struct bpf_core_ctx *ctx, u32 local_type_id)
{
	struct bpf_cand_cache *cands, *cc, local_cand = {};
	const struct btf *local_btf = ctx->btf;
	const struct btf_type *local_type;
	const struct btf *main_btf;
	size_t local_essent_len;
	struct btf *mod_btf;
	const char *name;
	int id;

	main_btf = bpf_get_btf_vmlinux();
	if (IS_ERR(main_btf))
		return ERR_CAST(main_btf);
	if (!main_btf)
		return ERR_PTR(-EINVAL);

	local_type = btf_type_by_id(local_btf, local_type_id);
	if (!local_type)
		return ERR_PTR(-EINVAL);

	name = btf_name_by_offset(local_btf, local_type->name_off);
	if (str_is_empty(name))
		return ERR_PTR(-EINVAL);
	local_essent_len = bpf_core_essential_name_len(name);

	cands = &local_cand;
	cands->name = name;
	cands->kind = btf_kind(local_type);
	cands->name_len = local_essent_len;

	cc = check_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE);
	/* cands is a pointer to stack here */
	if (cc) {
		if (cc->cnt)
			return cc;
		goto check_modules;
	}

	/* Attempt to find target candidates in vmlinux BTF first */
	cands = bpf_core_add_cands(cands, main_btf, 1);
	if (IS_ERR(cands))
		return ERR_CAST(cands);

	/* cands is a pointer to kmalloced memory here if cands->cnt > 0 */

	/* populate cache even when cands->cnt == 0 */
	cc = populate_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE);
	if (IS_ERR(cc))
		return ERR_CAST(cc);

	/* if vmlinux BTF has any candidate, don't go for module BTFs */
	if (cc->cnt)
		return cc;

check_modules:
	/* cands is a pointer to stack here and cands->cnt == 0 */
	cc = check_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE);
	if (cc)
		/* if cache has it return it even if cc->cnt == 0 */
		return cc;

	/* If candidate is not found in vmlinux's BTF then search in module's BTFs */
	spin_lock_bh(&btf_idr_lock);
	idr_for_each_entry(&btf_idr, mod_btf, id) {
		if (!btf_is_module(mod_btf))
			continue;
		/* linear search could be slow hence unlock/lock
		 * the IDR to avoid holding it for too long
		 */
		btf_get(mod_btf);
		spin_unlock_bh(&btf_idr_lock);
		cands = bpf_core_add_cands(cands, mod_btf, btf_nr_types(main_btf));
		/* Drop the reference before re-taking btf_idr_lock: this may be
		 * the last reference if the module went away meanwhile, and
		 * releasing a BTF object is expected to take btf_idr_lock itself
		 * to remove the id from btf_idr, which would self-deadlock.
		 * NOTE(review): confirm against btf_put()/btf_free_id().
		 * mod_btf is not dereferenced after this point; the iteration
		 * continues from 'id'.
		 */
		btf_put(mod_btf);
		if (IS_ERR(cands))
			return ERR_CAST(cands);
		spin_lock_bh(&btf_idr_lock);
	}
	spin_unlock_bh(&btf_idr_lock);
	/* cands is a pointer to kmalloced memory here if cands->cnt > 0
	 * or pointer to stack if cands->cnt == 0.
	 * Copy it into the cache even when cands->cnt == 0 and
	 * return the result.
	 */
	return populate_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE);
}
2021-12-01 10:10:28 -08:00
/* Apply one CO-RE relocation @relo (index @relo_idx) to the instruction
 * buffer @insn of the program described by @ctx.
 *
 * Unless the relocation kind is BPF_CORE_TYPE_ID_LOCAL, target candidates
 * are looked up through bpf_core_find_cands() and copied into a temporary
 * bpf_core_cand_list; cand_cache_mutex is held from that lookup until the
 * relocation has been computed and patched. The relocation result is
 * computed by bpf_core_calc_relo_insn() and written into the instruction by
 * bpf_core_patch_insn().
 *
 * Returns 0 on success; otherwise -ENOMEM on allocation failure, the error
 * from the candidate search, or the error returned by
 * bpf_core_calc_relo_insn() / bpf_core_patch_insn().
 */
int bpf_core_apply ( struct bpf_core_ctx * ctx , const struct bpf_core_relo * relo ,
int relo_idx , void * insn )
{
2021-12-01 10:10:31 -08:00
bool need_cands = relo - > kind ! = BPF_CORE_TYPE_ID_LOCAL ;
struct bpf_core_cand_list cands = { } ;
2022-02-15 17:58:50 -05:00
struct bpf_core_relo_res targ_res ;
2021-12-03 10:28:36 -08:00
struct bpf_core_spec * specs ;
2021-12-01 10:10:31 -08:00
int err ;
2021-12-03 10:28:36 -08:00
/* ~4k of temp memory necessary to convert LLVM spec like "0:1:0:5"
* into arrays of btf_ids of struct fields and array indices .
*/
specs = kcalloc ( 3 , sizeof ( * specs ) , GFP_KERNEL ) ;
if ( ! specs )
return - ENOMEM ;
2021-12-01 10:10:31 -08:00
if ( need_cands ) {
struct bpf_cand_cache * cc ;
int i ;
/* released at the 'out' label below; every exit from here on goes there */
mutex_lock ( & cand_cache_mutex ) ;
cc = bpf_core_find_cands ( ctx , relo - > type_id ) ;
if ( IS_ERR ( cc ) ) {
bpf_log ( ctx - > log , " target candidate search failed for %d \n " ,
relo - > type_id ) ;
err = PTR_ERR ( cc ) ;
goto out ;
}
/* cands.cands stays NULL when there are no candidates */
if ( cc - > cnt ) {
cands . cands = kcalloc ( cc - > cnt , sizeof ( * cands . cands ) , GFP_KERNEL ) ;
if ( ! cands . cands ) {
err = - ENOMEM ;
goto out ;
}
}
/* copy (btf, id) pairs out of the cache entry into the temporary list */
for ( i = 0 ; i < cc - > cnt ; i + + ) {
bpf_log ( ctx - > log ,
" CO-RE relocating %s %s: found target candidate [%d] \n " ,
btf_kind_str [ cc - > kind ] , cc - > name , cc - > cands [ i ] . id ) ;
cands . cands [ i ] . btf = cc - > cands [ i ] . btf ;
cands . cands [ i ] . id = cc - > cands [ i ] . id ;
}
cands . len = cc - > cnt ;
/* cand_cache_mutex needs to span the cache lookup and
* copy of btf pointer into bpf_core_cand_list ,
2022-02-15 17:58:50 -05:00
* since module can be unloaded while bpf_core_calc_relo_insn
2021-12-01 10:10:31 -08:00
* is working with module ' s btf .
*/
}
2022-02-15 17:58:50 -05:00
err = bpf_core_calc_relo_insn ( ( void * ) ctx - > log , relo , relo_idx , ctx - > btf , & cands , specs ,
& targ_res ) ;
if ( err )
goto out ;
/* insn_off / 8: presumably converts a byte offset into an instruction
 * index (8-byte instructions) -- confirm against bpf_core_patch_insn()
 */
err = bpf_core_patch_insn ( ( void * ) ctx - > log , insn , relo - > insn_off / 8 , relo , relo_idx ,
& targ_res ) ;
2021-12-01 10:10:31 -08:00
out :
2021-12-03 10:28:36 -08:00
kfree ( specs ) ;
2021-12-01 10:10:31 -08:00
if ( need_cands ) {
kfree ( cands . cands ) ;
mutex_unlock ( & cand_cache_mutex ) ;
/* print_cand_cache() takes cand_cache_mutex itself, so it runs after the unlock */
if ( ctx - > log - > level & BPF_LOG_LEVEL2 )
print_cand_cache ( ctx - > log ) ;
}
return err ;
2021-12-01 10:10:28 -08:00
}