2018-10-06 02:40:00 +03:00
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
2018-01-30 23:55:03 +03:00
2015-07-01 05:14:03 +03:00
/*
* common eBPF ELF operations .
*
* Copyright ( C ) 2013 - 2015 Alexei Starovoitov < ast @ kernel . org >
* Copyright ( C ) 2015 Wang Nan < wangnan0 @ huawei . com >
* Copyright ( C ) 2015 Huawei Inc .
2016-07-04 14:02:42 +03:00
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation ;
* version 2.1 of the License ( not later ! )
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU Lesser General Public License for more details .
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program ; if not , see < http : //www.gnu.org/licenses>
2015-07-01 05:14:03 +03:00
*/
2018-10-04 01:26:42 +03:00
# ifndef __LIBBPF_BPF_H
# define __LIBBPF_BPF_H
2015-07-01 05:14:03 +03:00
# include <linux/bpf.h>
2018-04-20 11:05:16 +03:00
# include <stdbool.h>
2017-02-07 23:56:05 +03:00
# include <stddef.h>
2015-07-01 05:14:03 +03:00
2018-11-21 20:29:44 +03:00
/* Give the declarations that follow C linkage when compiled as C++. */
#ifdef __cplusplus
extern "C" {
#endif
2018-10-16 08:50:34 +03:00
/*
 * Marker placed on every public declaration. A build may pre-define
 * LIBBPF_API; when it does not, the marker expands to the compiler's
 * default-visibility attribute.
 */
#ifndef LIBBPF_API
#define LIBBPF_API __attribute__((visibility("default")))
#endif
2018-04-19 01:56:05 +03:00
/*
 * Settings for a map to create; passed by pointer to
 * bpf_create_map_xattr().
 *
 * NOTE(review): this header does not spell out field semantics. The notes
 * below are inferred from the field names — confirm against bpf(2).
 */
struct bpf_create_map_attr {
	const char *name;
	enum bpf_map_type map_type;
	__u32 map_flags;
	__u32 key_size;		/* presumably in bytes — confirm */
	__u32 value_size;	/* presumably in bytes — confirm */
	__u32 max_entries;
	__u32 numa_node;
	__u32 btf_fd;
	__u32 btf_key_type_id;
	__u32 btf_value_type_id;
	__u32 map_ifindex;
	__u32 inner_map_fd;	/* presumably used by map-in-map types only — confirm */
};
2018-10-16 08:50:34 +03:00
/*
 * Map creation entry points.
 *
 * bpf_create_map_xattr() reads every setting from *create_attr. The other
 * functions take a selection of settings as individual arguments:
 *   - the *_name and *_node variants accept a map name;
 *   - the *_node variants also accept a node (presumably the NUMA node,
 *     cf. bpf_create_map_attr.numa_node — confirm);
 *   - the *_in_map* variants accept inner_map_fd and no value_size.
 *
 * NOTE(review): the return convention is not visible in this header —
 * confirm against the implementation.
 */
LIBBPF_API int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr);
LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name,
				   int key_size, int value_size,
				   int max_entries, __u32 map_flags, int node);
LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name,
				   int key_size, int value_size,
				   int max_entries, __u32 map_flags);
LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size,
			      int value_size, int max_entries, __u32 map_flags);
LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type,
					  const char *name, int key_size,
					  int inner_map_fd, int max_entries,
					  __u32 map_flags, int node);
LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type,
				     const char *name, int key_size,
				     int inner_map_fd, int max_entries,
				     __u32 map_flags);
2015-07-01 05:14:03 +03:00
2018-03-31 01:08:01 +03:00
/*
 * Settings for a program to load; passed by pointer to
 * bpf_load_program_xattr().
 *
 * NOTE(review): apart from log_level, this header does not spell out field
 * semantics — confirm against bpf(2).
 */
struct bpf_load_program_attr {
	enum bpf_prog_type prog_type;
	enum bpf_attach_type expected_attach_type;
	const char *name;
	const struct bpf_insn *insns;
	size_t insns_cnt;
	const char *license;
	__u32 kern_version;
	__u32 prog_ifindex;
	__u32 prog_btf_fd;
	__u32 func_info_rec_size;
	const void *func_info;
	__u32 func_info_cnt;
	__u32 line_info_rec_size;
	const void *line_info;
	__u32 line_info_cnt;
	/*
	 * Kernel verifier log verbosity:
	 *   0   - no logs
	 *   1   - logs mostly useful
	 *   > 1 - verbose
	 * The supported values are 0, 1 and 2.
	 */
	__u32 log_level;
};
2018-10-15 21:19:55 +03:00
/* Flag bits that direct loading requirements. */
#define MAPS_RELAX_COMPAT 0x01
2015-07-01 05:14:06 +03:00
/* Recommended log buffer size */
2019-04-02 07:27:47 +03:00
/* 16 MiB: the verifier maximum in kernels <= 5.1. */
#define BPF_LOG_BUF_SIZE (16 * 1024 * 1024)
2018-10-16 08:50:34 +03:00
/*
 * Program loading entry points.
 *
 * bpf_load_program_xattr() reads the program description from *load_attr;
 * bpf_load_program() takes a selection of those settings as individual
 * arguments. Both receive a caller-supplied buffer log_buf of log_buf_sz
 * bytes (presumably filled with the verifier log — confirm).
 *
 * NOTE(review): the return convention is not visible in this header —
 * confirm against the implementation.
 */
LIBBPF_API int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
				      char *log_buf, size_t log_buf_sz);
LIBBPF_API int bpf_load_program(enum bpf_prog_type type,
				const struct bpf_insn *insns, size_t insns_cnt,
				const char *license, __u32 kern_version,
				char *log_buf, size_t log_buf_sz);
/*
 * Takes the same arguments as bpf_load_program() plus prog_flags and an
 * explicit log_level.
 *
 * NOTE(review): how this differs from bpf_load_program() in behavior, and
 * the return convention, are not visible in this header — confirm against
 * the implementation.
 */
LIBBPF_API int bpf_verify_program(enum bpf_prog_type type,
				  const struct bpf_insn *insns,
				  size_t insns_cnt, __u32 prog_flags,
				  const char *license, __u32 kern_version,
				  char *log_buf, size_t log_buf_sz,
				  int log_level);
2015-07-01 05:14:06 +03:00
2018-10-16 08:50:34 +03:00
/*
 * Map element update on the map referred to by fd. key and value are
 * read-only inputs (const).
 * NOTE(review): the meaning of flags is not visible here — see bpf(2).
 */
LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value,
				   __u64 flags);
2016-11-26 10:03:25 +03:00
2018-10-16 08:50:34 +03:00
/*
 * Map element lookup on the map referred to by fd. key is a read-only
 * input; value is a writable buffer (presumably receives the element —
 * confirm).
 */
LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value);
2019-02-01 02:40:11 +03:00
/*
 * Same argument list as bpf_map_lookup_elem() with an extra flags word.
 * NOTE(review): the accepted flags are not visible here — see bpf(2).
 */
LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void *value,
					 __u64 flags);
2018-10-18 16:16:41 +03:00
/*
 * Lookup-and-delete on the map referred to by fd. key is a read-only
 * input; value is a writable buffer (presumably receives the removed
 * element — confirm).
 */
LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
					      void *value);
2018-10-16 08:50:34 +03:00
/* Element deletion on the map referred to by fd; key is a read-only input. */
LIBBPF_API int bpf_map_delete_elem(int fd, const void *key);

/*
 * Key iteration on the map referred to by fd. key is a read-only input;
 * next_key is a writable buffer (presumably receives the key following
 * key — confirm).
 */
LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key);
bpf, libbpf: support global data/bss/rodata sections
This work adds BPF loader support for global data sections
to libbpf. This allows writing BPF programs in a more natural
C-like way by being able to define global variables and const
data.
Back at LPC 2018 [0] we presented a first prototype which
implemented support for global data sections by extending BPF
syscall where union bpf_attr would get additional memory/size
pair for each section passed during prog load in order to later
add this base address into the ldimm64 instruction along with
the user provided offset when accessing a variable. Consensus
from LPC was that for proper upstream support, it would be
more desirable to use maps instead of bpf_attr extension as
this would allow for introspection of these sections as well
as potential live updates of their content. This work follows
this path by taking the following steps from loader side:
1) In bpf_object__elf_collect() step we pick up ".data",
".rodata", and ".bss" section information.
2) If present, in bpf_object__init_internal_map() we add
maps to the obj's map array that corresponds to each
of the present sections. Given section size and access
properties can differ, a single entry array map is
created with value size that is corresponding to the
ELF section size of .data, .bss or .rodata. These
internal maps are integrated into the normal map
handling of libbpf such that when user traverses all
obj maps, they can be differentiated from user-created
ones via bpf_map__is_internal(). In later steps when
we actually create these maps in the kernel via
bpf_object__create_maps(), then for .data and .rodata
sections their content is copied into the map through
bpf_map_update_elem(). For .bss this is not necessary
since array map is already zero-initialized by default.
Additionally, for .rodata the map is frozen as read-only
after setup, such that neither from program nor syscall
side writes would be possible.
3) In bpf_program__collect_reloc() step, we record the
corresponding map, insn index, and relocation type for
the global data.
4) And last but not least in the actual relocation step in
bpf_program__relocate(), we mark the ldimm64 instruction
with src_reg = BPF_PSEUDO_MAP_VALUE where in the first
imm field the map's file descriptor is stored as similarly
done as in BPF_PSEUDO_MAP_FD, and in the second imm field
(as ldimm64 is 2-insn wide) we store the access offset
into the section. Given these maps have only a single element,
ldimm64's off remains zero in both parts.
5) On kernel side, this special marked BPF_PSEUDO_MAP_VALUE
load will then store the actual target address in order
to have a 'map-lookup'-free access. That is, the actual
map value base address + offset. The destination register
in the verifier will then be marked as PTR_TO_MAP_VALUE,
containing the fixed offset as reg->off and backing BPF
map as reg->map_ptr. Meaning, it's treated as any other
normal map value from verification side, only with
efficient, direct value access instead of actual call to
map lookup helper as in the typical case.
Currently, only support for static global variables has been
added, and libbpf rejects non-static global variables from
loading. This can be lifted once we have proper semantics
for how BPF will treat multi-object BPF loads. From BTF side,
libbpf will set the value type id of the types corresponding
to the ".bss", ".data" and ".rodata" names which LLVM will
emit without the object name prefix. The key type will be
left as zero, thus making use of the key-less BTF option in
array maps.
Simple example dump of a program using global vars in each
section:
# bpftool prog
[...]
6784: sched_cls name load_static_dat tag a7e1291567277844 gpl
loaded_at 2019-03-11T15:39:34+0000 uid 0
xlated 1776B jited 993B memlock 4096B map_ids 2238,2237,2235,2236,2239,2240
# bpftool map show id 2237
2237: array name test_glo.bss flags 0x0
key 4B value 64B max_entries 1 memlock 4096B
# bpftool map show id 2235
2235: array name test_glo.data flags 0x0
key 4B value 64B max_entries 1 memlock 4096B
# bpftool map show id 2236
2236: array name test_glo.rodata flags 0x80
key 4B value 96B max_entries 1 memlock 4096B
# bpftool prog dump xlated id 6784
int load_static_data(struct __sk_buff * skb):
; int load_static_data(struct __sk_buff *skb)
0: (b7) r6 = 0
; test_reloc(number, 0, &num0);
1: (63) *(u32 *)(r10 -4) = r6
2: (bf) r2 = r10
; int load_static_data(struct __sk_buff *skb)
3: (07) r2 += -4
; test_reloc(number, 0, &num0);
4: (18) r1 = map[id:2238]
6: (18) r3 = map[id:2237][0]+0 <-- direct addr in .bss area
8: (b7) r4 = 0
9: (85) call array_map_update_elem#100464
10: (b7) r1 = 1
; test_reloc(number, 1, &num1);
[...]
; test_reloc(string, 2, str2);
120: (18) r8 = map[id:2237][0]+16 <-- same here at offset +16
122: (18) r1 = map[id:2239]
124: (18) r3 = map[id:2237][0]+16
126: (b7) r4 = 0
127: (85) call array_map_update_elem#100464
128: (b7) r1 = 120
; str1[5] = 'x';
129: (73) *(u8 *)(r9 +5) = r1
; test_reloc(string, 3, str1);
130: (b7) r1 = 3
131: (63) *(u32 *)(r10 -4) = r1
132: (b7) r9 = 3
133: (bf) r2 = r10
; int load_static_data(struct __sk_buff *skb)
134: (07) r2 += -4
; test_reloc(string, 3, str1);
135: (18) r1 = map[id:2239]
137: (18) r3 = map[id:2235][0]+16 <-- direct addr in .data area
139: (b7) r4 = 0
140: (85) call array_map_update_elem#100464
141: (b7) r1 = 111
; __builtin_memcpy(&str2[2], "hello", sizeof("hello"));
142: (73) *(u8 *)(r8 +6) = r1 <-- further access based on .bss data
143: (b7) r1 = 108
144: (73) *(u8 *)(r8 +5) = r1
[...]
For Cilium use-case in particular, this enables migrating configuration
constants from Cilium daemon's generated header defines into global
data sections such that expensive runtime recompilations with LLVM can
be avoided altogether. Instead, the ELF file becomes effectively a
"template", meaning, it is compiled only once (!) and the Cilium daemon
will then rewrite relevant configuration data from the ELF's .data or
.rodata sections directly instead of recompiling the program. The
updated ELF is then loaded into the kernel and atomically replaces
the existing program in the networking datapath. More info in [0].
Based upon recent fix in LLVM, commit c0db6b6bd444 ("[BPF] Don't fail
for static variables").
[0] LPC 2018, BPF track, "ELF relocation for static data in BPF",
http://vger.kernel.org/lpc-bpf2018.html#session-3
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2019-04-10 00:20:13 +03:00
/*
 * Freeze the map referred to by fd.
 * NOTE(review): presumably this is what makes the .rodata map read-only
 * after setup, so that writes from neither the program nor the syscall
 * side are possible — confirm against the implementation.
 */
LIBBPF_API int bpf_map_freeze(int fd);
2018-10-16 08:50:34 +03:00
/*
 * Object pinning: bpf_obj_pin() takes an fd and a pathname, bpf_obj_get()
 * takes only a pathname (presumably returning an fd — confirm).
 */
LIBBPF_API int bpf_obj_pin(int fd, const char *pathname);
LIBBPF_API int bpf_obj_get(const char *pathname);

/*
 * Program attach/detach against attachable_fd for a given attach type.
 * bpf_prog_detach() names no program; bpf_prog_detach2() additionally
 * takes the prog_fd to detach.
 * NOTE(review): the meaning of flags is not visible here — see bpf(2).
 */
LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd,
			       enum bpf_attach_type type, unsigned int flags);
LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
				enum bpf_attach_type type);
2018-12-03 14:31:25 +03:00
/* In/out arguments of bpf_prog_test_run_xattr(). */
struct bpf_prog_test_run_attr {
	int prog_fd;
	int repeat;
	const void *data_in;
	__u32 data_size_in;
	/* optional */
	void *data_out;
	/*
	 * in:  max length of data_out
	 * out: length of data_out
	 */
	__u32 data_size_out;
	/* out: return code of the BPF program */
	__u32 retval;
	/* out: average per repetition in ns */
	__u32 duration;
};
/*
 * Test-run a program as described by *test_attr. The pointer is non-const
 * because the struct carries output fields (data_size_out, retval,
 * duration).
 */
LIBBPF_API int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr);
/*
* bpf_prog_test_run does not check that data_out is large enough . Consider
* using bpf_prog_test_run_xattr instead .
*/
2018-10-16 08:50:34 +03:00
/*
 * Argument-list form of the program test run. size_out, retval and
 * duration are writable pointers (presumably outputs, mirroring the out
 * fields of struct bpf_prog_test_run_attr — confirm).
 */
LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data,
				 __u32 size, void *data_out, __u32 *size_out,
				 __u32 *retval, __u32 *duration);
/*
 * ID iteration for programs and maps: start_id is an input, next_id a
 * writable pointer (presumably receives the ID following start_id —
 * confirm).
 */
LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);

/* ID-to-fd conversion for programs, maps and BTF objects. */
LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
LIBBPF_API int bpf_map_get_fd_by_id(__u32 id);
LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id);

/*
 * Object info query by fd. info and info_len are both writable.
 * NOTE(review): the layout of *info and the in/out use of *info_len are
 * not visible here — see bpf(2).
 */
LIBBPF_API int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
/*
 * Program query on target_fd for a given attach type. attach_flags,
 * prog_ids and prog_cnt are writable pointers.
 * NOTE(review): which of them are inputs, outputs or both is not visible
 * here — see bpf(2).
 */
LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,
			      __u32 query_flags, __u32 *attach_flags,
			      __u32 *prog_ids, __u32 *prog_cnt);
/* Raw tracepoint open: takes a tracepoint name and a program fd. */
LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd);

/*
 * BTF load: btf points to btf_size bytes of data; log_buf/log_buf_size is
 * a caller-supplied buffer, and do_log is a boolean switch (presumably
 * enabling logging into log_buf — confirm).
 */
LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf,
			    __u32 log_buf_size, bool do_log);

/*
 * Task fd query for the given pid and fd. buf, buf_len, prog_id, fd_type,
 * probe_offset and probe_addr are writable pointers.
 * NOTE(review): their exact in/out roles are not visible here — see bpf(2).
 */
LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
				 __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
				 __u64 *probe_offset, __u64 *probe_addr);
2018-11-21 20:29:44 +03:00
# ifdef __cplusplus
} /* extern "C" */
# endif
2018-10-04 01:26:42 +03:00
# endif /* __LIBBPF_BPF_H */