2018-10-18 15:16:25 +02:00
// SPDX-License-Identifier: GPL-2.0
/*
 * queue_stack_maps.c: BPF queue and stack maps
 *
 * Copyright (c) 2018 Politecnico di Torino
 */
# include <linux/bpf.h>
# include <linux/list.h>
# include <linux/slab.h>
2018-11-22 10:49:56 -08:00
# include <linux/capability.h>
2018-10-18 15:16:25 +02:00
# include "percpu_freelist.h"
/*
 * Creation flags accepted for queue and stack maps; any other bit set in
 * attr->map_flags makes queue_stack_map_alloc_check() fail with -EINVAL.
 */
#define QUEUE_STACK_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
2018-10-18 15:16:25 +02:00
struct bpf_queue_stack {
struct bpf_map map ;
raw_spinlock_t lock ;
u32 head , tail ;
u32 size ; /* max_entries + 1 */
bpf, libbpf: Replace zero-length array with flexible-array
The current codebase makes use of the zero-length array language
extension to the C90 standard, but the preferred mechanism to declare
variable-length types such as these ones is a flexible array member[1][2],
introduced in C99:
struct foo {
int stuff;
struct boo array[];
};
By making use of the mechanism above, we will get a compiler warning
in case the flexible array does not occur last in the structure, which
will help us prevent some kind of undefined behavior bugs from being
inadvertently introduced[3] to the codebase from now on.
Also, notice that, dynamic memory allocations won't be affected by
this change:
"Flexible array members have incomplete type, and so the sizeof operator
may not be applied. As a quirk of the original implementation of
zero-length arrays, sizeof evaluates to zero."[1]
sizeof(flexible-array-member) triggers a warning because flexible array
members have incomplete type[1]. There are some instances of code in
which the sizeof operator is being incorrectly/erroneously applied to
zero-length arrays and the result is zero. Such instances may be hiding
some bugs. So, this work (flexible-array member conversions) will also
help to get completely rid of those sorts of issues.
This issue was found with the help of Coccinelle.
[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
[2] https://github.com/KSPP/linux/issues/21
[3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200507185057.GA13981@embeddedor
2020-05-07 13:50:57 -05:00
char elements [ ] __aligned ( 8 ) ;
2018-10-18 15:16:25 +02:00
} ;
/* Recover the enclosing bpf_queue_stack from its embedded generic map. */
static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map)
{
	struct bpf_queue_stack *qs;

	qs = container_of(map, struct bpf_queue_stack, map);
	return qs;
}
static bool queue_stack_map_is_empty ( struct bpf_queue_stack * qs )
{
return qs - > head = = qs - > tail ;
}
static bool queue_stack_map_is_full ( struct bpf_queue_stack * qs )
{
u32 head = qs - > head + 1 ;
if ( unlikely ( head > = qs - > size ) )
head = 0 ;
return head = = qs - > tail ;
}
/* Called from syscall */
static int queue_stack_map_alloc_check ( union bpf_attr * attr )
{
2020-05-13 16:03:54 -07:00
if ( ! bpf_capable ( ) )
2018-11-22 10:49:56 -08:00
return - EPERM ;
2018-10-18 15:16:25 +02:00
/* check sanity of attributes */
if ( attr - > max_entries = = 0 | | attr - > key_size ! = 0 | |
2018-11-22 10:49:56 -08:00
attr - > value_size = = 0 | |
bpf: add program side {rd, wr}only support for maps
This work adds two new map creation flags BPF_F_RDONLY_PROG
and BPF_F_WRONLY_PROG in order to allow for read-only or
write-only BPF maps from a BPF program side.
Today we have BPF_F_RDONLY and BPF_F_WRONLY, but this only
applies to system call side, meaning the BPF program has full
read/write access to the map as usual while bpf(2) calls with
map fd can either only read or write into the map depending
on the flags. BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG allows
for the exact opposite such that verifier is going to reject
program loads if write into a read-only map or a read into a
write-only map is detected. For read-only map case also some
helpers are forbidden for programs that would alter the map
state such as map deletion, update, etc. As opposed to the two
BPF_F_RDONLY / BPF_F_WRONLY flags, BPF_F_RDONLY_PROG as well
as BPF_F_WRONLY_PROG really do correspond to the map lifetime.
We've enabled this generic map extension to various non-special
maps holding normal user data: array, hash, lru, lpm, local
storage, queue and stack. Further generic map types could be
followed up in future depending on use-case. Main use case
here is to forbid writes into .rodata map values from verifier
side.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2019-04-09 23:20:05 +02:00
attr - > map_flags & ~ QUEUE_STACK_CREATE_FLAG_MASK | |
! bpf_map_flags_access_ok ( attr - > map_flags ) )
2018-10-18 15:16:25 +02:00
return - EINVAL ;
if ( attr - > value_size > KMALLOC_MAX_SIZE )
/* if value_size is bigger, the user space won't be able to
* access the elements .
*/
return - E2BIG ;
return 0 ;
}
static struct bpf_map * queue_stack_map_alloc ( union bpf_attr * attr )
{
int ret , numa_node = bpf_map_attr_numa_node ( attr ) ;
2019-05-29 18:03:58 -07:00
struct bpf_map_memory mem = { 0 } ;
2018-10-18 15:16:25 +02:00
struct bpf_queue_stack * qs ;
2018-11-22 10:49:56 -08:00
u64 size , queue_size , cost ;
2018-10-18 15:16:25 +02:00
2018-11-22 10:49:56 -08:00
size = ( u64 ) attr - > max_entries + 1 ;
cost = queue_size = sizeof ( * qs ) + size * attr - > value_size ;
2018-10-18 15:16:25 +02:00
2019-05-29 18:03:58 -07:00
ret = bpf_map_charge_init ( & mem , cost ) ;
2018-10-18 15:16:25 +02:00
if ( ret < 0 )
return ERR_PTR ( ret ) ;
qs = bpf_map_area_alloc ( queue_size , numa_node ) ;
2019-05-29 18:03:58 -07:00
if ( ! qs ) {
bpf_map_charge_finish ( & mem ) ;
2018-10-18 15:16:25 +02:00
return ERR_PTR ( - ENOMEM ) ;
2019-05-29 18:03:58 -07:00
}
2018-10-18 15:16:25 +02:00
memset ( qs , 0 , sizeof ( * qs ) ) ;
bpf_map_init_from_attr ( & qs - > map , attr ) ;
2019-05-29 18:03:58 -07:00
bpf_map_charge_move ( & qs - > map . memory , & mem ) ;
2018-10-18 15:16:25 +02:00
qs - > size = size ;
raw_spin_lock_init ( & qs - > lock ) ;
return & qs - > map ;
}
/*
 * Called when map->refcnt goes to zero, either from workqueue or from
 * syscall. Releases the whole area obtained in queue_stack_map_alloc().
 */
static void queue_stack_map_free(struct bpf_map *map)
{
	bpf_map_area_free(bpf_queue_stack(map));
}
/*
 * Copy the oldest element (the slot at tail) of a queue map into @value and,
 * when @delete is true, drop it from the queue.
 *
 * @value must be able to hold map->value_size bytes. Runs under qs->lock
 * with interrupts disabled.
 *
 * Returns 0 on success. When the queue is empty, @value is zero-filled and
 * -ENOENT is returned.
 */
static int __queue_map_get(struct bpf_map *map, void *value, bool delete)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long flags;
	int err = 0;
	void *ptr;

	raw_spin_lock_irqsave(&qs->lock, flags);

	if (queue_stack_map_is_empty(qs)) {
		memset(value, 0, qs->map.value_size);
		err = -ENOENT;
		goto out;
	}

	/*
	 * Widen before multiplying: with 32-bit operands the byte offset
	 * would wrap once slot index * value_size reaches 4 GiB and point at
	 * the wrong slot.
	 */
	ptr = &qs->elements[(size_t)qs->tail * qs->map.value_size];
	memcpy(value, ptr, qs->map.value_size);

	if (delete) {
		/* consume the slot, wrapping at the end of the ring */
		if (unlikely(++qs->tail >= qs->size))
			qs->tail = 0;
	}

out:
	raw_spin_unlock_irqrestore(&qs->lock, flags);
	return err;
}
/*
 * Copy the most recently pushed element (the slot just before head) of a
 * stack map into @value and, when @delete is true, drop it from the stack.
 *
 * @value must be able to hold map->value_size bytes. Runs under qs->lock
 * with interrupts disabled.
 *
 * Returns 0 on success. When the stack is empty, @value is zero-filled and
 * -ENOENT is returned.
 */
static int __stack_map_get(struct bpf_map *map, void *value, bool delete)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long flags;
	int err = 0;
	void *ptr;
	u32 index;

	raw_spin_lock_irqsave(&qs->lock, flags);

	if (queue_stack_map_is_empty(qs)) {
		memset(value, 0, qs->map.value_size);
		err = -ENOENT;
		goto out;
	}

	/*
	 * Step back one slot from head. When head is 0 the unsigned
	 * subtraction wraps to a huge value, which the range check below
	 * turns into the last slot of the ring.
	 */
	index = qs->head - 1;
	if (unlikely(index >= qs->size))
		index = qs->size - 1;

	/*
	 * Widen before multiplying: with 32-bit operands the byte offset
	 * would wrap once slot index * value_size reaches 4 GiB and point at
	 * the wrong slot.
	 */
	ptr = &qs->elements[(size_t)index * qs->map.value_size];
	memcpy(value, ptr, qs->map.value_size);

	if (delete)
		qs->head = index;

out:
	raw_spin_unlock_irqrestore(&qs->lock, flags);
	return err;
}
/* Called from syscall or from eBPF program */
static int queue_map_peek_elem ( struct bpf_map * map , void * value )
{
return __queue_map_get ( map , value , false ) ;
}
/* Called from syscall or from eBPF program */
static int stack_map_peek_elem ( struct bpf_map * map , void * value )
{
return __stack_map_get ( map , value , false ) ;
}
/* Called from syscall or from eBPF program */
static int queue_map_pop_elem ( struct bpf_map * map , void * value )
{
return __queue_map_get ( map , value , true ) ;
}
/* Called from syscall or from eBPF program */
static int stack_map_pop_elem ( struct bpf_map * map , void * value )
{
return __stack_map_get ( map , value , true ) ;
}
/*
 * Called from syscall or from eBPF program.
 *
 * Append the map->value_size bytes at @value to the ring, at the head slot.
 * Used by both the queue and the stack flavour.
 *
 * @flags: 0, or BPF_EXIST to force making room by dropping the oldest
 *         element when the map is full. BPF_NOEXIST and any value above
 *         BPF_EXIST are rejected.
 *
 * Returns 0 on success, -EINVAL for unsupported @flags, -E2BIG when the map
 * is full and BPF_EXIST was not given.
 */
static int queue_stack_map_push_elem(struct bpf_map *map, void *value,
				     u64 flags)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long irq_flags;
	int err = 0;
	void *dst;

	/* BPF_EXIST is used to force making room for a new element in case the
	 * map is full
	 */
	bool replace = (flags & BPF_EXIST);

	/* Check supported flags for queue and stack maps */
	if (flags & BPF_NOEXIST || flags > BPF_EXIST)
		return -EINVAL;

	raw_spin_lock_irqsave(&qs->lock, irq_flags);

	if (queue_stack_map_is_full(qs)) {
		if (!replace) {
			err = -E2BIG;
			goto out;
		}
		/* advance tail pointer to overwrite oldest element */
		if (unlikely(++qs->tail >= qs->size))
			qs->tail = 0;
	}

	/*
	 * Widen before multiplying: with 32-bit operands the byte offset
	 * would wrap once slot index * value_size reaches 4 GiB and the new
	 * element would land in the wrong slot.
	 */
	dst = &qs->elements[(size_t)qs->head * qs->map.value_size];
	memcpy(dst, value, qs->map.value_size);

	/* move head to the next free slot, wrapping at the end of the ring */
	if (unlikely(++qs->head >= qs->size))
		qs->head = 0;

out:
	raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
	return err;
}
/*
 * Called from syscall or from eBPF program.
 * Keyed lookup is not provided by queue/stack maps: always yields NULL.
 */
static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key)
{
	return NULL;
}
/*
 * Called from syscall or from eBPF program.
 * Keyed update is not provided by queue/stack maps: always fails with
 * -EINVAL.
 */
static int queue_stack_map_update_elem(struct bpf_map *map, void *key,
				       void *value, u64 flags)
{
	return -EINVAL;
}
/*
 * Called from syscall or from eBPF program.
 * Keyed delete is not provided by queue/stack maps: always fails with
 * -EINVAL.
 */
static int queue_stack_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}
/*
 * Called from syscall.
 * Key iteration is not provided by queue/stack maps: always fails with
 * -EINVAL.
 */
static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
					void *next_key)
{
	return -EINVAL;
}
2020-06-19 14:11:44 -07:00
static int queue_map_btf_id ;
2018-10-18 15:16:25 +02:00
const struct bpf_map_ops queue_map_ops = {
2020-08-27 18:18:06 -07:00
. map_meta_equal = bpf_map_meta_equal ,
2018-10-18 15:16:25 +02:00
. map_alloc_check = queue_stack_map_alloc_check ,
. map_alloc = queue_stack_map_alloc ,
. map_free = queue_stack_map_free ,
. map_lookup_elem = queue_stack_map_lookup_elem ,
. map_update_elem = queue_stack_map_update_elem ,
. map_delete_elem = queue_stack_map_delete_elem ,
. map_push_elem = queue_stack_map_push_elem ,
. map_pop_elem = queue_map_pop_elem ,
. map_peek_elem = queue_map_peek_elem ,
. map_get_next_key = queue_stack_map_get_next_key ,
2020-06-19 14:11:44 -07:00
. map_btf_name = " bpf_queue_stack " ,
. map_btf_id = & queue_map_btf_id ,
2018-10-18 15:16:25 +02:00
} ;
2020-06-19 14:11:44 -07:00
static int stack_map_btf_id ;
2018-10-18 15:16:25 +02:00
const struct bpf_map_ops stack_map_ops = {
2020-08-27 18:18:06 -07:00
. map_meta_equal = bpf_map_meta_equal ,
2018-10-18 15:16:25 +02:00
. map_alloc_check = queue_stack_map_alloc_check ,
. map_alloc = queue_stack_map_alloc ,
. map_free = queue_stack_map_free ,
. map_lookup_elem = queue_stack_map_lookup_elem ,
. map_update_elem = queue_stack_map_update_elem ,
. map_delete_elem = queue_stack_map_delete_elem ,
. map_push_elem = queue_stack_map_push_elem ,
. map_pop_elem = stack_map_pop_elem ,
. map_peek_elem = stack_map_peek_elem ,
. map_get_next_key = queue_stack_map_get_next_key ,
2020-06-19 14:11:44 -07:00
. map_btf_name = " bpf_queue_stack " ,
. map_btf_id = & stack_map_btf_id ,
2018-10-18 15:16:25 +02:00
} ;