2018-10-18 15:16:25 +02:00
// SPDX-License-Identifier: GPL-2.0
/*
* queue_stack_maps . c : BPF queue and stack maps
*
* Copyright ( c ) 2018 Politecnico di Torino
*/
# include <linux/bpf.h>
# include <linux/list.h>
# include <linux/slab.h>
2018-11-22 10:49:56 -08:00
# include <linux/capability.h>
2018-10-18 15:16:25 +02:00
# include "percpu_freelist.h"
/*
 * Map creation flags accepted by queue and stack maps; any other bit in
 * attr->map_flags makes queue_stack_map_alloc_check() fail with -EINVAL.
 *
 * BPF_F_ACCESS_MASK is part of the mask so that the program-side
 * BPF_F_RDONLY_PROG / BPF_F_WRONLY_PROG flags (read-only or write-only
 * maps as seen from a BPF program) can be used with queue and stack maps,
 * next to the syscall-side BPF_F_RDONLY / BPF_F_WRONLY flags.
 * NOTE(review): the exact set of bits in BPF_F_ACCESS_MASK is defined
 * outside this file - confirm against linux/bpf.h.
 */
#define QUEUE_STACK_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
2018-10-18 15:16:25 +02:00
/*
 * State shared by BPF queue and stack maps.
 *
 * The element storage is used as a ring indexed by head and tail. The ring
 * is empty when head == tail, so one slot is always left unused and size is
 * max_entries + 1.
 */
struct bpf_queue_stack {
	struct bpf_map map;	/* generic map header, anchor for container_of() */
	raw_spinlock_t lock;	/* held around every access to head/tail/elements */
	u32 head, tail;		/* head: next slot to write; tail: oldest element */
	u32 size;		/* max_entries + 1 */

	/*
	 * size * value_size bytes of element storage, allocated together with
	 * the header. Declared as a C99 flexible array member rather than a
	 * zero-length array; sizeof(struct bpf_queue_stack) is unaffected.
	 */
	char elements[] __aligned(8);
};
/* Map a generic bpf_map pointer back to the bpf_queue_stack that embeds it. */
static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map)
{
	struct bpf_queue_stack *outer;

	outer = container_of(map, struct bpf_queue_stack, map);

	return outer;
}
static bool queue_stack_map_is_empty ( struct bpf_queue_stack * qs )
{
return qs - > head = = qs - > tail ;
}
static bool queue_stack_map_is_full ( struct bpf_queue_stack * qs )
{
u32 head = qs - > head + 1 ;
if ( unlikely ( head > = qs - > size ) )
head = 0 ;
return head = = qs - > tail ;
}
/* Called from syscall */
static int queue_stack_map_alloc_check ( union bpf_attr * attr )
{
2018-11-22 10:49:56 -08:00
if ( ! capable ( CAP_SYS_ADMIN ) )
return - EPERM ;
2018-10-18 15:16:25 +02:00
/* check sanity of attributes */
if ( attr - > max_entries = = 0 | | attr - > key_size ! = 0 | |
2018-11-22 10:49:56 -08:00
attr - > value_size = = 0 | |
bpf: add program side {rd, wr}only support for maps
This work adds two new map creation flags BPF_F_RDONLY_PROG
and BPF_F_WRONLY_PROG in order to allow for read-only or
write-only BPF maps from a BPF program side.
Today we have BPF_F_RDONLY and BPF_F_WRONLY, but this only
applies to system call side, meaning the BPF program has full
read/write access to the map as usual while bpf(2) calls with
map fd can either only read or write into the map depending
on the flags. BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG allows
for the exact opposite such that verifier is going to reject
program loads if write into a read-only map or a read into a
write-only map is detected. For read-only map case also some
helpers are forbidden for programs that would alter the map
state such as map deletion, update, etc. As opposed to the two
BPF_F_RDONLY / BPF_F_WRONLY flags, BPF_F_RDONLY_PROG as well
as BPF_F_WRONLY_PROG really do correspond to the map lifetime.
We've enabled this generic map extension to various non-special
maps holding normal user data: array, hash, lru, lpm, local
storage, queue and stack. Further generic map types could be
followed up in future depending on use-case. Main use case
here is to forbid writes into .rodata map values from verifier
side.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
2019-04-09 23:20:05 +02:00
attr - > map_flags & ~ QUEUE_STACK_CREATE_FLAG_MASK | |
! bpf_map_flags_access_ok ( attr - > map_flags ) )
2018-10-18 15:16:25 +02:00
return - EINVAL ;
if ( attr - > value_size > KMALLOC_MAX_SIZE )
/* if value_size is bigger, the user space won't be able to
* access the elements .
*/
return - E2BIG ;
return 0 ;
}
static struct bpf_map * queue_stack_map_alloc ( union bpf_attr * attr )
{
int ret , numa_node = bpf_map_attr_numa_node ( attr ) ;
2019-05-29 18:03:58 -07:00
struct bpf_map_memory mem = { 0 } ;
2018-10-18 15:16:25 +02:00
struct bpf_queue_stack * qs ;
2018-11-22 10:49:56 -08:00
u64 size , queue_size , cost ;
2018-10-18 15:16:25 +02:00
2018-11-22 10:49:56 -08:00
size = ( u64 ) attr - > max_entries + 1 ;
cost = queue_size = sizeof ( * qs ) + size * attr - > value_size ;
2018-10-18 15:16:25 +02:00
2019-05-29 18:03:58 -07:00
ret = bpf_map_charge_init ( & mem , cost ) ;
2018-10-18 15:16:25 +02:00
if ( ret < 0 )
return ERR_PTR ( ret ) ;
qs = bpf_map_area_alloc ( queue_size , numa_node ) ;
2019-05-29 18:03:58 -07:00
if ( ! qs ) {
bpf_map_charge_finish ( & mem ) ;
2018-10-18 15:16:25 +02:00
return ERR_PTR ( - ENOMEM ) ;
2019-05-29 18:03:58 -07:00
}
2018-10-18 15:16:25 +02:00
memset ( qs , 0 , sizeof ( * qs ) ) ;
bpf_map_init_from_attr ( & qs - > map , attr ) ;
2019-05-29 18:03:58 -07:00
bpf_map_charge_move ( & qs - > map . memory , & mem ) ;
2018-10-18 15:16:25 +02:00
qs - > size = size ;
raw_spin_lock_init ( & qs - > lock ) ;
return & qs - > map ;
}
/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void queue_stack_map_free ( struct bpf_map * map )
{
struct bpf_queue_stack * qs = bpf_queue_stack ( map ) ;
/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
* so the programs ( can be more than one that used this map ) were
* disconnected from events . Wait for outstanding critical sections in
* these programs to complete
*/
synchronize_rcu ( ) ;
bpf_map_area_free ( qs ) ;
}
/*
 * Copy the oldest element (the one at tail) of a queue map into @value.
 *
 * @map:    queue map to read from
 * @value:  destination buffer of at least map->value_size bytes
 * @delete: when true the element is also removed (pop), otherwise it
 *          stays in the queue (peek)
 *
 * Returns 0 on success. If the queue is empty, @value is zero-filled and
 * -ENOENT is returned.
 */
static int __queue_map_get(struct bpf_map *map, void *value, bool delete)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long flags;
	int err = 0;
	void *ptr;

	raw_spin_lock_irqsave(&qs->lock, flags);

	if (queue_stack_map_is_empty(qs)) {
		/* do not hand back stale buffer contents on failure */
		memset(value, 0, qs->map.value_size);
		err = -ENOENT;
		goto out;
	}

	/*
	 * The byte offset is computed in size_t: tail and value_size are
	 * both u32, and a plain u32 product could wrap for very large maps
	 * whose total size was computed in u64 at allocation time.
	 */
	ptr = &qs->elements[(size_t)qs->tail * qs->map.value_size];
	memcpy(value, ptr, qs->map.value_size);

	if (delete) {
		/* consume the element: advance tail, wrapping to slot 0 */
		if (unlikely(++qs->tail >= qs->size))
			qs->tail = 0;
	}

out:
	raw_spin_unlock_irqrestore(&qs->lock, flags);
	return err;
}
/*
 * Copy the most recently pushed element (the one just below head) of a
 * stack map into @value.
 *
 * @map:    stack map to read from
 * @value:  destination buffer of at least map->value_size bytes
 * @delete: when true the element is also removed (pop), otherwise it
 *          stays on the stack (peek)
 *
 * Returns 0 on success. If the stack is empty, @value is zero-filled and
 * -ENOENT is returned.
 */
static int __stack_map_get(struct bpf_map *map, void *value, bool delete)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long flags;
	int err = 0;
	void *ptr;
	u32 index;

	raw_spin_lock_irqsave(&qs->lock, flags);

	if (queue_stack_map_is_empty(qs)) {
		/* do not hand back stale buffer contents on failure */
		memset(value, 0, qs->map.value_size);
		err = -ENOENT;
		goto out;
	}

	/*
	 * Step back one slot from head. When head is 0 the u32 subtraction
	 * wraps to a huge value, which the range check below turns into the
	 * last slot of the ring.
	 */
	index = qs->head - 1;
	if (unlikely(index >= qs->size))
		index = qs->size - 1;

	/*
	 * The byte offset is computed in size_t: index and value_size are
	 * both u32, and a plain u32 product could wrap for very large maps
	 * whose total size was computed in u64 at allocation time.
	 */
	ptr = &qs->elements[(size_t)index * qs->map.value_size];
	memcpy(value, ptr, qs->map.value_size);

	if (delete)
		qs->head = index;	/* the popped slot becomes the next write slot */

out:
	raw_spin_unlock_irqrestore(&qs->lock, flags);
	return err;
}
/* Called from syscall or from eBPF program */
static int queue_map_peek_elem ( struct bpf_map * map , void * value )
{
return __queue_map_get ( map , value , false ) ;
}
/* Called from syscall or from eBPF program */
static int stack_map_peek_elem ( struct bpf_map * map , void * value )
{
return __stack_map_get ( map , value , false ) ;
}
/* Called from syscall or from eBPF program */
static int queue_map_pop_elem ( struct bpf_map * map , void * value )
{
return __queue_map_get ( map , value , true ) ;
}
/* Called from syscall or from eBPF program */
static int stack_map_pop_elem ( struct bpf_map * map , void * value )
{
return __stack_map_get ( map , value , true ) ;
}
/*
 * Called from syscall or from eBPF program.
 *
 * Append @value at the head of the ring; shared by queue and stack maps.
 *
 * @map:   queue or stack map to push into
 * @value: source buffer of map->value_size bytes
 * @flags: BPF_ANY (0) or BPF_EXIST; anything else is rejected
 *
 * Returns 0 on success, -EINVAL for unsupported flags, and -E2BIG when the
 * map is full and BPF_EXIST was not given.
 */
static int queue_stack_map_push_elem(struct bpf_map *map, void *value,
				     u64 flags)
{
	struct bpf_queue_stack *qs = bpf_queue_stack(map);
	unsigned long irq_flags;
	int err = 0;
	void *dst;

	/* BPF_EXIST is used to force making room for a new element in case the
	 * map is full
	 */
	bool replace = (flags & BPF_EXIST);

	/* Check supported flags for queue and stack maps */
	if ((flags & BPF_NOEXIST) || flags > BPF_EXIST)
		return -EINVAL;

	raw_spin_lock_irqsave(&qs->lock, irq_flags);

	if (queue_stack_map_is_full(qs)) {
		if (!replace) {
			err = -E2BIG;
			goto out;
		}
		/* advance tail pointer to overwrite oldest element */
		if (unlikely(++qs->tail >= qs->size))
			qs->tail = 0;
	}

	/*
	 * The byte offset is computed in size_t: head and value_size are
	 * both u32, and a plain u32 product could wrap for very large maps
	 * whose total size was computed in u64 at allocation time.
	 */
	dst = &qs->elements[(size_t)qs->head * qs->map.value_size];
	memcpy(dst, value, qs->map.value_size);

	/* move head to the next free slot, wrapping to slot 0 */
	if (unlikely(++qs->head >= qs->size))
		qs->head = 0;

out:
	raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
	return err;
}
/*
 * Called from syscall or from eBPF program.
 *
 * Keyed lookup is not available on queue/stack maps (they are created with
 * key_size == 0), so this always yields no element.
 */
static void *queue_stack_map_lookup_elem(struct bpf_map *map, void *key)
{
	return NULL;
}
/*
 * Called from syscall or from eBPF program.
 *
 * Keyed update is not available on queue/stack maps; always fails with
 * -EINVAL.
 */
static int queue_stack_map_update_elem(struct bpf_map *map, void *key,
				       void *value, u64 flags)
{
	return -EINVAL;
}
/*
 * Called from syscall or from eBPF program.
 *
 * Keyed delete is not available on queue/stack maps; always fails with
 * -EINVAL.
 */
static int queue_stack_map_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}
/*
 * Called from syscall.
 *
 * Key iteration is not available on queue/stack maps; always fails with
 * -EINVAL.
 */
static int queue_stack_map_get_next_key(struct bpf_map *map, void *key,
					void *next_key)
{
	return -EINVAL;
}
/* Operations table for queue maps: pop/peek act on the oldest element. */
const struct bpf_map_ops queue_map_ops = {
	/* lifetime */
	.map_alloc_check	= queue_stack_map_alloc_check,
	.map_alloc		= queue_stack_map_alloc,
	.map_free		= queue_stack_map_free,

	/* queue operations */
	.map_push_elem		= queue_stack_map_push_elem,
	.map_pop_elem		= queue_map_pop_elem,
	.map_peek_elem		= queue_map_peek_elem,

	/* keyed operations: stubs that always fail */
	.map_lookup_elem	= queue_stack_map_lookup_elem,
	.map_update_elem	= queue_stack_map_update_elem,
	.map_delete_elem	= queue_stack_map_delete_elem,
	.map_get_next_key	= queue_stack_map_get_next_key,
};
/* Operations table for stack maps: pop/peek act on the newest element. */
const struct bpf_map_ops stack_map_ops = {
	/* lifetime */
	.map_alloc_check	= queue_stack_map_alloc_check,
	.map_alloc		= queue_stack_map_alloc,
	.map_free		= queue_stack_map_free,

	/* stack operations */
	.map_push_elem		= queue_stack_map_push_elem,
	.map_pop_elem		= stack_map_pop_elem,
	.map_peek_elem		= stack_map_peek_elem,

	/* keyed operations: stubs that always fail */
	.map_lookup_elem	= queue_stack_map_lookup_elem,
	.map_update_elem	= queue_stack_map_update_elem,
	.map_delete_elem	= queue_stack_map_delete_elem,
	.map_get_next_key	= queue_stack_map_get_next_key,
};