2017-03-31 07:45:38 +03:00
/* Copyright (c) 2017 Facebook
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation .
*/
# include <linux/bpf.h>
# include <linux/slab.h>
# include <linux/vmalloc.h>
# include <linux/etherdevice.h>
# include <linux/filter.h>
# include <linux/sched/signal.h>
2018-10-19 19:57:58 +03:00
# include <net/sock.h>
# include <net/tcp.h>
2017-03-31 07:45:38 +03:00
2019-02-13 02:42:38 +03:00
/*
 * Run @prog against @ctx @repeat times (a @repeat of 0 is treated as 1) and
 * measure how long it takes.
 *
 * One cgroup storage object per storage type is allocated up front and
 * installed with bpf_cgroup_storage_set() before every run.  The runs happen
 * with the RCU read lock held and preemption disabled; both are dropped
 * around cond_resched() whenever need_resched() reports pending work, and the
 * time spent outside the locked section is not accounted.
 *
 * @retval receives the return value of the most recent run.
 * @time receives the accumulated run time in nanoseconds divided by @repeat,
 * saturated at U32_MAX.
 *
 * Returns 0 on success, -ENOMEM if a cgroup storage allocation failed (in
 * which case neither @retval nor @time is written), or -EINTR if a signal
 * was pending for the current task after a run.
 */
static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
			u32 *retval, u32 *time)
{
	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { 0 };
	enum bpf_cgroup_storage_type stype;
	u64 started_ns, elapsed_ns = 0;
	int err = 0;
	u32 left;

	for_each_cgroup_storage_type(stype) {
		storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
		if (IS_ERR(storage[stype])) {
			/* Do not hand an ERR_PTR to the free loop below. */
			storage[stype] = NULL;
			err = -ENOMEM;
			goto free_storage;
		}
	}

	if (repeat == 0)
		repeat = 1;

	rcu_read_lock();
	preempt_disable();
	started_ns = ktime_get_ns();

	for (left = repeat; left; left--) {
		bpf_cgroup_storage_set(storage);
		*retval = BPF_PROG_RUN(prog, ctx);

		/* Let a pending signal cut a long-running test short. */
		if (signal_pending(current)) {
			err = -EINTR;
			break;
		}

		if (!need_resched())
			continue;

		/* Stop the clock, yield the CPU, then start timing again. */
		elapsed_ns += ktime_get_ns() - started_ns;
		preempt_enable();
		rcu_read_unlock();

		cond_resched();

		rcu_read_lock();
		preempt_disable();
		started_ns = ktime_get_ns();
	}

	elapsed_ns += ktime_get_ns() - started_ns;
	preempt_enable();
	rcu_read_unlock();

	/* Per-iteration average, saturated to what fits in a u32. */
	do_div(elapsed_ns, repeat);
	if (elapsed_ns > U32_MAX)
		*time = U32_MAX;
	else
		*time = (u32)elapsed_ns;

free_storage:
	for_each_cgroup_storage_type(stype)
		bpf_cgroup_storage_free(storage[stype]);

	return err;
}
2017-05-02 18:36:33 +03:00
/*
 * Copy the results of a test run back to user space.
 *
 * @kattr:    kernel copy of the request; supplies test.data_out and the
 *            optional test.data_size_out size hint.
 * @uattr:    user-space attribute that receives data_size_out, retval and
 *            duration.
 * @data:     output buffer to copy to test.data_out (skipped when the user
 *            supplied no data_out pointer).
 * @size:     number of valid bytes in @data; always reported back in full
 *            through test.data_size_out, even when the copy is clamped.
 * @retval:   program return value to report.
 * @duration: measured duration to report.
 *
 * Returns 0 on success and -EFAULT if any copy to user space fails.  When
 * the user gave a non-zero data_size_out smaller than @size, the data copy is
 * clamped to that hint and the function returns -ENOSPC regardless of whether
 * the individual copies succeeded.
 */
static int bpf_test_finish(const union bpf_attr *kattr,
			   union bpf_attr __user *uattr, const void *data,
			   u32 size, u32 retval, u32 duration)
{
	void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
	u32 size_hint = kattr->test.data_size_out;
	u32 out_len = size;
	bool clamped = false;

	/* Clamp copy if the user has provided a size hint, but copy the full
	 * buffer if not to retain old behaviour.
	 */
	if (size_hint && size > size_hint) {
		out_len = size_hint;
		clamped = true;
	}

	/* Short-circuit evaluation stops at the first copy that fails. */
	if ((data_out && copy_to_user(data_out, data, out_len)) ||
	    copy_to_user(&uattr->test.data_size_out, &size, sizeof(size)) ||
	    copy_to_user(&uattr->test.retval, &retval, sizeof(retval)) ||
	    copy_to_user(&uattr->test.duration, &duration, sizeof(duration)))
		return clamped ? -ENOSPC : -EFAULT;

	return clamped ? -ENOSPC : 0;
}
/*
 * Allocate a zeroed buffer of @headroom + @size + @tailroom bytes and copy
 * @size bytes from the user pointer in kattr->test.data_in to offset
 * @headroom inside it.
 *
 * @size must be at least ETH_HLEN and the whole buffer must fit in PAGE_SIZE.
 *
 * Returns the start of the buffer (to be released with kfree()), or an
 * ERR_PTR: -EINVAL for an out-of-range @size, -ENOMEM if the allocation
 * fails, -EFAULT if the user data cannot be read.
 */
static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
			   u32 headroom, u32 tailroom)
{
	void __user *user_src = u64_to_user_ptr(kattr->test.data_in);
	void *buf;

	if (size < ETH_HLEN)
		return ERR_PTR(-EINVAL);
	if (size > PAGE_SIZE - headroom - tailroom)
		return ERR_PTR(-EINVAL);

	buf = kzalloc(size + headroom + tailroom, GFP_USER);
	if (buf == NULL)
		return ERR_PTR(-ENOMEM);

	if (copy_from_user(buf + headroom, user_src, size) != 0) {
		kfree(buf);
		return ERR_PTR(-EFAULT);
	}

	return buf;
}
/*
 * BPF_PROG_TEST_RUN handler for programs that take an skb as context.
 *
 * The user-supplied packet (kattr->test.data_in / data_size_in) is wrapped in
 * an skb that is attached to a zero-initialised dummy socket in the caller's
 * network namespace, @prog is run kattr->test.repeat times through
 * bpf_test_run(), and the resulting packet, return value and duration are
 * reported through bpf_test_finish().
 *
 * SCHED_CLS and SCHED_ACT programs are run with skb->data at the Ethernet
 * header; every other type is run with the header already pulled.  Those two
 * types and the LWT_IN/LWT_OUT/LWT_XMIT types additionally get
 * bpf_compute_data_pointers() called on the skb before the run.
 *
 * Returns 0 on success or a negative errno from bpf_test_init(),
 * bpf_test_run() or bpf_test_finish(), or -ENOMEM if an allocation fails.
 */
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
			  union bpf_attr __user *uattr)
{
	u32 size = kattr->test.data_size_in;
	u32 repeat = kattr->test.repeat;
	bool is_l2, is_direct_pkt_access;
	u32 retval, duration;
	int hh_len = ETH_HLEN;
	struct sk_buff *skb;
	struct sock *sk;
	void *data;
	int ret;

	data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
			     SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
	if (IS_ERR(data))
		return PTR_ERR(data);

	is_l2 = prog->type == BPF_PROG_TYPE_SCHED_CLS ||
		prog->type == BPF_PROG_TYPE_SCHED_ACT;
	is_direct_pkt_access = is_l2 ||
			       prog->type == BPF_PROG_TYPE_LWT_IN ||
			       prog->type == BPF_PROG_TYPE_LWT_OUT ||
			       prog->type == BPF_PROG_TYPE_LWT_XMIT;

	sk = kzalloc(sizeof(*sk), GFP_USER);
	if (!sk) {
		ret = -ENOMEM;
		goto free_data;
	}
	sock_net_set(sk, current->nsproxy->net_ns);
	sock_init_data(NULL, sk);

	skb = build_skb(data, 0);
	if (!skb) {
		ret = -ENOMEM;
		goto free_sk;
	}
	skb->sk = sk;

	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
	__skb_put(skb, size);
	skb->protocol = eth_type_trans(skb,
				       current->nsproxy->net_ns->loopback_dev);
	skb_reset_network_header(skb);

	/* L2 programs run with the Ethernet header back in front of data. */
	if (is_l2)
		__skb_push(skb, hh_len);
	if (is_direct_pkt_access)
		bpf_compute_data_pointers(skb);

	ret = bpf_test_run(prog, skb, repeat, &retval, &duration);
	if (ret)
		goto free_skb;

	if (!is_l2) {
		unsigned int room = skb_headroom(skb);

		/*
		 * The header is pushed back before the packet is reported;
		 * grow the headroom first if too little of it is left.
		 */
		if (room < hh_len) {
			int nhead = HH_DATA_ALIGN(hh_len - room);

			if (pskb_expand_head(skb, nhead, 0, GFP_USER)) {
				ret = -ENOMEM;
				goto free_skb;
			}
		}
		memset(__skb_push(skb, hh_len), 0, hh_len);
	}

	size = skb->len;
	/* bpf program can never convert linear skb to non-linear */
	if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
		size = skb_headlen(skb);

	ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration);

free_skb:
	/* Once the skb exists, the packet buffer is not freed separately. */
	kfree_skb(skb);
	kfree(sk);
	return ret;

free_sk:
	kfree(sk);
free_data:
	kfree(data);
	return ret;
}
/*
 * BPF_PROG_TEST_RUN handler for XDP programs.
 *
 * The user-supplied packet is copied into a buffer with
 * XDP_PACKET_HEADROOM + NET_IP_ALIGN bytes of headroom, described by an
 * xdp_buff that uses rx queue 0 of the caller's loopback device, and @prog is
 * run kattr->test.repeat times through bpf_test_run().  The packet as left by
 * the program, together with the return value and duration, is reported
 * through bpf_test_finish().
 *
 * Returns 0 on success or a negative errno from bpf_test_init(),
 * bpf_test_run() or bpf_test_finish().
 */
int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
			  union bpf_attr __user *uattr)
{
	u32 size = kattr->test.data_size_in;
	u32 repeat = kattr->test.repeat;
	struct netdev_rx_queue *rxqueue;
	struct xdp_buff xdp = {};
	u32 retval, duration;
	void *pkt_start;
	void *data;
	int ret;

	data = bpf_test_init(kattr, size, XDP_PACKET_HEADROOM + NET_IP_ALIGN, 0);
	if (IS_ERR(data))
		return PTR_ERR(data);

	pkt_start = data + XDP_PACKET_HEADROOM + NET_IP_ALIGN;

	xdp.data_hard_start = data;
	xdp.data = pkt_start;
	/*
	 * Metadata sits directly in front of the packet data; starting with
	 * data_meta == data means no metadata has been prepended yet.
	 */
	xdp.data_meta = xdp.data;
	xdp.data_end = xdp.data + size;

	rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev,
				       0);
	xdp.rxq = &rxqueue->xdp_rxq;

	ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration);
	if (!ret) {
		/* Pick up the new length if the program moved either end. */
		if (xdp.data != pkt_start || xdp.data_end != xdp.data + size)
			size = xdp.data_end - xdp.data;

		ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval,
				      duration);
	}

	kfree(data);
	return ret;
}
2019-01-28 19:53:54 +03:00
/*
 * BPF_PROG_TEST_RUN handler for flow dissector programs.
 *
 * The user-supplied packet is wrapped in an skb attached to a zero-initialised
 * dummy socket in the caller's network namespace, and @prog is run
 * kattr->test.repeat times (0 is treated as 1) via __skb_flow_bpf_dissect().
 * The runs happen with the RCU read lock held and preemption disabled; both
 * are dropped around cond_resched() when a reschedule is needed.
 *
 * On success the resulting struct bpf_flow_keys is reported as the output
 * data, together with the return value of the last run and the accumulated
 * run time divided by the repeat count (saturated at U32_MAX).
 *
 * Returns 0 on success, -EINVAL if @prog is not a flow dissector program,
 * -ENOMEM if an allocation fails, -EINTR if a signal was pending after a run,
 * or a negative errno from bpf_test_init() or bpf_test_finish().
 */
int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
				     const union bpf_attr *kattr,
				     union bpf_attr __user *uattr)
{
	u32 size = kattr->test.data_size_in;
	u32 repeat = kattr->test.repeat;
	u64 started_ns, elapsed_ns = 0;
	struct bpf_skb_data_end *cb;
	struct bpf_flow_keys keys;
	u32 retval, duration;
	struct sk_buff *skb;
	struct sock *sk;
	void *data;
	u32 iter;
	int ret;

	if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
		return -EINVAL;

	data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
			     SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
	if (IS_ERR(data))
		return PTR_ERR(data);

	sk = kzalloc(sizeof(*sk), GFP_USER);
	if (!sk) {
		ret = -ENOMEM;
		goto free_data;
	}
	sock_net_set(sk, current->nsproxy->net_ns);
	sock_init_data(NULL, sk);

	skb = build_skb(data, 0);
	if (!skb) {
		ret = -ENOMEM;
		goto free_sk;
	}
	skb->sk = sk;

	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
	__skb_put(skb, size);
	skb->protocol = eth_type_trans(skb,
				       current->nsproxy->net_ns->loopback_dev);
	skb_reset_network_header(skb);

	/* Point the skb control block at the on-stack flow keys. */
	cb = (struct bpf_skb_data_end *)skb->cb;
	cb->qdisc_cb.flow_keys = &keys;

	if (repeat == 0)
		repeat = 1;

	rcu_read_lock();
	preempt_disable();
	started_ns = ktime_get_ns();

	for (iter = 0; iter != repeat; iter++) {
		retval = __skb_flow_bpf_dissect(prog, skb,
						&flow_keys_dissector,
						&keys);

		/* Let a pending signal cut a long-running test short. */
		if (signal_pending(current)) {
			preempt_enable();
			rcu_read_unlock();
			ret = -EINTR;
			goto free_skb;
		}

		if (!need_resched())
			continue;

		/* Stop the clock, yield the CPU, then start timing again. */
		elapsed_ns += ktime_get_ns() - started_ns;
		preempt_enable();
		rcu_read_unlock();

		cond_resched();

		rcu_read_lock();
		preempt_disable();
		started_ns = ktime_get_ns();
	}

	elapsed_ns += ktime_get_ns() - started_ns;
	preempt_enable();
	rcu_read_unlock();

	/* Per-iteration average, saturated to what fits in a u32. */
	do_div(elapsed_ns, repeat);
	if (elapsed_ns > U32_MAX)
		duration = U32_MAX;
	else
		duration = (u32)elapsed_ns;

	ret = bpf_test_finish(kattr, uattr, &keys, sizeof(keys),
			      retval, duration);

free_skb:
	/* Once the skb exists, the packet buffer is not freed separately. */
	kfree_skb(skb);
	kfree(sk);
	return ret;

free_sk:
	kfree(sk);
free_data:
	kfree(data);
	return ret;
}