2017-01-23 13:07:09 +03:00
/*
 * net/sched/act_sample.c - Packet sampling tc action
 * Copyright (c) 2017 Yotam Gigi <yotamg@mellanox.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
# include <linux/types.h>
# include <linux/kernel.h>
# include <linux/string.h>
# include <linux/errno.h>
# include <linux/skbuff.h>
# include <linux/rtnetlink.h>
# include <linux/module.h>
# include <linux/init.h>
# include <linux/gfp.h>
# include <net/net_namespace.h>
# include <net/netlink.h>
# include <net/pkt_sched.h>
# include <linux/tc_act/tc_sample.h>
# include <net/tc_act/tc_sample.h>
# include <net/psample.h>
2019-03-20 17:00:09 +03:00
# include <net/pkt_cls.h>
2017-01-23 13:07:09 +03:00
# include <linux/if_arp.h>
/* Id passed to net_generic() to find this action's per-namespace state. */
static unsigned int sample_net_id;

/* Forward declaration; tcf_sample_init() needs the ops table defined below. */
static struct tc_action_ops act_sample_ops;

/* Attribute policy handed to nla_parse_nested() by tcf_sample_init(). */
static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
	/* Payload is read as a struct tc_sample by tcf_sample_init(). */
	[TCA_SAMPLE_PARMS]		= { .len = sizeof(struct tc_sample) },
	[TCA_SAMPLE_RATE]		= { .type = NLA_U32 },
	[TCA_SAMPLE_TRUNC_SIZE]		= { .type = NLA_U32 },
	[TCA_SAMPLE_PSAMPLE_GROUP]	= { .type = NLA_U32 },
};
/*
 * tcf_sample_init - create a sample action, or replace the parameters of an
 * existing one, from a nested netlink attribute.
 *
 * @net:       network namespace the action lives in
 * @nla:       nested attribute carrying the TCA_SAMPLE_* options
 * @est:       rate estimator attribute, forwarded to tcf_idr_create()
 * @a:         in/out pointer to the action
 * @ovr:       non-zero when an already existing action may be overwritten
 * @bind:      non-zero when the action is being bound rather than configured
 * @rtnl_held: unused by this function
 * @tp:        classifier handed to tcf_action_check_ctrlact()
 * @extack:    extended ack used to report errors
 *
 * TCA_SAMPLE_PARMS, TCA_SAMPLE_RATE and TCA_SAMPLE_PSAMPLE_GROUP are
 * mandatory.  The rate must be non-zero because tcf_sample_act() uses it as
 * a modulus.
 *
 * Return: ACT_P_CREATED when a new action was created, 0 when an existing
 * action was bound or updated, a negative errno otherwise.
 */
static int tcf_sample_init(struct net *net, struct nlattr *nla,
			   struct nlattr *est, struct tc_action **a, int ovr,
			   int bind, bool rtnl_held, struct tcf_proto *tp,
			   struct netlink_ext_ack *extack)
{
	struct tc_action_net *tn = net_generic(net, sample_net_id);
	struct nlattr *tb[TCA_SAMPLE_MAX + 1];
	struct psample_group *psample_group;
	struct psample_group *old_group;
	struct tcf_chain *goto_ch = NULL;
	struct tc_sample *parm;
	u32 psample_group_num;
	struct tcf_sample *s;
	bool exists = false;
	u32 rate;
	int ret, err;

	if (!nla)
		return -EINVAL;

	ret = nla_parse_nested(tb, TCA_SAMPLE_MAX, nla, sample_policy, NULL);
	if (ret < 0)
		return ret;

	if (!tb[TCA_SAMPLE_PARMS] || !tb[TCA_SAMPLE_RATE] ||
	    !tb[TCA_SAMPLE_PSAMPLE_GROUP])
		return -EINVAL;

	parm = nla_data(tb[TCA_SAMPLE_PARMS]);

	err = tcf_idr_check_alloc(tn, &parm->index, a, bind);
	if (err < 0)
		return err;
	exists = err;

	/* Binding to an action that already exists needs no reconfiguration. */
	if (exists && bind)
		return 0;

	if (!exists) {
		ret = tcf_idr_create(tn, parm->index, est, a,
				     &act_sample_ops, bind, true);
		if (ret) {
			tcf_idr_cleanup(tn, parm->index);
			return ret;
		}
		ret = ACT_P_CREATED;
	} else if (!ovr) {
		tcf_idr_release(*a, bind);
		return -EEXIST;
	}

	/*
	 * Validate the control action before touching the action itself;
	 * goto_ch receives the chain that tcf_action_set_ctrlact() installs
	 * below.
	 */
	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
	if (err < 0)
		goto release_idr;

	/* A zero rate would make "prandom_u32() % rate" divide by zero. */
	rate = nla_get_u32(tb[TCA_SAMPLE_RATE]);
	if (!rate) {
		NL_SET_ERR_MSG(extack, "invalid sample rate");
		err = -EINVAL;
		goto put_chain;
	}

	psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]);
	psample_group = psample_group_get(net, psample_group_num);
	if (!psample_group) {
		err = -ENOMEM;
		goto put_chain;
	}

	s = to_sample(*a);

	spin_lock_bh(&s->tcf_lock);
	/* Returns the chain that was installed before, to be put below. */
	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
	s->rate = rate;
	s->psample_group_num = psample_group_num;
	/*
	 * Remember the group that was attached before so that its reference
	 * can be dropped once the lock is released; overwriting the pointer
	 * without doing so leaks a reference on every replace.
	 */
	old_group = rcu_dereference_protected(s->psample_group,
					      lockdep_is_held(&s->tcf_lock));
	rcu_assign_pointer(s->psample_group, psample_group);
	/*
	 * NOTE(review): truncation is only ever switched on here; a replace
	 * without TCA_SAMPLE_TRUNC_SIZE keeps the previous truncate settings.
	 */
	if (tb[TCA_SAMPLE_TRUNC_SIZE]) {
		s->truncate = true;
		s->trunc_size = nla_get_u32(tb[TCA_SAMPLE_TRUNC_SIZE]);
	}
	spin_unlock_bh(&s->tcf_lock);

	if (old_group)
		psample_group_put(old_group);
	if (goto_ch)
		tcf_chain_put_by_act(goto_ch);

	if (ret == ACT_P_CREATED)
		tcf_idr_insert(tn, *a);
	return ret;

put_chain:
	if (goto_ch)
		tcf_chain_put_by_act(goto_ch);
release_idr:
	tcf_idr_release(*a, bind);
	return err;
}
2017-12-05 23:53:07 +03:00
static void tcf_sample_cleanup ( struct tc_action * a )
2017-01-23 13:07:09 +03:00
{
2017-11-30 03:07:51 +03:00
struct tcf_sample * s = to_sample ( a ) ;
2017-01-23 13:07:09 +03:00
struct psample_group * psample_group ;
2018-08-10 20:51:47 +03:00
/* last reference to action, no need to lock */
psample_group = rcu_dereference_protected ( s - > psample_group , 1 ) ;
2017-01-23 13:07:09 +03:00
RCU_INIT_POINTER ( s - > psample_group , NULL ) ;
2018-03-16 02:00:56 +03:00
if ( psample_group )
psample_group_put ( psample_group ) ;
2017-01-23 13:07:09 +03:00
}
static bool tcf_sample_dev_ok_push ( struct net_device * dev )
{
switch ( dev - > type ) {
case ARPHRD_TUNNEL :
case ARPHRD_TUNNEL6 :
case ARPHRD_SIT :
case ARPHRD_IPGRE :
case ARPHRD_VOID :
case ARPHRD_NONE :
return false ;
default :
return true ;
}
}
static int tcf_sample_act ( struct sk_buff * skb , const struct tc_action * a ,
struct tcf_result * res )
{
struct tcf_sample * s = to_sample ( a ) ;
struct psample_group * psample_group ;
int retval ;
int size ;
int iif ;
int oif ;
tcf_lastuse_update ( & s - > tcf_tm ) ;
bstats_cpu_update ( this_cpu_ptr ( s - > common . cpu_bstats ) , skb ) ;
retval = READ_ONCE ( s - > tcf_action ) ;
2018-07-30 15:30:43 +03:00
psample_group = rcu_dereference_bh ( s - > psample_group ) ;
2017-01-23 13:07:09 +03:00
/* randomly sample packets according to rate */
if ( psample_group & & ( prandom_u32 ( ) % s - > rate = = 0 ) ) {
if ( ! skb_at_tc_ingress ( skb ) ) {
iif = skb - > skb_iif ;
oif = skb - > dev - > ifindex ;
} else {
iif = skb - > dev - > ifindex ;
oif = 0 ;
}
/* on ingress, the mac header gets popped, so push it back */
if ( skb_at_tc_ingress ( skb ) & & tcf_sample_dev_ok_push ( skb - > dev ) )
skb_push ( skb , skb - > mac_len ) ;
size = s - > truncate ? s - > trunc_size : skb - > len ;
psample_sample_packet ( psample_group , skb , size , iif , oif ,
s - > rate ) ;
if ( skb_at_tc_ingress ( skb ) & & tcf_sample_dev_ok_push ( skb - > dev ) )
skb_pull ( skb , skb - > mac_len ) ;
}
return retval ;
}
static int tcf_sample_dump ( struct sk_buff * skb , struct tc_action * a ,
int bind , int ref )
{
unsigned char * b = skb_tail_pointer ( skb ) ;
struct tcf_sample * s = to_sample ( a ) ;
struct tc_sample opt = {
. index = s - > tcf_index ,
2018-07-05 17:24:24 +03:00
. refcnt = refcount_read ( & s - > tcf_refcnt ) - ref ,
. bindcnt = atomic_read ( & s - > tcf_bindcnt ) - bind ,
2017-01-23 13:07:09 +03:00
} ;
struct tcf_t t ;
2018-08-14 21:46:16 +03:00
spin_lock_bh ( & s - > tcf_lock ) ;
2018-08-10 20:51:47 +03:00
opt . action = s - > tcf_action ;
2017-01-23 13:07:09 +03:00
if ( nla_put ( skb , TCA_SAMPLE_PARMS , sizeof ( opt ) , & opt ) )
goto nla_put_failure ;
tcf_tm_dump ( & t , & s - > tcf_tm ) ;
if ( nla_put_64bit ( skb , TCA_SAMPLE_TM , sizeof ( t ) , & t , TCA_SAMPLE_PAD ) )
goto nla_put_failure ;
if ( nla_put_u32 ( skb , TCA_SAMPLE_RATE , s - > rate ) )
goto nla_put_failure ;
if ( s - > truncate )
if ( nla_put_u32 ( skb , TCA_SAMPLE_TRUNC_SIZE , s - > trunc_size ) )
goto nla_put_failure ;
if ( nla_put_u32 ( skb , TCA_SAMPLE_PSAMPLE_GROUP , s - > psample_group_num ) )
goto nla_put_failure ;
2018-08-14 21:46:16 +03:00
spin_unlock_bh ( & s - > tcf_lock ) ;
2018-08-10 20:51:47 +03:00
2017-01-23 13:07:09 +03:00
return skb - > len ;
nla_put_failure :
2018-08-14 21:46:16 +03:00
spin_unlock_bh ( & s - > tcf_lock ) ;
2017-01-23 13:07:09 +03:00
nlmsg_trim ( skb , b ) ;
return - 1 ;
}
static int tcf_sample_walker ( struct net * net , struct sk_buff * skb ,
struct netlink_callback * cb , int type ,
2018-02-15 18:54:58 +03:00
const struct tc_action_ops * ops ,
struct netlink_ext_ack * extack )
2017-01-23 13:07:09 +03:00
{
struct tc_action_net * tn = net_generic ( net , sample_net_id ) ;
2018-02-15 18:54:59 +03:00
return tcf_generic_walker ( tn , skb , cb , type , ops , extack ) ;
2017-01-23 13:07:09 +03:00
}
2018-08-29 20:15:35 +03:00
/*
 * tcf_sample_search - look up a sample action by index.
 * @net:   namespace to search in
 * @a:     output pointer passed through to tcf_idr_search()
 * @index: action index to look for
 *
 * Return: whatever tcf_idr_search() returns for this namespace's state.
 */
static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
{
	return tcf_idr_search(net_generic(net, sample_net_id), a, index);
}
static struct tc_action_ops act_sample_ops = {
. kind = " sample " ,
2019-02-10 15:25:00 +03:00
. id = TCA_ID_SAMPLE ,
2017-01-23 13:07:09 +03:00
. owner = THIS_MODULE ,
. act = tcf_sample_act ,
. dump = tcf_sample_dump ,
. init = tcf_sample_init ,
. cleanup = tcf_sample_cleanup ,
. walk = tcf_sample_walker ,
. lookup = tcf_sample_search ,
. size = sizeof ( struct tcf_sample ) ,
} ;
static __net_init int sample_init_net ( struct net * net )
{
struct tc_action_net * tn = net_generic ( net , sample_net_id ) ;
2017-11-07 00:47:18 +03:00
return tc_action_net_init ( tn , & act_sample_ops ) ;
2017-01-23 13:07:09 +03:00
}
2017-12-12 02:35:03 +03:00
/*
 * sample_exit_net - batched per-namespace teardown.
 * @net_list: list of namespaces handed on to tc_action_net_exit()
 */
static void __net_exit sample_exit_net(struct list_head *net_list)
{
	tc_action_net_exit(net_list, sample_net_id);
}
static struct pernet_operations sample_net_ops = {
. init = sample_init_net ,
2017-12-12 02:35:03 +03:00
. exit_batch = sample_exit_net ,
2017-01-23 13:07:09 +03:00
. id = & sample_net_id ,
. size = sizeof ( struct tc_action_net ) ,
} ;
/*
 * sample_init_module - module entry point.
 *
 * Return: the result of registering the action ops together with the
 * per-namespace ops.
 */
static int __init sample_init_module(void)
{
	int err;

	err = tcf_register_action(&act_sample_ops, &sample_net_ops);
	return err;
}
/*
 * sample_cleanup_module - module exit point; unregisters what
 * sample_init_module() registered.
 */
static void __exit sample_cleanup_module(void)
{
	tcf_unregister_action(&act_sample_ops, &sample_net_ops);
}
/* Module entry/exit hooks and metadata. */
module_init(sample_init_module);
module_exit(sample_cleanup_module);

MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>");
MODULE_DESCRIPTION("Packet sampling action");
MODULE_LICENSE("GPL v2");