2016-02-27 08:08:54 -05:00
/*
* net / sched / ife . c Inter - FE action based on ForCES WG InterFE LFB
*
* Refer to :
* draft - ietf - forces - interfelfb - 03
* and
* netdev01 paper :
* " Distributing Linux Traffic Control Classifier-Action
* Subsystem "
* Authors : Jamal Hadi Salim and Damascene M . Joachimpillai
*
* This program is free software ; you can redistribute it and / or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation ; either version
* 2 of the License , or ( at your option ) any later version .
*
* copyright Jamal Hadi Salim ( 2015 )
*
*/
# include <linux/types.h>
# include <linux/kernel.h>
# include <linux/string.h>
# include <linux/errno.h>
# include <linux/skbuff.h>
# include <linux/rtnetlink.h>
# include <linux/module.h>
# include <linux/init.h>
# include <net/net_namespace.h>
# include <net/netlink.h>
# include <net/pkt_sched.h>
# include <uapi/linux/tc_act/tc_ife.h>
# include <net/tc_act/tc_ife.h>
# include <linux/etherdevice.h>
2017-02-01 15:30:03 +02:00
# include <net/ife.h>
2016-02-27 08:08:54 -05:00
netns: make struct pernet_operations::id unsigned int
Make struct pernet_operations::id unsigned.
There are 2 reasons to do so:
1)
This field is really an index into a zero-based array and
thus is an unsigned entity. Using a negative value is an out-of-bounds
access by definition.
2)
On x86_64 unsigned 32-bit data which are mixed with pointers
via array indexing or offsets added or subtracted to pointers
are preferred to signed 32-bit data.
"int" being used as an array index needs to be sign-extended
to 64-bit before being used.
void f(long *p, int i)
{
g(p[i]);
}
roughly translates to
movsx rsi, esi
mov rdi, [rsi+...]
call g
MOVSX is 3 byte instruction which isn't necessary if the variable is
unsigned because x86_64 is zero extending by default.
Now, there is net_generic() function which, you guessed it right, uses
"int" as an array index:
static inline void *net_generic(const struct net *net, int id)
{
...
ptr = ng->ptr[id - 1];
...
}
And this function is used a lot, so those sign extensions add up.
Patch snipes ~1730 bytes on allyesconfig kernel (without all junk
messing with code generation):
add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730)
Unfortunately some functions actually grow bigger.
This is a seemingly random artefact of code generation with register
allocator being used differently. gcc decides that some variable
needs to live in new r8+ registers and every access now requires REX
prefix. Or it is shifted into r12, so [r12+0] addressing mode has to be
used which is longer than [r8]
However, overall balance is in negative direction:
add/remove: 0/0 grow/shrink: 70/598 up/down: 396/-2126 (-1730)
function old new delta
nfsd4_lock 3886 3959 +73
tipc_link_build_proto_msg 1096 1140 +44
mac80211_hwsim_new_radio 2776 2808 +32
tipc_mon_rcv 1032 1058 +26
svcauth_gss_legacy_init 1413 1429 +16
tipc_bcbase_select_primary 379 392 +13
nfsd4_exchange_id 1247 1260 +13
nfsd4_setclientid_confirm 782 793 +11
...
put_client_renew_locked 494 480 -14
ip_set_sockfn_get 730 716 -14
geneve_sock_add 829 813 -16
nfsd4_sequence_done 721 703 -18
nlmclnt_lookup_host 708 686 -22
nfsd4_lockt 1085 1063 -22
nfs_get_client 1077 1050 -27
tcf_bpf_init 1106 1076 -30
nfsd4_encode_fattr 5997 5930 -67
Total: Before=154856051, After=154854321, chg -0.00%
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-11-17 04:58:21 +03:00
/* Per-net id; passed to net_generic() in tcf_ife_init() to obtain the
 * struct tc_action_net for this action.
 */
static unsigned int ife_net_id ;
2016-02-27 08:08:54 -05:00
/* Exclusive upper bound of the metadata attribute index loop in
 * populate_metalist().
 */
static int max_metacnt = IFE_META_MAX + 1 ;
2016-07-25 16:09:41 -07:00
/* Declared early so tcf_ife_init() can hand it to tcf_idr_create();
 * the initialiser is not within this part of the file.
 */
static struct tc_action_ops act_ife_ops ;
2016-02-27 08:08:54 -05:00
/* Netlink attribute policy for the top-level TCA_IFE_* attributes; handed
 * to nla_parse_nested() in tcf_ife_init().  PARMS, DMAC and SMAC are
 * described by length, TYPE as a 16-bit attribute.
 */
static const struct nla_policy ife_policy [ TCA_IFE_MAX + 1 ] = {
[ TCA_IFE_PARMS ] = { . len = sizeof ( struct tc_ife ) } ,
[ TCA_IFE_DMAC ] = { . len = ETH_ALEN } ,
[ TCA_IFE_SMAC ] = { . len = ETH_ALEN } ,
[ TCA_IFE_TYPE ] = { . type = NLA_U16 } ,
} ;
2016-09-18 07:31:42 -04:00
/**
 * ife_encode_meta_u16 - encode a 16-bit metadatum as an IFE TLV
 * @metaval: value supplied by the caller; used only when @mi carries no
 *           configured value
 * @skbdata: destination buffer handed to ife_tlv_meta_encode()
 * @mi: metadata instance supplying the TLV id and an optional fixed value
 *
 * A value stored behind @mi->metaval takes precedence over @metaval.  A
 * resulting value of zero is never encoded.
 *
 * Return: 0 when nothing is encoded, otherwise the result of
 * ife_tlv_meta_encode().
 */
int ife_encode_meta_u16(u16 metaval, void *skbdata, struct tcf_meta_info *mi)
{
	u16 edata = mi->metaval ? *(u16 *)mi->metaval : metaval;

	/* zero means "nothing to send" */
	if (edata == 0)
		return 0;

	/* the value goes on the wire in network byte order */
	edata = htons(edata);

	return ife_tlv_meta_encode(skbdata, mi->metaid, 2, &edata);
}
EXPORT_SYMBOL_GPL(ife_encode_meta_u16);
2016-02-27 08:08:54 -05:00
int ife_get_meta_u32 ( struct sk_buff * skb , struct tcf_meta_info * mi )
{
if ( mi - > metaval )
return nla_put_u32 ( skb , mi - > metaid , * ( u32 * ) mi - > metaval ) ;
else
return nla_put ( skb , mi - > metaid , 0 , NULL ) ;
}
EXPORT_SYMBOL_GPL ( ife_get_meta_u32 ) ;
/**
 * ife_check_meta_u32 - size needed to encode a 32-bit metadatum
 * @metaval: value supplied by the caller
 * @mi: metadata instance, possibly carrying a configured value
 *
 * Return: 8 (type + length + value == 2 + 2 + 4 bytes) when either
 * @metaval is non-zero or @mi has a value configured, otherwise 0.
 */
int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi)
{
	return (metaval || mi->metaval) ? 8 : 0;
}
EXPORT_SYMBOL_GPL(ife_check_meta_u32);
2016-09-18 07:31:42 -04:00
/**
 * ife_check_meta_u16 - size needed to encode a 16-bit metadatum
 * @metaval: value supplied by the caller
 * @mi: metadata instance, possibly carrying a configured value
 *
 * Return: 8 (type + length + padded value == 2 + 2 + (2 + 2 bytes of pad))
 * when either @metaval is non-zero or @mi has a value configured,
 * otherwise 0.
 */
int ife_check_meta_u16(u16 metaval, struct tcf_meta_info *mi)
{
	return (metaval || mi->metaval) ? 8 : 0;
}
EXPORT_SYMBOL_GPL(ife_check_meta_u16);
2016-02-27 08:08:54 -05:00
/**
 * ife_encode_meta_u32 - encode a 32-bit metadatum as an IFE TLV
 * @metaval: value supplied by the caller; used only when @mi carries no
 *           configured value
 * @skbdata: destination buffer handed to ife_tlv_meta_encode()
 * @mi: metadata instance supplying the TLV id and an optional fixed value
 *
 * A value stored behind @mi->metaval takes precedence over @metaval.  A
 * resulting value of zero is never encoded.
 *
 * Return: 0 when nothing is encoded, otherwise the result of
 * ife_tlv_meta_encode().
 */
int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi)
{
	u32 edata = mi->metaval ? *(u32 *)mi->metaval : metaval;

	/* zero means "nothing to send" */
	if (edata == 0)
		return 0;

	/* the value goes on the wire in network byte order */
	edata = htonl(edata);

	return ife_tlv_meta_encode(skbdata, mi->metaid, 4, &edata);
}
EXPORT_SYMBOL_GPL(ife_encode_meta_u32);
int ife_get_meta_u16 ( struct sk_buff * skb , struct tcf_meta_info * mi )
{
if ( mi - > metaval )
return nla_put_u16 ( skb , mi - > metaid , * ( u16 * ) mi - > metaval ) ;
else
return nla_put ( skb , mi - > metaid , 0 , NULL ) ;
}
EXPORT_SYMBOL_GPL ( ife_get_meta_u16 ) ;
2016-06-20 13:37:18 -07:00
/**
 * ife_alloc_meta_u32 - store a private copy of a 32-bit metadata value
 * @mi: metadata instance that receives the copy in @mi->metaval
 * @metaval: source value; sizeof(u32) bytes are duplicated
 * @gfp: allocation flags passed to kmemdup()
 *
 * Return: 0 on success, -ENOMEM when the copy cannot be allocated.
 */
int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp)
{
	mi->metaval = kmemdup(metaval, sizeof(u32), gfp);

	return mi->metaval ? 0 : -ENOMEM;
}
EXPORT_SYMBOL_GPL(ife_alloc_meta_u32);
2016-06-20 13:37:18 -07:00
/**
 * ife_alloc_meta_u16 - store a private copy of a 16-bit metadata value
 * @mi: metadata instance that receives the copy in @mi->metaval
 * @metaval: source value; sizeof(u16) bytes are duplicated
 * @gfp: allocation flags passed to kmemdup()
 *
 * Return: 0 on success, -ENOMEM when the copy cannot be allocated.
 */
int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp)
{
	mi->metaval = kmemdup(metaval, sizeof(u16), gfp);

	return mi->metaval ? 0 : -ENOMEM;
}
EXPORT_SYMBOL_GPL(ife_alloc_meta_u16);
void ife_release_meta_gen ( struct tcf_meta_info * mi )
{
kfree ( mi - > metaval ) ;
}
EXPORT_SYMBOL_GPL ( ife_release_meta_gen ) ;
int ife_validate_meta_u32 ( void * val , int len )
{
2016-08-22 07:10:20 -04:00
if ( len = = sizeof ( u32 ) )
2016-02-27 08:08:54 -05:00
return 0 ;
return - EINVAL ;
}
EXPORT_SYMBOL_GPL ( ife_validate_meta_u32 ) ;
int ife_validate_meta_u16 ( void * val , int len )
{
2016-08-22 07:10:20 -04:00
/* length will not include padding */
if ( len = = sizeof ( u16 ) )
2016-02-27 08:08:54 -05:00
return 0 ;
return - EINVAL ;
}
EXPORT_SYMBOL_GPL ( ife_validate_meta_u16 ) ;
/* Registered metadata ops (struct tcf_meta_ops, linked through ->list). */
static LIST_HEAD ( ifeoplist ) ;
/* Reader/writer lock taken around the ifeoplist walks and updates in
 * this file.
 */
static DEFINE_RWLOCK ( ife_mod_lock ) ;
/* Look up the registered ops for @metaid and pin the owning module.
 *
 * Only the first list entry with a matching id is considered.  Returns
 * that entry with a module reference held (the caller must module_put()
 * it), or NULL when no entry matches or the module reference cannot be
 * taken.
 */
static struct tcf_meta_ops *find_ife_oplist(u16 metaid)
{
	struct tcf_meta_ops *found = NULL;
	struct tcf_meta_ops *cur;

	read_lock(&ife_mod_lock);
	list_for_each_entry(cur, &ifeoplist, list) {
		if (cur->metaid != metaid)
			continue;

		if (try_module_get(cur->owner))
			found = cur;
		break;
	}
	read_unlock(&ife_mod_lock);

	return found;
}
int register_ife_op ( struct tcf_meta_ops * mops )
{
struct tcf_meta_ops * m ;
if ( ! mops - > metaid | | ! mops - > metatype | | ! mops - > name | |
! mops - > check_presence | | ! mops - > encode | | ! mops - > decode | |
! mops - > get | | ! mops - > alloc )
return - EINVAL ;
write_lock ( & ife_mod_lock ) ;
list_for_each_entry ( m , & ifeoplist , list ) {
if ( m - > metaid = = mops - > metaid | |
( strcmp ( mops - > name , m - > name ) = = 0 ) ) {
write_unlock ( & ife_mod_lock ) ;
return - EEXIST ;
}
}
if ( ! mops - > release )
mops - > release = ife_release_meta_gen ;
list_add_tail ( & mops - > list , & ifeoplist ) ;
write_unlock ( & ife_mod_lock ) ;
return 0 ;
}
EXPORT_SYMBOL_GPL ( unregister_ife_op ) ;
int unregister_ife_op ( struct tcf_meta_ops * mops )
{
struct tcf_meta_ops * m ;
int err = - ENOENT ;
write_lock ( & ife_mod_lock ) ;
list_for_each_entry ( m , & ifeoplist , list ) {
if ( m - > metaid = = mops - > metaid ) {
list_del ( & mops - > list ) ;
err = 0 ;
break ;
}
}
write_unlock ( & ife_mod_lock ) ;
return err ;
}
EXPORT_SYMBOL_GPL ( register_ife_op ) ;
static int ife_validate_metatype ( struct tcf_meta_ops * ops , void * val , int len )
{
int ret = 0 ;
/* XXX: unfortunately cant use nla_policy at this point
* because a length of 0 is valid in the case of
* " allow " . " use " semantics do enforce for proper
* length and i couldve use nla_policy but it makes it hard
* to use it just for that . .
*/
if ( ops - > validate )
return ops - > validate ( val , len ) ;
if ( ops - > metatype = = NLA_U32 )
ret = ife_validate_meta_u32 ( val , len ) ;
else if ( ops - > metatype = = NLA_U16 )
ret = ife_validate_meta_u16 ( val , len ) ;
return ret ;
}
2017-10-13 12:58:13 -07:00
# ifdef CONFIG_MODULES
2017-10-11 10:50:30 -04:00
/* Map a metadata id to the name string that load_metaops_and_vet()
 * substitutes into its request_module() format.  Ids other than
 * IFE_META_SKBMARK, IFE_META_PRIO and IFE_META_TCINDEX get a fallback
 * name.  Compiled only when CONFIG_MODULES is defined.
 */
static const char * ife_meta_id2name ( u32 metaid )
{
switch ( metaid ) {
case IFE_META_SKBMARK :
return " skbmark " ;
case IFE_META_PRIO :
return " skbprio " ;
case IFE_META_TCINDEX :
return " tcindex " ;
default :
return " unknown " ;
}
}
2017-10-13 12:58:13 -07:00
# endif
2017-10-11 10:50:30 -04:00
2016-02-27 08:08:54 -05:00
/* Make sure ops for @metaid are available and vet the supplied value.
 *
 * Called when adding new meta information; for an existing action
 * (@exists true) the caller holds ife->tcf_lock.
 *
 * If no ops are registered for @metaid and CONFIG_MODULES is defined,
 * the locks are dropped, a module load is requested, the locks are
 * re-taken and the lookup is retried.  When ops are found and @len is
 * non-zero, the value is checked with ife_validate_metatype().  The
 * module reference taken by find_ife_oplist() is always dropped again
 * before returning.
 *
 * Returns 0 on success, -ENOENT when no ops could be found, or the
 * validation result.
 */
static int load_metaops_and_vet ( struct tcf_ife_info * ife , u32 metaid ,
2016-06-20 13:37:18 -07:00
void * val , int len , bool exists )
2016-02-27 08:08:54 -05:00
{
struct tcf_meta_ops * ops = find_ife_oplist ( metaid ) ;
int ret = 0 ;
if ( ! ops ) {
ret = - ENOENT ;
# ifdef CONFIG_MODULES
2016-06-20 13:37:18 -07:00
/* request_module() is called with tcf_lock and RTNL released */
if ( exists )
spin_unlock_bh ( & ife - > tcf_lock ) ;
2016-02-27 08:08:54 -05:00
/* NOTE(review): RTNL is dropped unconditionally here - confirm that
 * every caller actually holds it at this point.
 */
rtnl_unlock ( ) ;
2017-10-11 10:50:30 -04:00
request_module ( " ife-meta-%s " , ife_meta_id2name ( metaid ) ) ;
2016-02-27 08:08:54 -05:00
rtnl_lock ( ) ;
2016-06-20 13:37:18 -07:00
if ( exists )
spin_lock_bh ( & ife - > tcf_lock ) ;
2016-02-27 08:08:54 -05:00
/* retry the lookup now that the module may have registered its ops */
ops = find_ife_oplist ( metaid ) ;
# endif
}
if ( ops ) {
ret = 0 ;
/* a zero length is accepted without validation */
if ( len )
ret = ife_validate_metatype ( ops , val , len ) ;
/* release the reference taken by find_ife_oplist() */
module_put ( ops - > owner ) ;
}
return ret ;
}
/* Create a metadata entry for @metaid and append it to ife->metalist.
 *
 * Called when adding new meta information; for an existing action the
 * caller holds ife->tcf_lock.  @atomic selects GFP_ATOMIC instead of
 * GFP_KERNEL for all allocations made here.  When @len is positive, the
 * ops' alloc callback is asked to store @metaval in the new entry.
 *
 * On success the module reference obtained through find_ife_oplist()
 * stays with the new entry.  Returns 0 on success, -ENOENT when no ops
 * are registered for @metaid, -ENOMEM when the entry cannot be
 * allocated, or the error reported by the alloc callback.
 */
static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval,
			int len, bool atomic)
{
	const gfp_t gfp = atomic ? GFP_ATOMIC : GFP_KERNEL;
	struct tcf_meta_ops *ops;
	struct tcf_meta_info *mi;
	int err;

	ops = find_ife_oplist(metaid);
	if (!ops)
		return -ENOENT;

	mi = kzalloc(sizeof(*mi), gfp);
	if (!mi) {
		err = -ENOMEM;
		goto put_module;
	}

	mi->metaid = metaid;
	mi->ops = ops;

	if (len > 0) {
		err = ops->alloc(mi, metaval, gfp);
		if (err != 0)
			goto free_entry;
	}

	list_add_tail(&mi->metalist, &ife->metalist);
	return 0;

free_entry:
	kfree(mi);
put_module:
	/* put back what find_ife_oplist() took */
	module_put(ops->owner);
	return err;
}
static int use_all_metadata ( struct tcf_ife_info * ife )
{
struct tcf_meta_ops * o ;
int rc = 0 ;
int installed = 0 ;
2016-06-20 13:37:19 -07:00
read_lock ( & ife_mod_lock ) ;
2016-02-27 08:08:54 -05:00
list_for_each_entry ( o , & ifeoplist , list ) {
2016-06-20 13:37:19 -07:00
rc = add_metainfo ( ife , o - > metaid , NULL , 0 , true ) ;
2016-02-27 08:08:54 -05:00
if ( rc = = 0 )
installed + = 1 ;
}
2016-06-20 13:37:19 -07:00
read_unlock ( & ife_mod_lock ) ;
2016-02-27 08:08:54 -05:00
if ( installed )
return 0 ;
else
return - EINVAL ;
}
static int dump_metalist ( struct sk_buff * skb , struct tcf_ife_info * ife )
{
struct tcf_meta_info * e ;
struct nlattr * nest ;
unsigned char * b = skb_tail_pointer ( skb ) ;
int total_encoded = 0 ;
/*can only happen on decode */
if ( list_empty ( & ife - > metalist ) )
return 0 ;
nest = nla_nest_start ( skb , TCA_IFE_METALST ) ;
if ( ! nest )
goto out_nlmsg_trim ;
list_for_each_entry ( e , & ife - > metalist , metalist ) {
if ( ! e - > ops - > get ( skb , e ) )
total_encoded + = 1 ;
}
if ( ! total_encoded )
goto out_nlmsg_trim ;
nla_nest_end ( skb , nest ) ;
return 0 ;
out_nlmsg_trim :
nlmsg_trim ( skb , b ) ;
return - 1 ;
}
/* under ife->tcf_lock */
2017-12-05 12:53:07 -08:00
static void _tcf_ife_cleanup ( struct tc_action * a )
2016-02-27 08:08:54 -05:00
{
2016-07-25 16:09:41 -07:00
struct tcf_ife_info * ife = to_ife ( a ) ;
2016-02-27 08:08:54 -05:00
struct tcf_meta_info * e , * n ;
list_for_each_entry_safe ( e , n , & ife - > metalist , metalist ) {
module_put ( e - > ops - > owner ) ;
list_del ( & e - > metalist ) ;
if ( e - > metaval ) {
if ( e - > ops - > release )
e - > ops - > release ( e ) ;
else
kfree ( e - > metaval ) ;
}
kfree ( e ) ;
}
}
2017-12-05 12:53:07 -08:00
static void tcf_ife_cleanup ( struct tc_action * a )
2016-02-27 08:08:54 -05:00
{
2016-07-25 16:09:41 -07:00
struct tcf_ife_info * ife = to_ife ( a ) ;
2017-10-11 17:16:08 -04:00
struct tcf_ife_params * p ;
2016-02-27 08:08:54 -05:00
spin_lock_bh ( & ife - > tcf_lock ) ;
2017-12-05 12:53:07 -08:00
_tcf_ife_cleanup ( a ) ;
2016-02-27 08:08:54 -05:00
spin_unlock_bh ( & ife - > tcf_lock ) ;
2017-10-11 17:16:08 -04:00
p = rcu_dereference_protected ( ife - > params , 1 ) ;
net/sched: act_ife: fix recursive lock and idr leak
a recursive lock warning [1] can be observed with the following script,
# $TC actions add action ife encode allow prio pass index 42
IFE type 0xED3E
# $TC actions replace action ife encode allow tcindex pass index 42
in case the kernel was unable to run the last command (e.g. because of
the impossibility to load 'act_meta_skbtcindex'). For a similar reason,
the kernel can leak idr in the error path of tcf_ife_init(), because
tcf_idr_release() is not called after successful idr reservation:
# $TC actions add action ife encode allow tcindex index 47
IFE type 0xED3E
RTNETLINK answers: No such file or directory
We have an error talking to the kernel
# $TC actions add action ife encode allow tcindex index 47
IFE type 0xED3E
RTNETLINK answers: No space left on device
We have an error talking to the kernel
# $TC actions add action ife encode use mark 7 type 0xfefe pass index 47
IFE type 0xFEFE
RTNETLINK answers: No space left on device
We have an error talking to the kernel
Since tcfa_lock is already taken when the action is being edited, a call
to tcf_idr_release() wrongly makes tcf_idr_cleanup() take the same lock
again. On the other hand, tcf_idr_release() needs to be called in the
error path of tcf_ife_init(), to undo the last tcf_idr_create() invocation.
Fix both problems in tcf_ife_init().
Since the cleanup() routine can now be called when ife->params is NULL,
also add a NULL pointer check to avoid calling kfree_rcu(NULL, rcu).
[1]
============================================
WARNING: possible recursive locking detected
4.17.0-rc4.kasan+ #417 Tainted: G E
--------------------------------------------
tc/3932 is trying to acquire lock:
000000005097c9a6 (&(&p->tcfa_lock)->rlock){+...}, at: tcf_ife_cleanup+0x19/0x80 [act_ife]
but task is already holding lock:
000000005097c9a6 (&(&p->tcfa_lock)->rlock){+...}, at: tcf_ife_init+0xf6d/0x13c0 [act_ife]
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(&(&p->tcfa_lock)->rlock);
lock(&(&p->tcfa_lock)->rlock);
*** DEADLOCK ***
May be due to missing lock nesting notation
2 locks held by tc/3932:
#0: 000000007ca8e990 (rtnl_mutex){+.+.}, at: tcf_ife_init+0xf61/0x13c0 [act_ife]
#1: 000000005097c9a6 (&(&p->tcfa_lock)->rlock){+...}, at: tcf_ife_init+0xf6d/0x13c0 [act_ife]
stack backtrace:
CPU: 3 PID: 3932 Comm: tc Tainted: G E 4.17.0-rc4.kasan+ #417
Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
Call Trace:
dump_stack+0x9a/0xeb
__lock_acquire+0xf43/0x34a0
? debug_check_no_locks_freed+0x2b0/0x2b0
? debug_check_no_locks_freed+0x2b0/0x2b0
? debug_check_no_locks_freed+0x2b0/0x2b0
? __mutex_lock+0x62f/0x1240
? kvm_sched_clock_read+0x1a/0x30
? sched_clock+0x5/0x10
? sched_clock_cpu+0x18/0x170
? find_held_lock+0x39/0x1d0
? lock_acquire+0x10b/0x330
lock_acquire+0x10b/0x330
? tcf_ife_cleanup+0x19/0x80 [act_ife]
_raw_spin_lock_bh+0x38/0x70
? tcf_ife_cleanup+0x19/0x80 [act_ife]
tcf_ife_cleanup+0x19/0x80 [act_ife]
__tcf_idr_release+0xff/0x350
tcf_ife_init+0xdde/0x13c0 [act_ife]
? ife_exit_net+0x290/0x290 [act_ife]
? __lock_is_held+0xb4/0x140
tcf_action_init_1+0x67b/0xad0
? tcf_action_dump_old+0xa0/0xa0
? sched_clock+0x5/0x10
? sched_clock_cpu+0x18/0x170
? kvm_sched_clock_read+0x1a/0x30
? sched_clock+0x5/0x10
? sched_clock_cpu+0x18/0x170
? memset+0x1f/0x40
tcf_action_init+0x30f/0x590
? tcf_action_init_1+0xad0/0xad0
? memset+0x1f/0x40
tc_ctl_action+0x48e/0x5e0
? mutex_lock_io_nested+0x1160/0x1160
? tca_action_gd+0x990/0x990
? sched_clock+0x5/0x10
? find_held_lock+0x39/0x1d0
rtnetlink_rcv_msg+0x4da/0x990
? validate_linkmsg+0x680/0x680
? sched_clock_cpu+0x18/0x170
? find_held_lock+0x39/0x1d0
netlink_rcv_skb+0x127/0x350
? validate_linkmsg+0x680/0x680
? netlink_ack+0x970/0x970
? __kmalloc_node_track_caller+0x304/0x3a0
netlink_unicast+0x40f/0x5d0
? netlink_attachskb+0x580/0x580
? _copy_from_iter_full+0x187/0x760
? import_iovec+0x90/0x390
netlink_sendmsg+0x67f/0xb50
? netlink_unicast+0x5d0/0x5d0
? copy_msghdr_from_user+0x206/0x340
? netlink_unicast+0x5d0/0x5d0
sock_sendmsg+0xb3/0xf0
___sys_sendmsg+0x60a/0x8b0
? copy_msghdr_from_user+0x340/0x340
? lock_downgrade+0x5e0/0x5e0
? tty_write_lock+0x18/0x50
? kvm_sched_clock_read+0x1a/0x30
? sched_clock+0x5/0x10
? sched_clock_cpu+0x18/0x170
? find_held_lock+0x39/0x1d0
? lock_downgrade+0x5e0/0x5e0
? lock_acquire+0x10b/0x330
? __audit_syscall_entry+0x316/0x690
? current_kernel_time64+0x6b/0xd0
? __fget_light+0x55/0x1f0
? __sys_sendmsg+0xd2/0x170
__sys_sendmsg+0xd2/0x170
? __ia32_sys_shutdown+0x70/0x70
? syscall_trace_enter+0x57a/0xd60
? rcu_read_lock_sched_held+0xdc/0x110
? __bpf_trace_sys_enter+0x10/0x10
? do_syscall_64+0x22/0x480
do_syscall_64+0xa5/0x480
entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x7fd646988ba0
RSP: 002b:00007fffc9fab3c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00007fffc9fab4f0 RCX: 00007fd646988ba0
RDX: 0000000000000000 RSI: 00007fffc9fab440 RDI: 0000000000000003
RBP: 000000005b28c8b3 R08: 0000000000000002 R09: 0000000000000000
R10: 00007fffc9faae20 R11: 0000000000000246 R12: 0000000000000000
R13: 00007fffc9fab504 R14: 0000000000000001 R15: 000000000066c100
Fixes: 4e8c86155010 ("net sched: net sched: ife action fix late binding")
Fixes: ef6980b6becb ("introduce IFE action")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-06-19 15:39:46 +02:00
if ( p )
kfree_rcu ( p , rcu ) ;
2016-02-27 08:08:54 -05:00
}
2016-06-20 13:37:18 -07:00
/* Install every metadatum present in the parsed attribute table @tb.
 *
 * For an existing action (@exists true) the caller holds ife->tcf_lock.
 * Attribute indices 1 .. max_metacnt - 1 are examined; each attribute
 * that is present is first vetted with load_metaops_and_vet() and then
 * added with add_metainfo(), with @exists also selecting atomic
 * allocation there.
 *
 * Returns 0 when all present attributes were installed, otherwise the
 * first error encountered.
 */
static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
			     bool exists)
{
	int metaid;

	for (metaid = 1; metaid < max_metacnt; metaid++) {
		void *val;
		int len;
		int rc;

		if (!tb[metaid])
			continue;

		val = nla_data(tb[metaid]);
		len = nla_len(tb[metaid]);

		rc = load_metaops_and_vet(ife, metaid, val, len, exists);
		if (rc != 0)
			return rc;

		rc = add_metainfo(ife, metaid, val, len, exists);
		if (rc)
			return rc;
	}

	return 0;
}
static int tcf_ife_init ( struct net * net , struct nlattr * nla ,
2016-07-25 16:09:41 -07:00
struct nlattr * est , struct tc_action * * a ,
2018-07-05 17:24:25 +03:00
int ovr , int bind , bool rtnl_held ,
struct netlink_ext_ack * extack )
2016-02-27 08:08:54 -05:00
{
struct tc_action_net * tn = net_generic ( net , ife_net_id ) ;
struct nlattr * tb [ TCA_IFE_MAX + 1 ] ;
struct nlattr * tb2 [ IFE_META_MAX + 1 ] ;
2017-10-11 17:16:08 -04:00
struct tcf_ife_params * p , * p_old ;
2016-02-27 08:08:54 -05:00
struct tcf_ife_info * ife ;
2017-08-28 15:03:14 -04:00
u16 ife_type = ETH_P_IFE ;
2016-02-27 08:08:54 -05:00
struct tc_ife * parm ;
u8 * daddr = NULL ;
u8 * saddr = NULL ;
2016-06-13 13:46:28 -07:00
bool exists = false ;
int ret = 0 ;
2016-02-27 08:08:54 -05:00
int err ;
2017-04-12 14:34:07 +02:00
err = nla_parse_nested ( tb , TCA_IFE_MAX , nla , ife_policy , NULL ) ;
2016-02-27 08:08:54 -05:00
if ( err < 0 )
return err ;
if ( ! tb [ TCA_IFE_PARMS ] )
return - EINVAL ;
parm = nla_data ( tb [ TCA_IFE_PARMS ] ) ;
2017-10-11 17:16:06 -04:00
/* IFE_DECODE is 0 and indicates the opposite of IFE_ENCODE because
* they cannot run as the same time . Check on all other values which
* are not supported right now .
*/
if ( parm - > flags & ~ IFE_ENCODE )
return - EINVAL ;
2017-10-11 17:16:08 -04:00
p = kzalloc ( sizeof ( * p ) , GFP_KERNEL ) ;
if ( ! p )
return - ENOMEM ;
2017-08-30 02:31:59 -04:00
exists = tcf_idr_check ( tn , parm - > index , a , bind ) ;
2017-10-11 17:16:08 -04:00
if ( exists & & bind ) {
kfree ( p ) ;
2016-05-10 16:49:31 -04:00
return 0 ;
2017-10-11 17:16:08 -04:00
}
2016-05-10 16:49:31 -04:00
if ( ! exists ) {
2017-08-30 02:31:59 -04:00
ret = tcf_idr_create ( tn , parm - > index , est , a , & act_ife_ops ,
2017-10-11 17:16:07 -04:00
bind , true ) ;
2017-10-11 17:16:08 -04:00
if ( ret ) {
kfree ( p ) ;
2016-02-27 08:08:54 -05:00
return ret ;
2017-10-11 17:16:08 -04:00
}
2016-02-27 08:08:54 -05:00
ret = ACT_P_CREATED ;
} else {
2017-08-30 02:31:59 -04:00
tcf_idr_release ( * a , bind ) ;
2017-10-11 17:16:08 -04:00
if ( ! ovr ) {
kfree ( p ) ;
2016-02-27 08:08:54 -05:00
return - EEXIST ;
2017-10-11 17:16:08 -04:00
}
2016-02-27 08:08:54 -05:00
}
2016-07-25 16:09:41 -07:00
ife = to_ife ( * a ) ;
2017-10-11 17:16:08 -04:00
p - > flags = parm - > flags ;
2016-02-27 08:08:54 -05:00
if ( parm - > flags & IFE_ENCODE ) {
2017-08-28 15:03:14 -04:00
if ( tb [ TCA_IFE_TYPE ] )
ife_type = nla_get_u16 ( tb [ TCA_IFE_TYPE ] ) ;
2016-02-27 08:08:54 -05:00
if ( tb [ TCA_IFE_DMAC ] )
daddr = nla_data ( tb [ TCA_IFE_DMAC ] ) ;
if ( tb [ TCA_IFE_SMAC ] )
saddr = nla_data ( tb [ TCA_IFE_SMAC ] ) ;
}
if ( parm - > flags & IFE_ENCODE ) {
if ( daddr )
2017-10-11 17:16:08 -04:00
ether_addr_copy ( p - > eth_dst , daddr ) ;
2016-02-27 08:08:54 -05:00
else
2017-10-11 17:16:08 -04:00
eth_zero_addr ( p - > eth_dst ) ;
2016-02-27 08:08:54 -05:00
if ( saddr )
2017-10-11 17:16:08 -04:00
ether_addr_copy ( p - > eth_src , saddr ) ;
2016-02-27 08:08:54 -05:00
else
2017-10-11 17:16:08 -04:00
eth_zero_addr ( p - > eth_src ) ;
2016-02-27 08:08:54 -05:00
2017-10-11 17:16:08 -04:00
p - > eth_type = ife_type ;
2016-02-27 08:08:54 -05:00
}
2017-10-11 17:16:08 -04:00
if ( exists )
spin_lock_bh ( & ife - > tcf_lock ) ;
2016-02-27 08:08:54 -05:00
if ( ret = = ACT_P_CREATED )
INIT_LIST_HEAD ( & ife - > metalist ) ;
if ( tb [ TCA_IFE_METALST ] ) {
err = nla_parse_nested ( tb2 , IFE_META_MAX , tb [ TCA_IFE_METALST ] ,
2017-04-12 14:34:07 +02:00
NULL , NULL ) ;
2016-02-27 08:08:54 -05:00
if ( err ) {
metadata_parse_err :
if ( ret = = ACT_P_CREATED )
net/sched: act_ife: fix recursive lock and idr leak
a recursive lock warning [1] can be observed with the following script,
# $TC actions add action ife encode allow prio pass index 42
IFE type 0xED3E
# $TC actions replace action ife encode allow tcindex pass index 42
in case the kernel was unable to run the last command (e.g. because of
the impossibility to load 'act_meta_skbtcindex'). For a similar reason,
the kernel can leak idr in the error path of tcf_ife_init(), because
tcf_idr_release() is not called after successful idr reservation:
# $TC actions add action ife encode allow tcindex index 47
IFE type 0xED3E
RTNETLINK answers: No such file or directory
We have an error talking to the kernel
# $TC actions add action ife encode allow tcindex index 47
IFE type 0xED3E
RTNETLINK answers: No space left on device
We have an error talking to the kernel
# $TC actions add action ife encode use mark 7 type 0xfefe pass index 47
IFE type 0xFEFE
RTNETLINK answers: No space left on device
We have an error talking to the kernel
Since tcfa_lock is already taken when the action is being edited, a call
to tcf_idr_release() wrongly makes tcf_idr_cleanup() take the same lock
again. On the other hand, tcf_idr_release() needs to be called in the
error path of tcf_ife_init(), to undo the last tcf_idr_create() invocation.
Fix both problems in tcf_ife_init().
Since the cleanup() routine can now be called when ife->params is NULL,
also add a NULL pointer check to avoid calling kfree_rcu(NULL, rcu).
[1]
============================================
WARNING: possible recursive locking detected
4.17.0-rc4.kasan+ #417 Tainted: G E
--------------------------------------------
tc/3932 is trying to acquire lock:
000000005097c9a6 (&(&p->tcfa_lock)->rlock){+...}, at: tcf_ife_cleanup+0x19/0x80 [act_ife]
but task is already holding lock:
000000005097c9a6 (&(&p->tcfa_lock)->rlock){+...}, at: tcf_ife_init+0xf6d/0x13c0 [act_ife]
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(&(&p->tcfa_lock)->rlock);
lock(&(&p->tcfa_lock)->rlock);
*** DEADLOCK ***
May be due to missing lock nesting notation
2 locks held by tc/3932:
#0: 000000007ca8e990 (rtnl_mutex){+.+.}, at: tcf_ife_init+0xf61/0x13c0 [act_ife]
#1: 000000005097c9a6 (&(&p->tcfa_lock)->rlock){+...}, at: tcf_ife_init+0xf6d/0x13c0 [act_ife]
stack backtrace:
CPU: 3 PID: 3932 Comm: tc Tainted: G E 4.17.0-rc4.kasan+ #417
Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
Call Trace:
dump_stack+0x9a/0xeb
__lock_acquire+0xf43/0x34a0
? debug_check_no_locks_freed+0x2b0/0x2b0
? debug_check_no_locks_freed+0x2b0/0x2b0
? debug_check_no_locks_freed+0x2b0/0x2b0
? __mutex_lock+0x62f/0x1240
? kvm_sched_clock_read+0x1a/0x30
? sched_clock+0x5/0x10
? sched_clock_cpu+0x18/0x170
? find_held_lock+0x39/0x1d0
? lock_acquire+0x10b/0x330
lock_acquire+0x10b/0x330
? tcf_ife_cleanup+0x19/0x80 [act_ife]
_raw_spin_lock_bh+0x38/0x70
? tcf_ife_cleanup+0x19/0x80 [act_ife]
tcf_ife_cleanup+0x19/0x80 [act_ife]
__tcf_idr_release+0xff/0x350
tcf_ife_init+0xdde/0x13c0 [act_ife]
? ife_exit_net+0x290/0x290 [act_ife]
? __lock_is_held+0xb4/0x140
tcf_action_init_1+0x67b/0xad0
? tcf_action_dump_old+0xa0/0xa0
? sched_clock+0x5/0x10
? sched_clock_cpu+0x18/0x170
? kvm_sched_clock_read+0x1a/0x30
? sched_clock+0x5/0x10
? sched_clock_cpu+0x18/0x170
? memset+0x1f/0x40
tcf_action_init+0x30f/0x590
? tcf_action_init_1+0xad0/0xad0
? memset+0x1f/0x40
tc_ctl_action+0x48e/0x5e0
? mutex_lock_io_nested+0x1160/0x1160
? tca_action_gd+0x990/0x990
? sched_clock+0x5/0x10
? find_held_lock+0x39/0x1d0
rtnetlink_rcv_msg+0x4da/0x990
? validate_linkmsg+0x680/0x680
? sched_clock_cpu+0x18/0x170
? find_held_lock+0x39/0x1d0
netlink_rcv_skb+0x127/0x350
? validate_linkmsg+0x680/0x680
? netlink_ack+0x970/0x970
? __kmalloc_node_track_caller+0x304/0x3a0
netlink_unicast+0x40f/0x5d0
? netlink_attachskb+0x580/0x580
? _copy_from_iter_full+0x187/0x760
? import_iovec+0x90/0x390
netlink_sendmsg+0x67f/0xb50
? netlink_unicast+0x5d0/0x5d0
? copy_msghdr_from_user+0x206/0x340
? netlink_unicast+0x5d0/0x5d0
sock_sendmsg+0xb3/0xf0
___sys_sendmsg+0x60a/0x8b0
? copy_msghdr_from_user+0x340/0x340
? lock_downgrade+0x5e0/0x5e0
? tty_write_lock+0x18/0x50
? kvm_sched_clock_read+0x1a/0x30
? sched_clock+0x5/0x10
? sched_clock_cpu+0x18/0x170
? find_held_lock+0x39/0x1d0
? lock_downgrade+0x5e0/0x5e0
? lock_acquire+0x10b/0x330
? __audit_syscall_entry+0x316/0x690
? current_kernel_time64+0x6b/0xd0
? __fget_light+0x55/0x1f0
? __sys_sendmsg+0xd2/0x170
__sys_sendmsg+0xd2/0x170
? __ia32_sys_shutdown+0x70/0x70
? syscall_trace_enter+0x57a/0xd60
? rcu_read_lock_sched_held+0xdc/0x110
? __bpf_trace_sys_enter+0x10/0x10
? do_syscall_64+0x22/0x480
do_syscall_64+0xa5/0x480
entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x7fd646988ba0
RSP: 002b:00007fffc9fab3c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00007fffc9fab4f0 RCX: 00007fd646988ba0
RDX: 0000000000000000 RSI: 00007fffc9fab440 RDI: 0000000000000003
RBP: 000000005b28c8b3 R08: 0000000000000002 R09: 0000000000000000
R10: 00007fffc9faae20 R11: 0000000000000246 R12: 0000000000000000
R13: 00007fffc9fab504 R14: 0000000000000001 R15: 000000000066c100
Fixes: 4e8c86155010 ("net sched: net sched: ife action fix late binding")
Fixes: ef6980b6becb ("introduce IFE action")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-06-19 15:39:46 +02:00
tcf_idr_release ( * a , bind ) ;
2016-02-27 08:08:54 -05:00
2016-06-20 13:37:18 -07:00
if ( exists )
spin_unlock_bh ( & ife - > tcf_lock ) ;
2017-10-11 17:16:08 -04:00
kfree ( p ) ;
2016-02-27 08:08:54 -05:00
return err ;
}
2016-06-20 13:37:18 -07:00
err = populate_metalist ( ife , tb2 , exists ) ;
2016-02-27 08:08:54 -05:00
if ( err )
goto metadata_parse_err ;
} else {
/* if no passed metadata allow list or passed allow-all
* then here we process by adding as many supported metadatum
* as we can . You better have at least one else we are
* going to bail out
*/
err = use_all_metadata ( ife ) ;
if ( err ) {
if ( ret = = ACT_P_CREATED )
net/sched: act_ife: fix recursive lock and idr leak
a recursive lock warning [1] can be observed with the following script,
# $TC actions add action ife encode allow prio pass index 42
IFE type 0xED3E
# $TC actions replace action ife encode allow tcindex pass index 42
in case the kernel was unable to run the last command (e.g. because of
the impossibility to load 'act_meta_skbtcindex'). For a similar reason,
the kernel can leak idr in the error path of tcf_ife_init(), because
tcf_idr_release() is not called after successful idr reservation:
# $TC actions add action ife encode allow tcindex index 47
IFE type 0xED3E
RTNETLINK answers: No such file or directory
We have an error talking to the kernel
# $TC actions add action ife encode allow tcindex index 47
IFE type 0xED3E
RTNETLINK answers: No space left on device
We have an error talking to the kernel
# $TC actions add action ife encode use mark 7 type 0xfefe pass index 47
IFE type 0xFEFE
RTNETLINK answers: No space left on device
We have an error talking to the kernel
Since tcfa_lock is already taken when the action is being edited, a call
to tcf_idr_release() wrongly makes tcf_idr_cleanup() take the same lock
again. On the other hand, tcf_idr_release() needs to be called in the
error path of tcf_ife_init(), to undo the last tcf_idr_create() invocation.
Fix both problems in tcf_ife_init().
Since the cleanup() routine can now be called when ife->params is NULL,
also add a NULL pointer check to avoid calling kfree_rcu(NULL, rcu).
[1]
============================================
WARNING: possible recursive locking detected
4.17.0-rc4.kasan+ #417 Tainted: G E
--------------------------------------------
tc/3932 is trying to acquire lock:
000000005097c9a6 (&(&p->tcfa_lock)->rlock){+...}, at: tcf_ife_cleanup+0x19/0x80 [act_ife]
but task is already holding lock:
000000005097c9a6 (&(&p->tcfa_lock)->rlock){+...}, at: tcf_ife_init+0xf6d/0x13c0 [act_ife]
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(&(&p->tcfa_lock)->rlock);
lock(&(&p->tcfa_lock)->rlock);
*** DEADLOCK ***
May be due to missing lock nesting notation
2 locks held by tc/3932:
#0: 000000007ca8e990 (rtnl_mutex){+.+.}, at: tcf_ife_init+0xf61/0x13c0 [act_ife]
#1: 000000005097c9a6 (&(&p->tcfa_lock)->rlock){+...}, at: tcf_ife_init+0xf6d/0x13c0 [act_ife]
stack backtrace:
CPU: 3 PID: 3932 Comm: tc Tainted: G E 4.17.0-rc4.kasan+ #417
Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
Call Trace:
dump_stack+0x9a/0xeb
__lock_acquire+0xf43/0x34a0
? debug_check_no_locks_freed+0x2b0/0x2b0
? debug_check_no_locks_freed+0x2b0/0x2b0
? debug_check_no_locks_freed+0x2b0/0x2b0
? __mutex_lock+0x62f/0x1240
? kvm_sched_clock_read+0x1a/0x30
? sched_clock+0x5/0x10
? sched_clock_cpu+0x18/0x170
? find_held_lock+0x39/0x1d0
? lock_acquire+0x10b/0x330
lock_acquire+0x10b/0x330
? tcf_ife_cleanup+0x19/0x80 [act_ife]
_raw_spin_lock_bh+0x38/0x70
? tcf_ife_cleanup+0x19/0x80 [act_ife]
tcf_ife_cleanup+0x19/0x80 [act_ife]
__tcf_idr_release+0xff/0x350
tcf_ife_init+0xdde/0x13c0 [act_ife]
? ife_exit_net+0x290/0x290 [act_ife]
? __lock_is_held+0xb4/0x140
tcf_action_init_1+0x67b/0xad0
? tcf_action_dump_old+0xa0/0xa0
? sched_clock+0x5/0x10
? sched_clock_cpu+0x18/0x170
? kvm_sched_clock_read+0x1a/0x30
? sched_clock+0x5/0x10
? sched_clock_cpu+0x18/0x170
? memset+0x1f/0x40
tcf_action_init+0x30f/0x590
? tcf_action_init_1+0xad0/0xad0
? memset+0x1f/0x40
tc_ctl_action+0x48e/0x5e0
? mutex_lock_io_nested+0x1160/0x1160
? tca_action_gd+0x990/0x990
? sched_clock+0x5/0x10
? find_held_lock+0x39/0x1d0
rtnetlink_rcv_msg+0x4da/0x990
? validate_linkmsg+0x680/0x680
? sched_clock_cpu+0x18/0x170
? find_held_lock+0x39/0x1d0
netlink_rcv_skb+0x127/0x350
? validate_linkmsg+0x680/0x680
? netlink_ack+0x970/0x970
? __kmalloc_node_track_caller+0x304/0x3a0
netlink_unicast+0x40f/0x5d0
? netlink_attachskb+0x580/0x580
? _copy_from_iter_full+0x187/0x760
? import_iovec+0x90/0x390
netlink_sendmsg+0x67f/0xb50
? netlink_unicast+0x5d0/0x5d0
? copy_msghdr_from_user+0x206/0x340
? netlink_unicast+0x5d0/0x5d0
sock_sendmsg+0xb3/0xf0
___sys_sendmsg+0x60a/0x8b0
? copy_msghdr_from_user+0x340/0x340
? lock_downgrade+0x5e0/0x5e0
? tty_write_lock+0x18/0x50
? kvm_sched_clock_read+0x1a/0x30
? sched_clock+0x5/0x10
? sched_clock_cpu+0x18/0x170
? find_held_lock+0x39/0x1d0
? lock_downgrade+0x5e0/0x5e0
? lock_acquire+0x10b/0x330
? __audit_syscall_entry+0x316/0x690
? current_kernel_time64+0x6b/0xd0
? __fget_light+0x55/0x1f0
? __sys_sendmsg+0xd2/0x170
__sys_sendmsg+0xd2/0x170
? __ia32_sys_shutdown+0x70/0x70
? syscall_trace_enter+0x57a/0xd60
? rcu_read_lock_sched_held+0xdc/0x110
? __bpf_trace_sys_enter+0x10/0x10
? do_syscall_64+0x22/0x480
do_syscall_64+0xa5/0x480
entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x7fd646988ba0
RSP: 002b:00007fffc9fab3c8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
RAX: ffffffffffffffda RBX: 00007fffc9fab4f0 RCX: 00007fd646988ba0
RDX: 0000000000000000 RSI: 00007fffc9fab440 RDI: 0000000000000003
RBP: 000000005b28c8b3 R08: 0000000000000002 R09: 0000000000000000
R10: 00007fffc9faae20 R11: 0000000000000246 R12: 0000000000000000
R13: 00007fffc9fab504 R14: 0000000000000001 R15: 000000000066c100
Fixes: 4e8c86155010 ("net sched: net sched: ife action fix late binding")
Fixes: ef6980b6becb ("introduce IFE action")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2018-06-19 15:39:46 +02:00
tcf_idr_release ( * a , bind ) ;
2016-02-27 08:08:54 -05:00
2016-06-20 13:37:18 -07:00
if ( exists )
spin_unlock_bh ( & ife - > tcf_lock ) ;
2017-10-11 17:16:08 -04:00
kfree ( p ) ;
2016-02-27 08:08:54 -05:00
return err ;
}
}
2018-06-19 15:45:50 +02:00
ife - > tcf_action = parm - > action ;
2016-06-20 13:37:18 -07:00
if ( exists )
spin_unlock_bh ( & ife - > tcf_lock ) ;
2016-02-27 08:08:54 -05:00
2017-10-11 17:16:08 -04:00
p_old = rtnl_dereference ( ife - > params ) ;
rcu_assign_pointer ( ife - > params , p ) ;
if ( p_old )
kfree_rcu ( p_old , rcu ) ;
2016-02-27 08:08:54 -05:00
if ( ret = = ACT_P_CREATED )
2017-08-30 02:31:59 -04:00
tcf_idr_insert ( tn , * a ) ;
2016-02-27 08:08:54 -05:00
return ret ;
}
/*
 * Dump the configuration of IFE action @a into the netlink message @skb.
 *
 * Emits, in order: TCA_IFE_PARMS (a struct tc_ife built from the action),
 * TCA_IFE_TM (timestamps), TCA_IFE_DMAC / TCA_IFE_SMAC (only when the
 * respective address is non-zero), TCA_IFE_TYPE, and finally the metadata
 * list via dump_metalist().
 *
 * @bind and @ref are subtracted from the reported bind count and reference
 * count respectively.
 *
 * Returns skb->len on success. If any attribute cannot be added, the message
 * is trimmed back to where it was on entry and -1 is returned.
 */
static int tcf_ife_dump ( struct sk_buff * skb , struct tc_action * a , int bind ,
int ref )
{
/* remember the current tail so a partial dump can be rolled back */
unsigned char * b = skb_tail_pointer ( skb ) ;
2016-07-25 16:09:41 -07:00
struct tcf_ife_info * ife = to_ife ( a ) ;
2017-10-11 17:16:08 -04:00
/* rtnl_dereference(): NOTE(review) presumably relies on the caller holding RTNL -- confirm */
struct tcf_ife_params * p = rtnl_dereference ( ife - > params ) ;
2016-02-27 08:08:54 -05:00
struct tc_ife opt = {
. index = ife - > tcf_index ,
2018-07-05 17:24:24 +03:00
. refcnt = refcount_read ( & ife - > tcf_refcnt ) - ref ,
. bindcnt = atomic_read ( & ife - > tcf_bindcnt ) - bind ,
2016-02-27 08:08:54 -05:00
. action = ife - > tcf_action ,
2017-10-11 17:16:08 -04:00
. flags = p - > flags ,
2016-02-27 08:08:54 -05:00
} ;
struct tcf_t t ;
if ( nla_put ( skb , TCA_IFE_PARMS , sizeof ( opt ) , & opt ) )
goto nla_put_failure ;
2016-06-06 06:32:55 -04:00
tcf_tm_dump ( & t , & ife - > tcf_tm ) ;
2016-04-26 10:06:18 +02:00
if ( nla_put_64bit ( skb , TCA_IFE_TM , sizeof ( t ) , & t , TCA_IFE_PAD ) )
2016-02-27 08:08:54 -05:00
goto nla_put_failure ;
2017-10-11 17:16:08 -04:00
/* an all-zero destination MAC is not reported at all */
if ( ! is_zero_ether_addr ( p - > eth_dst ) ) {
if ( nla_put ( skb , TCA_IFE_DMAC , ETH_ALEN , p - > eth_dst ) )
2016-02-27 08:08:54 -05:00
goto nla_put_failure ;
}
2017-10-11 17:16:08 -04:00
/* likewise for an all-zero source MAC */
if ( ! is_zero_ether_addr ( p - > eth_src ) ) {
if ( nla_put ( skb , TCA_IFE_SMAC , ETH_ALEN , p - > eth_src ) )
2016-02-27 08:08:54 -05:00
goto nla_put_failure ;
}
2017-10-11 17:16:08 -04:00
/* the ethertype is always emitted, as a 2-byte attribute */
if ( nla_put ( skb , TCA_IFE_TYPE , 2 , & p - > eth_type ) )
2016-02-27 08:08:54 -05:00
goto nla_put_failure ;
if ( dump_metalist ( skb , ife ) ) {
/*ignore failure to dump metalist */
pr_info ( " Failed to dump metalist \n " ) ;
}
return skb - > len ;
nla_put_failure :
/* drop everything added since entry */
nlmsg_trim ( skb , b ) ;
return - 1 ;
}
2017-03-16 12:53:41 +02:00
/*
 * Find the metadatum entry on @ife's metalist that handles @metaid and let
 * it decode the received value.
 *
 * @skb:    packet being decoded, passed through to the decode op
 * @ife:    action instance whose metalist is searched
 * @metaid: metadatum type taken from the received TLV
 * @mlen:   length of the metadatum value
 * @mdata:  pointer to the metadatum value
 *
 * Returns the result of the decode op of the first entry whose metaid
 * matches and that has ops attached; -ENOENT when there is no such entry.
 */
static int find_decode_metaid(struct sk_buff *skb, struct tcf_ife_info *ife,
			      u16 metaid, u16 mlen, void *mdata)
{
	struct tcf_meta_info *entry;

	/* XXX: use hash to speed up */
	list_for_each_entry(entry, &ife->metalist, metalist) {
		if (metaid != entry->metaid || !entry->ops)
			continue;

		/* We check for decode presence already */
		return entry->ops->decode(skb, mdata, mlen);
	}

	return -ENOENT;
}
/*
 * Decode path of the IFE action: parse the IFE header carried by @skb and
 * hand every metadatum TLV to the matching handler on the action's metalist.
 *
 * @res is not used by this function.
 *
 * Returns the action's configured verdict (ife->tcf_action) when the packet
 * was parsed completely, or TC_ACT_SHOT (after bumping the drop counter)
 * when the IFE header or one of its TLVs cannot be parsed.
 * A TLV for which no handler is found is only logged and counted as an
 * overlimit; it does not cause a drop.
 */
static int tcf_ife_decode ( struct sk_buff * skb , const struct tc_action * a ,
struct tcf_result * res )
{
2016-07-25 16:09:41 -07:00
struct tcf_ife_info * ife = to_ife ( a ) ;
2016-02-27 08:08:54 -05:00
int action = ife - > tcf_action ;
2017-02-01 15:30:03 +02:00
u8 * ifehdr_end ;
u8 * tlv_data ;
u16 metalen ;
2016-02-27 08:08:54 -05:00
2017-10-11 17:16:07 -04:00
/* per-CPU byte/packet accounting and last-use timestamp */
bstats_cpu_update ( this_cpu_ptr ( ife - > common . cpu_bstats ) , skb ) ;
2016-06-06 06:32:53 -04:00
tcf_lastuse_update ( & ife - > tcf_tm ) ;
2016-02-27 08:08:54 -05:00
2017-02-01 15:30:03 +02:00
/* NOTE(review): presumably re-exposes the link-layer header, which has
 * already been pulled at ingress -- confirm
 */
if ( skb_at_tc_ingress ( skb ) )
skb_push ( skb , skb - > dev - > hard_header_len ) ;
/* on success tlv_data points at the first TLV and metalen is filled in */
tlv_data = ife_decode ( skb , & metalen ) ;
if ( unlikely ( ! tlv_data ) ) {
2017-10-11 17:16:07 -04:00
qstats_drop_inc ( this_cpu_ptr ( ife - > common . cpu_qstats ) ) ;
2016-02-27 08:08:54 -05:00
return TC_ACT_SHOT ;
}
2017-02-01 15:30:03 +02:00
ifehdr_end = tlv_data + metalen ;
/* walk the TLVs until the end of the metadata area is reached */
for ( ; tlv_data < ifehdr_end ; tlv_data = ife_tlv_meta_next ( tlv_data ) ) {
u8 * curr_data ;
u16 mtype ;
u16 dlen ;
2016-02-27 08:08:54 -05:00
2018-04-20 15:15:04 -04:00
/* a NULL return means the TLV could not be decoded: drop the packet */
curr_data = ife_tlv_meta_decode ( tlv_data , ifehdr_end , & mtype ,
& dlen , NULL ) ;
if ( ! curr_data ) {
qstats_drop_inc ( this_cpu_ptr ( ife - > common . cpu_qstats ) ) ;
return TC_ACT_SHOT ;
}
2016-02-27 08:08:54 -05:00
2017-02-01 15:30:03 +02:00
if ( find_decode_metaid ( skb , ife , mtype , dlen , curr_data ) ) {
2016-02-27 08:08:54 -05:00
/* abuse overlimits to count when we receive metadata
* but dont have an ops for it
*/
2017-02-01 15:30:03 +02:00
pr_info_ratelimited ( " Unknown metaid %d dlen %d \n " ,
mtype , dlen ) ;
2017-10-11 17:16:07 -04:00
qstats_overlimit_inc ( this_cpu_ptr ( ife - > common . cpu_qstats ) ) ;
2016-02-27 08:08:54 -05:00
}
2017-02-01 15:30:03 +02:00
}
2016-02-27 08:08:54 -05:00
2017-02-01 15:30:03 +02:00
/* the walk must end exactly at the end of the metadata area */
if ( WARN_ON ( tlv_data ! = ifehdr_end ) ) {
2017-10-11 17:16:07 -04:00
qstats_drop_inc ( this_cpu_ptr ( ife - > common . cpu_qstats ) ) ;
2017-02-01 15:30:03 +02:00
return TC_ACT_SHOT ;
2016-02-27 08:08:54 -05:00
}
2017-02-01 15:30:03 +02:00
/* re-derive the protocol of the packet now that the IFE header is parsed */
skb - > protocol = eth_type_trans ( skb , skb - > dev ) ;
2016-02-27 08:08:54 -05:00
skb_reset_network_header ( skb ) ;
2017-02-01 15:30:03 +02:00
2016-02-27 08:08:54 -05:00
return action ;
}
/*XXX: check if we can do this at install time instead of current
* send data path
* */
static int ife_get_sz ( struct sk_buff * skb , struct tcf_ife_info * ife )
{
struct tcf_meta_info * e , * n ;
int tot_run_sz = 0 , run_sz = 0 ;
list_for_each_entry_safe ( e , n , & ife - > metalist , metalist ) {
if ( e - > ops - > check_presence ) {
run_sz = e - > ops - > check_presence ( skb , e ) ;
tot_run_sz + = run_sz ;
}
}
return tot_run_sz ;
}
static int tcf_ife_encode ( struct sk_buff * skb , const struct tc_action * a ,
2017-10-11 17:16:08 -04:00
struct tcf_result * res , struct tcf_ife_params * p )
2016-02-27 08:08:54 -05:00
{
2016-07-25 16:09:41 -07:00
struct tcf_ife_info * ife = to_ife ( a ) ;
2016-02-27 08:08:54 -05:00
int action = ife - > tcf_action ;
struct ethhdr * oethh ; /* outer ether header */
struct tcf_meta_info * e ;
/*
OUTERHDR : TOTMETALEN : { TLVHDR : Metadatum : TLVHDR . . } : ORIGDATA
where ORIGDATA = original ethernet header . . .
*/
u16 metalen = ife_get_sz ( skb , ife ) ;
int hdrm = metalen + skb - > dev - > hard_header_len + IFE_METAHDRLEN ;
2017-02-01 15:30:03 +02:00
unsigned int skboff = 0 ;
2016-02-27 08:08:54 -05:00
int new_len = skb - > len + hdrm ;
bool exceed_mtu = false ;
2017-02-01 15:30:03 +02:00
void * ife_meta ;
int err = 0 ;
2016-02-27 08:08:54 -05:00
2017-01-07 17:06:36 -05:00
if ( ! skb_at_tc_ingress ( skb ) ) {
2016-02-27 08:08:54 -05:00
if ( new_len > skb - > dev - > mtu )
exceed_mtu = true ;
}
2017-10-11 17:16:07 -04:00
bstats_cpu_update ( this_cpu_ptr ( ife - > common . cpu_bstats ) , skb ) ;
2016-06-06 06:32:53 -04:00
tcf_lastuse_update ( & ife - > tcf_tm ) ;
2016-02-27 08:08:54 -05:00
if ( ! metalen ) { /* no metadata to send */
/* abuse overlimits to count when we allow packet
* with no metadata
*/
2017-10-11 17:16:07 -04:00
qstats_overlimit_inc ( this_cpu_ptr ( ife - > common . cpu_qstats ) ) ;
2016-02-27 08:08:54 -05:00
return action ;
}
/* could be stupid policy setup or mtu config
* so lets be conservative . . */
if ( ( action = = TC_ACT_SHOT ) | | exceed_mtu ) {
2017-10-11 17:16:07 -04:00
qstats_drop_inc ( this_cpu_ptr ( ife - > common . cpu_qstats ) ) ;
2016-02-27 08:08:54 -05:00
return TC_ACT_SHOT ;
}
2017-01-07 17:06:36 -05:00
if ( skb_at_tc_ingress ( skb ) )
2016-02-27 08:08:54 -05:00
skb_push ( skb , skb - > dev - > hard_header_len ) ;
2017-02-01 15:30:03 +02:00
ife_meta = ife_encode ( skb , metalen ) ;
2016-02-27 08:08:54 -05:00
2017-10-11 17:16:07 -04:00
spin_lock ( & ife - > tcf_lock ) ;
2016-02-27 08:08:54 -05:00
/* XXX: we dont have a clever way of telling encode to
* not repeat some of the computations that are done by
* ops - > presence_check . . .
*/
list_for_each_entry ( e , & ife - > metalist , metalist ) {
if ( e - > ops - > encode ) {
2017-02-01 15:30:03 +02:00
err = e - > ops - > encode ( skb , ( void * ) ( ife_meta + skboff ) ,
2016-02-27 08:08:54 -05:00
e ) ;
}
if ( err < 0 ) {
/* too corrupt to keep around if overwritten */
spin_unlock ( & ife - > tcf_lock ) ;
2017-10-11 17:16:07 -04:00
qstats_drop_inc ( this_cpu_ptr ( ife - > common . cpu_qstats ) ) ;
2016-02-27 08:08:54 -05:00
return TC_ACT_SHOT ;
}
skboff + = err ;
}
2017-10-11 17:16:08 -04:00
spin_unlock ( & ife - > tcf_lock ) ;
2017-02-01 15:30:03 +02:00
oethh = ( struct ethhdr * ) skb - > data ;
2016-02-27 08:08:54 -05:00
2017-10-11 17:16:08 -04:00
if ( ! is_zero_ether_addr ( p - > eth_src ) )
ether_addr_copy ( oethh - > h_source , p - > eth_src ) ;
if ( ! is_zero_ether_addr ( p - > eth_dst ) )
ether_addr_copy ( oethh - > h_dest , p - > eth_dst ) ;
oethh - > h_proto = htons ( p - > eth_type ) ;
2016-02-27 08:08:54 -05:00
2017-01-07 17:06:36 -05:00
if ( skb_at_tc_ingress ( skb ) )
2016-02-27 08:08:54 -05:00
skb_pull ( skb , skb - > dev - > hard_header_len ) ;
return action ;
}
static int tcf_ife_act ( struct sk_buff * skb , const struct tc_action * a ,
struct tcf_result * res )
{
2016-07-25 16:09:41 -07:00
struct tcf_ife_info * ife = to_ife ( a ) ;
2017-10-11 17:16:08 -04:00
struct tcf_ife_params * p ;
int ret ;
rcu_read_lock ( ) ;
p = rcu_dereference ( ife - > params ) ;
if ( p - > flags & IFE_ENCODE ) {
ret = tcf_ife_encode ( skb , a , res , p ) ;
rcu_read_unlock ( ) ;
return ret ;
}
rcu_read_unlock ( ) ;
2016-02-27 08:08:54 -05:00
2017-10-11 17:16:06 -04:00
return tcf_ife_decode ( skb , a , res ) ;
2016-02-27 08:08:54 -05:00
}
static int tcf_ife_walker ( struct net * net , struct sk_buff * skb ,
struct netlink_callback * cb , int type ,
2018-02-15 10:54:58 -05:00
const struct tc_action_ops * ops ,
struct netlink_ext_ack * extack )
2016-02-27 08:08:54 -05:00
{
struct tc_action_net * tn = net_generic ( net , ife_net_id ) ;
2018-02-15 10:54:59 -05:00
return tcf_generic_walker ( tn , skb , cb , type , ops , extack ) ;
2016-02-27 08:08:54 -05:00
}
2018-02-15 10:54:57 -05:00
/*
 * Lookup callback for the IFE action: forwards to tcf_idr_search() using
 * the per-namespace IFE action table of @net. @extack is not used here.
 */
static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index,
			  struct netlink_ext_ack *extack)
{
	return tcf_idr_search(net_generic(net, ife_net_id), a, index);
}
/*
 * Operations table for the IFE action; it is what ife_init_module() and
 * ife_cleanup_module() register and unregister.
 */
static struct tc_action_ops act_ife_ops = {
. kind = " ife " ,
. type = TCA_ACT_IFE ,
. owner = THIS_MODULE ,
/* per-packet entry point: dispatches to encode or decode */
. act = tcf_ife_act ,
. dump = tcf_ife_dump ,
. cleanup = tcf_ife_cleanup ,
. init = tcf_ife_init ,
. walk = tcf_ife_walker ,
. lookup = tcf_ife_search ,
2016-07-25 16:09:41 -07:00
/* size of the per-action private structure */
. size = sizeof ( struct tcf_ife_info ) ,
2016-02-27 08:08:54 -05:00
} ;
static __net_init int ife_init_net ( struct net * net )
{
struct tc_action_net * tn = net_generic ( net , ife_net_id ) ;
2017-11-06 13:47:18 -08:00
return tc_action_net_init ( tn , & act_ife_ops ) ;
2016-02-27 08:08:54 -05:00
}
2017-12-11 15:35:03 -08:00
/*
 * Batched per-namespace exit hook: hands every namespace on @net_list to
 * tc_action_net_exit() for the IFE action table.
 */
static void __net_exit ife_exit_net(struct list_head *net_list)
{
	tc_action_net_exit(net_list, ife_net_id);
}
static struct pernet_operations ife_net_ops = {
. init = ife_init_net ,
2017-12-11 15:35:03 -08:00
. exit_batch = ife_exit_net ,
2016-02-27 08:08:54 -05:00
. id = & ife_net_id ,
. size = sizeof ( struct tc_action_net ) ,
} ;
/*
 * Module init: registers the IFE action ops together with its per-namespace
 * hooks and returns the result of tcf_register_action().
 */
static int __init ife_init_module(void)
{
	return tcf_register_action(&act_ife_ops, &ife_net_ops);
}
/*
 * Module exit: unregisters the IFE action ops and its per-namespace hooks.
 */
static void __exit ife_cleanup_module(void)
{
	tcf_unregister_action(&act_ife_ops, &ife_net_ops);
}
/* module entry and exit points */
module_init ( ife_init_module ) ;
module_exit ( ife_cleanup_module ) ;
/* module metadata */
MODULE_AUTHOR ( " Jamal Hadi Salim(2015) " ) ;
MODULE_DESCRIPTION ( " Inter-FE LFB action " ) ;
MODULE_LICENSE ( " GPL " ) ;