2005-04-17 02:20:36 +04:00
/*
 * net/sched/cls_api.c	Packet classifier API.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 *
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 *
 */
# include <linux/module.h>
# include <linux/types.h>
# include <linux/kernel.h>
# include <linux/string.h>
# include <linux/errno.h>
2017-02-09 16:38:57 +03:00
# include <linux/err.h>
2005-04-17 02:20:36 +04:00
# include <linux/skbuff.h>
# include <linux/init.h>
# include <linux/kmod.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h
percpu.h is included by sched.h and module.h and thus ends up being
included when building most .c files. percpu.h includes slab.h which
in turn includes gfp.h making everything defined by the two files
universally available and complicating inclusion dependencies.
percpu.h -> slab.h dependency is about to be removed. Prepare for
this change by updating users of gfp and slab facilities to include those
headers directly instead of assuming availability. As this conversion
needs to touch large number of source files, the following script is
used as the basis of conversion.
http://userweb.kernel.org/~tj/misc/slabh-sweep.py
The script does the following.
* Scan files for gfp and slab usages and update includes such that
only the necessary includes are there. ie. if only gfp is used,
gfp.h, if slab is used, slab.h.
* When the script inserts a new include, it looks at the include
blocks and tries to put the new include such that its order conforms
to its surrounding. It's put in the include block which contains
core kernel includes, in the same order that the rest are ordered -
alphabetical, Christmas tree, rev-Xmas-tree or at the end if there
doesn't seem to be any matching order.
* If the script can't find a place to put a new include (mostly
because the file doesn't have fitting include block), it prints out
an error message indicating which .h file needs to be added to the
file.
The conversion was done in the following steps.
1. The initial automatic conversion of all .c files updated slightly
over 4000 files, deleting around 700 includes and adding ~480 gfp.h
and ~3000 slab.h inclusions. The script emitted errors for ~400
files.
2. Each error was manually checked. Some didn't need the inclusion,
some needed manual addition while adding it to implementation .h or
embedding .c file was more appropriate for others. This step added
inclusions to around 150 files.
3. The script was run again and the output was compared to the edits
from #2 to make sure no file was left behind.
4. Several build tests were done and a couple of problems were fixed.
e.g. lib/decompress_*.c used malloc/free() wrappers around slab
APIs requiring slab.h to be added manually.
5. The script was run on all .h files but without automatically
editing them as sprinkling gfp.h and slab.h inclusions around .h
files could easily lead to inclusion dependency hell. Most gfp.h
inclusion directives were ignored as stuff from gfp.h was usually
widely available and often used in preprocessor macros. Each
slab.h inclusion directive was examined and added manually as
necessary.
6. percpu.h was updated not to include slab.h.
7. Build tests were done on the following configurations and failures
were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my
distributed build env didn't work with gcov compiles) and a few
more options had to be turned off depending on archs to make things
build (like ipr on powerpc/64 which failed due to missing writeq).
* x86 and x86_64 UP and SMP allmodconfig and a custom test config.
* powerpc and powerpc64 SMP allmodconfig
* sparc and sparc64 SMP allmodconfig
* ia64 SMP allmodconfig
* s390 SMP allmodconfig
* alpha SMP allmodconfig
* um on x86_64 SMP allmodconfig
8. percpu.h modifications were reverted so that it could be applied as
a separate patch and serve as bisection point.
Given the fact that I had only a couple of failures from tests on step
6, I'm fairly confident about the coverage of this conversion patch.
If there is a breakage, it's likely to be something in one of the arch
headers which should be easily discoverable on most builds of
the specific arch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 11:04:11 +03:00
# include <linux/slab.h>
2018-01-17 13:46:46 +03:00
# include <linux/idr.h>
2007-11-30 16:21:31 +03:00
# include <net/net_namespace.h>
# include <net/sock.h>
2007-03-26 10:06:12 +04:00
# include <net/netlink.h>
2005-04-17 02:20:36 +04:00
# include <net/pkt_sched.h>
# include <net/pkt_cls.h>
2018-10-10 23:00:58 +03:00
extern const struct nla_policy rtm_tca_policy [ TCA_MAX + 1 ] ;
2005-04-17 02:20:36 +04:00
/* The list of all installed classifier types */
2013-12-16 08:15:11 +04:00
static LIST_HEAD ( tcf_proto_base ) ;
2005-04-17 02:20:36 +04:00
/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK ( cls_mod_lock ) ;
/* Find classifier type by string name */
2018-07-23 10:23:04 +03:00
static const struct tcf_proto_ops * __tcf_proto_lookup_ops ( const char * kind )
2005-04-17 02:20:36 +04:00
{
2013-12-20 22:04:18 +04:00
const struct tcf_proto_ops * t , * res = NULL ;
2005-04-17 02:20:36 +04:00
if ( kind ) {
read_lock ( & cls_mod_lock ) ;
2013-12-16 08:15:11 +04:00
list_for_each_entry ( t , & tcf_proto_base , head ) {
2017-02-09 16:38:57 +03:00
if ( strcmp ( kind , t - > kind ) = = 0 ) {
2013-12-20 22:04:18 +04:00
if ( try_module_get ( t - > owner ) )
res = t ;
2005-04-17 02:20:36 +04:00
break ;
}
}
read_unlock ( & cls_mod_lock ) ;
}
2013-12-20 22:04:18 +04:00
return res ;
2005-04-17 02:20:36 +04:00
}
2018-07-23 10:23:04 +03:00
/* Look up classifier ops by name, trying a module load on a miss.
 *
 * Returns the ops (with the module reference taken by
 * __tcf_proto_lookup_ops()) on a direct hit. With CONFIG_MODULES, a miss
 * drops RTNL, requests module "cls_<kind>" and looks again; if the type is
 * now present the reference is released and ERR_PTR(-EAGAIN) is returned so
 * the caller replays the request. Otherwise sets an extack message and
 * returns ERR_PTR(-ENOENT).
 */
static const struct tcf_proto_ops *
tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack)
{
	const struct tcf_proto_ops *ops;

	ops = __tcf_proto_lookup_ops(kind);
	if (ops)
		return ops;
#ifdef CONFIG_MODULES
	rtnl_unlock();
	/* The format must not carry padding: it becomes the module name. */
	request_module("cls_%s", kind);
	rtnl_lock();
	ops = __tcf_proto_lookup_ops(kind);
	/* We dropped the RTNL semaphore in order to perform
	 * the module load. So, even if we succeeded in loading
	 * the module we have to replay the request. We indicate
	 * this using -EAGAIN.
	 */
	if (ops) {
		module_put(ops->owner);
		return ERR_PTR(-EAGAIN);
	}
#endif
	NL_SET_ERR_MSG(extack, " TC classifier not found ");
	return ERR_PTR(-ENOENT);
}
2005-04-17 02:20:36 +04:00
/* Register(unregister) new classifier type */
int register_tcf_proto_ops ( struct tcf_proto_ops * ops )
{
2013-12-16 08:15:11 +04:00
struct tcf_proto_ops * t ;
2005-04-17 02:20:36 +04:00
int rc = - EEXIST ;
write_lock ( & cls_mod_lock ) ;
2013-12-16 08:15:11 +04:00
list_for_each_entry ( t , & tcf_proto_base , head )
2005-04-17 02:20:36 +04:00
if ( ! strcmp ( ops - > kind , t - > kind ) )
goto out ;
2013-12-16 08:15:11 +04:00
list_add_tail ( & ops - > head , & tcf_proto_base ) ;
2005-04-17 02:20:36 +04:00
rc = 0 ;
out :
write_unlock ( & cls_mod_lock ) ;
return rc ;
}
2008-01-21 13:26:41 +03:00
EXPORT_SYMBOL ( register_tcf_proto_ops ) ;
2005-04-17 02:20:36 +04:00
2017-10-27 04:24:28 +03:00
static struct workqueue_struct * tc_filter_wq ;
2005-04-17 02:20:36 +04:00
int unregister_tcf_proto_ops ( struct tcf_proto_ops * ops )
{
2013-12-16 08:15:11 +04:00
struct tcf_proto_ops * t ;
2005-04-17 02:20:36 +04:00
int rc = - ENOENT ;
net: sched: fix call_rcu() race on classifier module unloads
Vijay reported that a loop as simple as ...
while true; do
tc qdisc add dev foo root handle 1: prio
tc filter add dev foo parent 1: u32 match u32 0 0 flowid 1
tc qdisc del dev foo root
rmmod cls_u32
done
... will panic the kernel. Moreover, he bisected the change
apparently introducing it to 78fd1d0ab072 ("netlink: Re-add
locking to netlink_lookup() and seq walker").
The removal of synchronize_net() from the netlink socket
triggering the qdisc to be removed, seems to have uncovered
an RCU resp. module reference count race from the tc API.
Given that RCU conversion was done after e341694e3eb5 ("netlink:
Convert netlink_lookup() to use RCU protected hash table")
which added the synchronize_net() originally, occasion of
hitting the bug was less likely (not impossible though):
When qdiscs that i) support attaching classifiers and,
ii) have at least one of them attached, get deleted, they
invoke tcf_destroy_chain(), and thus call into ->destroy()
handler from a classifier module.
After RCU conversion, all classifier that have an internal
prio list, unlink them and initiate freeing via call_rcu()
deferral.
Meanhile, tcf_destroy() releases already reference to the
tp->ops->owner module before the queued RCU callback handler
has been invoked.
Subsequent rmmod on the classifier module is then not prevented
since all module references are already dropped.
By the time, the kernel invokes the RCU callback handler from
the module, that function address is then invalid.
One way to fix it would be to add an rcu_barrier() to
unregister_tcf_proto_ops() to wait for all pending call_rcu()s
to complete.
synchronize_rcu() is not appropriate as under heavy RCU
callback load, registered call_rcu()s could be deferred
longer than a grace period. In case we don't have any pending
call_rcu()s, the barrier is allowed to return immediately.
Since we came here via unregister_tcf_proto_ops(), there
are no users of a given classifier anymore. Further nested
call_rcu()s pointing into the module space are not being
done anywhere.
Only cls_bpf_delete_prog() may schedule a work item, to
unlock pages eventually, but that is not in the range/context
of cls_bpf anymore.
Fixes: 25d8c0d55f24 ("net: rcu-ify tcf_proto")
Fixes: 9888faefe132 ("net: sched: cls_basic use RCU")
Reported-by: Vijay Subramanian <subramanian.vijay@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: John Fastabend <john.r.fastabend@intel.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Thomas Graf <tgraf@suug.ch>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Tested-by: Vijay Subramanian <subramanian.vijay@gmail.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-20 18:13:33 +03:00
/* Wait for outstanding call_rcu()s, if any, from a
* tcf_proto_ops ' s destroy ( ) handler .
*/
rcu_barrier ( ) ;
2017-10-27 04:24:28 +03:00
flush_workqueue ( tc_filter_wq ) ;
net: sched: fix call_rcu() race on classifier module unloads
Vijay reported that a loop as simple as ...
while true; do
tc qdisc add dev foo root handle 1: prio
tc filter add dev foo parent 1: u32 match u32 0 0 flowid 1
tc qdisc del dev foo root
rmmod cls_u32
done
... will panic the kernel. Moreover, he bisected the change
apparently introducing it to 78fd1d0ab072 ("netlink: Re-add
locking to netlink_lookup() and seq walker").
The removal of synchronize_net() from the netlink socket
triggering the qdisc to be removed, seems to have uncovered
an RCU resp. module reference count race from the tc API.
Given that RCU conversion was done after e341694e3eb5 ("netlink:
Convert netlink_lookup() to use RCU protected hash table")
which added the synchronize_net() originally, occasion of
hitting the bug was less likely (not impossible though):
When qdiscs that i) support attaching classifiers and,
ii) have at least one of them attached, get deleted, they
invoke tcf_destroy_chain(), and thus call into ->destroy()
handler from a classifier module.
After RCU conversion, all classifier that have an internal
prio list, unlink them and initiate freeing via call_rcu()
deferral.
Meanhile, tcf_destroy() releases already reference to the
tp->ops->owner module before the queued RCU callback handler
has been invoked.
Subsequent rmmod on the classifier module is then not prevented
since all module references are already dropped.
By the time, the kernel invokes the RCU callback handler from
the module, that function address is then invalid.
One way to fix it would be to add an rcu_barrier() to
unregister_tcf_proto_ops() to wait for all pending call_rcu()s
to complete.
synchronize_rcu() is not appropriate as under heavy RCU
callback load, registered call_rcu()s could be deferred
longer than a grace period. In case we don't have any pending
call_rcu()s, the barrier is allowed to return immediately.
Since we came here via unregister_tcf_proto_ops(), there
are no users of a given classifier anymore. Further nested
call_rcu()s pointing into the module space are not being
done anywhere.
Only cls_bpf_delete_prog() may schedule a work item, to
unlock pages eventually, but that is not in the range/context
of cls_bpf anymore.
Fixes: 25d8c0d55f24 ("net: rcu-ify tcf_proto")
Fixes: 9888faefe132 ("net: sched: cls_basic use RCU")
Reported-by: Vijay Subramanian <subramanian.vijay@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: John Fastabend <john.r.fastabend@intel.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Thomas Graf <tgraf@suug.ch>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Tested-by: Vijay Subramanian <subramanian.vijay@gmail.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-05-20 18:13:33 +03:00
2005-04-17 02:20:36 +04:00
write_lock ( & cls_mod_lock ) ;
2013-12-20 22:04:18 +04:00
list_for_each_entry ( t , & tcf_proto_base , head ) {
if ( t = = ops ) {
list_del ( & t - > head ) ;
rc = 0 ;
2005-04-17 02:20:36 +04:00
break ;
2013-12-20 22:04:18 +04:00
}
}
2005-04-17 02:20:36 +04:00
write_unlock ( & cls_mod_lock ) ;
return rc ;
}
2008-01-21 13:26:41 +03:00
EXPORT_SYMBOL ( unregister_tcf_proto_ops ) ;
2005-04-17 02:20:36 +04:00
2018-05-24 01:26:53 +03:00
/* Initialise @rwork to run @func and queue it on tc_filter_wq.
 *
 * Returns the result of queue_rcu_work().
 */
bool tcf_queue_work(struct rcu_work *rwork, work_func_t func)
{
	bool queued;

	INIT_RCU_WORK(rwork, func);
	queued = queue_rcu_work(tc_filter_wq, rwork);

	return queued;
}
EXPORT_SYMBOL ( tcf_queue_work ) ;
2005-04-17 02:20:36 +04:00
/* Select new prio value from the range, managed by kernel. */
2008-01-21 13:26:41 +03:00
static inline u32 tcf_auto_prio ( struct tcf_proto * tp )
2005-04-17 02:20:36 +04:00
{
2008-01-21 13:26:41 +03:00
u32 first = TC_H_MAKE ( 0xC0000000U , 0U ) ;
2005-04-17 02:20:36 +04:00
if ( tp )
2011-01-19 22:26:56 +03:00
first = tp - > prio - 1 ;
2005-04-17 02:20:36 +04:00
2017-05-17 12:07:58 +03:00
return TC_H_MAJ ( first ) ;
2005-04-17 02:20:36 +04:00
}
2017-02-09 16:38:57 +03:00
/* Allocate and initialise a tcf_proto of classifier type @kind.
 *
 * Resolves the ops via tcf_proto_lookup_ops(), fills in classify, protocol,
 * prio and chain, then calls the classifier's init(). Returns the new proto,
 * or an ERR_PTR: -ENOBUFS on allocation failure, the lookup error, or the
 * init() error (after dropping the module reference).
 */
static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
					  u32 prio, struct tcf_chain *chain,
					  struct netlink_ext_ack *extack)
{
	struct tcf_proto *tp = kzalloc(sizeof(*tp), GFP_KERNEL);
	int err;

	if (!tp)
		return ERR_PTR(-ENOBUFS);

	tp->ops = tcf_proto_lookup_ops(kind, extack);
	if (IS_ERR(tp->ops)) {
		err = PTR_ERR(tp->ops);
		goto err_free;
	}

	tp->classify = tp->ops->classify;
	tp->protocol = protocol;
	tp->prio = prio;
	tp->chain = chain;

	err = tp->ops->init(tp);
	if (err)
		goto err_put_module;

	return tp;

err_put_module:
	module_put(tp->ops->owner);
err_free:
	kfree(tp);
	return ERR_PTR(err);
}
2018-01-24 23:54:13 +03:00
static void tcf_proto_destroy ( struct tcf_proto * tp ,
struct netlink_ext_ack * extack )
2017-02-09 16:38:56 +03:00
{
2018-01-24 23:54:13 +03:00
tp - > ops - > destroy ( tp , extack ) ;
2017-04-20 00:21:21 +03:00
module_put ( tp - > ops - > owner ) ;
kfree_rcu ( tp , rcu ) ;
2017-02-09 16:38:56 +03:00
}
2018-01-17 13:46:45 +03:00
/* One registered chain-0 head-change callback, linked on a block's
 * chain0.filter_chain_list.
 */
struct tcf_filter_chain_list_item {
	struct list_head list;				/* list linkage */
	tcf_chain_head_change_t *chain_head_change;	/* callback, may be NULL */
	void *chain_head_change_priv;			/* passed to the callback */
};
2017-05-17 12:08:01 +03:00
/* Allocate a chain with index @chain_index, link it onto @block's chain
 * list with a reference count of 1, and record it as the block's chain 0
 * when the index is zero. Returns NULL on allocation failure.
 */
static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
					  u32 chain_index)
{
	struct tcf_chain *new_chain = kzalloc(sizeof(*new_chain), GFP_KERNEL);

	if (!new_chain)
		return NULL;

	list_add_tail(&new_chain->list, &block->chain_list);
	new_chain->block = block;
	new_chain->index = chain_index;
	new_chain->refcnt = 1;
	if (new_chain->index == 0)
		block->chain0.chain = new_chain;

	return new_chain;
}
2018-01-17 13:46:45 +03:00
static void tcf_chain_head_change_item ( struct tcf_filter_chain_list_item * item ,
struct tcf_proto * tp_head )
{
if ( item - > chain_head_change )
item - > chain_head_change ( tp_head , item - > chain_head_change_priv ) ;
}
2018-07-23 10:23:05 +03:00
static void tcf_chain0_head_change ( struct tcf_chain * chain ,
struct tcf_proto * tp_head )
2017-11-03 13:46:24 +03:00
{
2018-01-17 13:46:45 +03:00
struct tcf_filter_chain_list_item * item ;
2018-07-23 10:23:05 +03:00
struct tcf_block * block = chain - > block ;
2018-01-17 13:46:45 +03:00
2018-07-23 10:23:05 +03:00
if ( chain - > index )
return ;
list_for_each_entry ( item , & block - > chain0 . filter_chain_list , list )
2018-01-17 13:46:45 +03:00
tcf_chain_head_change_item ( item , tp_head ) ;
2017-11-03 13:46:24 +03:00
}
2017-05-20 16:01:32 +03:00
static void tcf_chain_destroy ( struct tcf_chain * chain )
{
2017-12-04 21:48:18 +03:00
struct tcf_block * block = chain - > block ;
2017-09-12 02:33:31 +03:00
list_del ( & chain - > list ) ;
2018-07-23 10:23:05 +03:00
if ( ! chain - > index )
block - > chain0 . chain = NULL ;
2017-09-12 02:33:31 +03:00
kfree ( chain ) ;
2018-09-24 19:22:54 +03:00
if ( list_empty ( & block - > chain_list ) & & ! refcount_read ( & block - > refcnt ) )
2018-09-24 19:22:57 +03:00
kfree_rcu ( block , rcu ) ;
2017-09-12 02:33:31 +03:00
}
2017-08-22 23:46:49 +03:00
2017-09-12 02:33:31 +03:00
static void tcf_chain_hold ( struct tcf_chain * chain )
{
+ + chain - > refcnt ;
2017-05-17 12:07:59 +03:00
}
2018-08-01 13:36:55 +03:00
static bool tcf_chain_held_by_acts_only ( struct tcf_chain * chain )
2018-07-27 10:45:05 +03:00
{
/* In case all the references are action references, this
2018-08-01 13:36:55 +03:00
* chain should not be shown to the user .
2018-07-27 10:45:05 +03:00
*/
return chain - > refcnt = = chain - > action_refcnt ;
}
2018-07-23 10:23:06 +03:00
/* Return the chain of @block whose index is @chain_index, or NULL if the
 * block has no such chain. No reference is taken.
 */
static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
					  u32 chain_index)
{
	struct tcf_chain *cur;

	list_for_each_entry(cur, &block->chain_list, list) {
		if (cur->index != chain_index)
			continue;
		return cur;
	}

	return NULL;
}
static int tc_chain_notify ( struct tcf_chain * chain , struct sk_buff * oskb ,
u32 seq , u16 flags , int event , bool unicast ) ;
2018-08-01 13:36:56 +03:00
/* Get a reference on chain @chain_index of @block.
 *
 * An existing chain gets its refcnt bumped; a missing one is created when
 * @create is set (it starts with refcnt 1). With @by_act the reference is
 * also counted in action_refcnt. Returns NULL if the chain does not exist
 * and @create is false, or if creation fails.
 */
static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
					 u32 chain_index, bool create,
					 bool by_act)
{
	struct tcf_chain *chain = tcf_chain_lookup(block, chain_index);

	if (!chain) {
		if (!create)
			return NULL;
		chain = tcf_chain_create(block, chain_index);
		if (!chain)
			return NULL;
	} else {
		tcf_chain_hold(chain);
	}

	if (by_act) {
		++chain->action_refcnt;
		return chain;
	}

	/* Send notification only in case we got the first
	 * non-action reference. Until then, the chain acts only as
	 * a placeholder for actions pointing to it and user ought
	 * not know about them.
	 */
	if (chain->refcnt - chain->action_refcnt == 1)
		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
				RTM_NEWCHAIN, false);

	return chain;
}
2018-08-01 13:36:56 +03:00
2018-08-01 13:36:57 +03:00
/* Non-action variant of __tcf_chain_get(): take a regular reference on
 * chain @chain_index, creating the chain only when @create is set.
 */
static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
				       bool create)
{
	const bool by_act = false;

	return __tcf_chain_get(block, chain_index, create, by_act);
}
2017-05-17 12:08:01 +03:00
2018-07-27 10:45:05 +03:00
/* Take an action reference on chain @chain_index of @block, creating the
 * chain if it does not exist yet.
 */
struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
{
	const bool create = true;
	const bool by_act = true;

	return __tcf_chain_get(block, chain_index, create, by_act);
}
EXPORT_SYMBOL ( tcf_chain_get_by_act ) ;
2018-07-23 10:23:07 +03:00
static void tc_chain_tmplt_del ( struct tcf_chain * chain ) ;
2018-08-01 13:36:56 +03:00
/* Drop one reference on @chain; with @by_act it is an action reference.
 *
 * When the total refcnt reaches zero the chain template is deleted and the
 * chain destroyed.
 */
static void __tcf_chain_put(struct tcf_chain *chain, bool by_act)
{
	if (by_act)
		chain->action_refcnt--;
	chain->refcnt--;

	/* The last dropped non-action reference will trigger notification. */
	if (!by_act && chain->refcnt - chain->action_refcnt == 0)
		tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false);

	if (chain->refcnt != 0)
		return;

	tc_chain_tmplt_del(chain);
	tcf_chain_destroy(chain);
}
2018-08-01 13:36:56 +03:00
2018-08-01 13:36:57 +03:00
static void tcf_chain_put ( struct tcf_chain * chain )
2018-08-01 13:36:56 +03:00
{
__tcf_chain_put ( chain , false ) ;
}
2017-05-17 12:08:01 +03:00
2018-07-27 10:45:05 +03:00
void tcf_chain_put_by_act ( struct tcf_chain * chain )
{
2018-08-01 13:36:56 +03:00
__tcf_chain_put ( chain , true ) ;
2018-07-27 10:45:05 +03:00
}
EXPORT_SYMBOL ( tcf_chain_put_by_act ) ;
2018-07-23 10:23:06 +03:00
static void tcf_chain_put_explicitly_created ( struct tcf_chain * chain )
{
if ( chain - > explicitly_created )
tcf_chain_put ( chain ) ;
}
2018-08-01 13:36:57 +03:00
static void tcf_chain_flush ( struct tcf_chain * chain )
{
struct tcf_proto * tp = rtnl_dereference ( chain - > filter_chain ) ;
tcf_chain0_head_change ( chain , NULL ) ;
while ( tp ) {
RCU_INIT_POINTER ( chain - > filter_chain , tp - > next ) ;
tcf_proto_destroy ( tp , NULL ) ;
tp = rtnl_dereference ( chain - > filter_chain ) ;
tcf_chain_put ( chain ) ;
}
}
2018-01-17 13:46:50 +03:00
static bool tcf_block_offload_in_use ( struct tcf_block * block )
{
return block - > offloadcnt ;
}
static int tcf_block_offload_cmd ( struct tcf_block * block ,
struct net_device * dev ,
struct tcf_block_ext_info * ei ,
2018-06-26 00:30:04 +03:00
enum tc_block_command command ,
struct netlink_ext_ack * extack )
2017-10-19 16:50:29 +03:00
{
struct tc_block_offload bo = { } ;
bo . command = command ;
bo . binder_type = ei - > binder_type ;
bo . block = block ;
2018-06-26 00:30:04 +03:00
bo . extack = extack ;
2018-01-17 13:46:50 +03:00
return dev - > netdev_ops - > ndo_setup_tc ( dev , TC_SETUP_BLOCK , & bo ) ;
2017-10-19 16:50:29 +03:00
}
2018-01-17 13:46:50 +03:00
static int tcf_block_offload_bind ( struct tcf_block * block , struct Qdisc * q ,
2018-06-26 00:30:04 +03:00
struct tcf_block_ext_info * ei ,
struct netlink_ext_ack * extack )
2017-10-19 16:50:29 +03:00
{
2018-01-17 13:46:50 +03:00
struct net_device * dev = q - > dev_queue - > dev ;
int err ;
if ( ! dev - > netdev_ops - > ndo_setup_tc )
goto no_offload_dev_inc ;
/* If tc offload feature is disabled and the block we try to bind
* to already has some offloaded filters , forbid to bind .
*/
2018-06-26 00:30:04 +03:00
if ( ! tc_can_offload ( dev ) & & tcf_block_offload_in_use ( block ) ) {
NL_SET_ERR_MSG ( extack , " Bind to offloaded block failed as dev has offload disabled " ) ;
2018-01-17 13:46:50 +03:00
return - EOPNOTSUPP ;
2018-06-26 00:30:04 +03:00
}
2018-01-17 13:46:50 +03:00
2018-06-26 00:30:04 +03:00
err = tcf_block_offload_cmd ( block , dev , ei , TC_BLOCK_BIND , extack ) ;
2018-01-17 13:46:50 +03:00
if ( err = = - EOPNOTSUPP )
goto no_offload_dev_inc ;
return err ;
no_offload_dev_inc :
if ( tcf_block_offload_in_use ( block ) )
return - EOPNOTSUPP ;
block - > nooffloaddevcnt + + ;
return 0 ;
2017-10-19 16:50:29 +03:00
}
static void tcf_block_offload_unbind ( struct tcf_block * block , struct Qdisc * q ,
struct tcf_block_ext_info * ei )
{
2018-01-17 13:46:50 +03:00
struct net_device * dev = q - > dev_queue - > dev ;
int err ;
if ( ! dev - > netdev_ops - > ndo_setup_tc )
goto no_offload_dev_dec ;
2018-06-26 00:30:04 +03:00
err = tcf_block_offload_cmd ( block , dev , ei , TC_BLOCK_UNBIND , NULL ) ;
2018-01-17 13:46:50 +03:00
if ( err = = - EOPNOTSUPP )
goto no_offload_dev_dec ;
return ;
no_offload_dev_dec :
WARN_ON ( block - > nooffloaddevcnt - - = = 0 ) ;
2017-10-19 16:50:29 +03:00
}
2018-01-17 13:46:45 +03:00
static int
2018-07-23 10:23:05 +03:00
tcf_chain0_head_change_cb_add ( struct tcf_block * block ,
struct tcf_block_ext_info * ei ,
struct netlink_ext_ack * extack )
2018-01-17 13:46:45 +03:00
{
2018-07-23 10:23:05 +03:00
struct tcf_chain * chain0 = block - > chain0 . chain ;
2018-01-17 13:46:45 +03:00
struct tcf_filter_chain_list_item * item ;
item = kmalloc ( sizeof ( * item ) , GFP_KERNEL ) ;
if ( ! item ) {
NL_SET_ERR_MSG ( extack , " Memory allocation for head change callback item failed " ) ;
return - ENOMEM ;
}
item - > chain_head_change = ei - > chain_head_change ;
item - > chain_head_change_priv = ei - > chain_head_change_priv ;
2018-07-23 10:23:05 +03:00
if ( chain0 & & chain0 - > filter_chain )
tcf_chain_head_change_item ( item , chain0 - > filter_chain ) ;
list_add ( & item - > list , & block - > chain0 . filter_chain_list ) ;
2018-01-17 13:46:45 +03:00
return 0 ;
}
static void
2018-07-23 10:23:05 +03:00
tcf_chain0_head_change_cb_del ( struct tcf_block * block ,
struct tcf_block_ext_info * ei )
2018-01-17 13:46:45 +03:00
{
2018-07-23 10:23:05 +03:00
struct tcf_chain * chain0 = block - > chain0 . chain ;
2018-01-17 13:46:45 +03:00
struct tcf_filter_chain_list_item * item ;
2018-07-23 10:23:05 +03:00
list_for_each_entry ( item , & block - > chain0 . filter_chain_list , list ) {
2018-01-17 13:46:45 +03:00
if ( ( ! ei - > chain_head_change & & ! ei - > chain_head_change_priv ) | |
( item - > chain_head_change = = ei - > chain_head_change & &
item - > chain_head_change_priv = = ei - > chain_head_change_priv ) ) {
2018-07-23 10:23:05 +03:00
if ( chain0 )
tcf_chain_head_change_item ( item , NULL ) ;
2018-01-17 13:46:45 +03:00
list_del ( & item - > list ) ;
kfree ( item ) ;
return ;
}
}
WARN_ON ( 1 ) ;
}
2018-01-17 13:46:46 +03:00
struct tcf_net {
2018-09-24 19:22:56 +03:00
spinlock_t idr_lock ; /* Protects idr */
2018-01-17 13:46:46 +03:00
struct idr idr ;
} ;
static unsigned int tcf_net_id ;
static int tcf_block_insert ( struct tcf_block * block , struct net * net ,
2018-02-13 14:00:16 +03:00
struct netlink_ext_ack * extack )
2018-01-17 13:46:45 +03:00
{
2018-01-17 13:46:46 +03:00
struct tcf_net * tn = net_generic ( net , tcf_net_id ) ;
2018-09-24 19:22:56 +03:00
int err ;
idr_preload ( GFP_KERNEL ) ;
spin_lock ( & tn - > idr_lock ) ;
err = idr_alloc_u32 ( & tn - > idr , block , & block - > index , block - > index ,
GFP_NOWAIT ) ;
spin_unlock ( & tn - > idr_lock ) ;
idr_preload_end ( ) ;
2018-01-17 13:46:46 +03:00
2018-09-24 19:22:56 +03:00
return err ;
2018-01-17 13:46:45 +03:00
}
2018-01-17 13:46:46 +03:00
static void tcf_block_remove ( struct tcf_block * block , struct net * net )
{
struct tcf_net * tn = net_generic ( net , tcf_net_id ) ;
2018-09-24 19:22:56 +03:00
spin_lock ( & tn - > idr_lock ) ;
2017-11-28 17:48:43 +03:00
idr_remove ( & tn - > idr , block - > index ) ;
2018-09-24 19:22:56 +03:00
spin_unlock ( & tn - > idr_lock ) ;
2018-01-17 13:46:46 +03:00
}
static struct tcf_block * tcf_block_create ( struct net * net , struct Qdisc * q ,
2018-02-13 14:00:16 +03:00
u32 block_index ,
2018-01-17 13:46:46 +03:00
struct netlink_ext_ack * extack )
2017-05-17 12:07:55 +03:00
{
2018-01-17 13:46:46 +03:00
struct tcf_block * block ;
2017-05-17 12:07:55 +03:00
2018-01-17 13:46:46 +03:00
block = kzalloc ( sizeof ( * block ) , GFP_KERNEL ) ;
2017-12-20 20:35:19 +03:00
if ( ! block ) {
NL_SET_ERR_MSG ( extack , " Memory allocation for block failed " ) ;
2018-01-17 13:46:46 +03:00
return ERR_PTR ( - ENOMEM ) ;
2017-12-20 20:35:19 +03:00
}
2017-05-17 12:08:01 +03:00
INIT_LIST_HEAD ( & block - > chain_list ) ;
2017-10-19 16:50:31 +03:00
INIT_LIST_HEAD ( & block - > cb_list ) ;
2018-01-17 13:46:48 +03:00
INIT_LIST_HEAD ( & block - > owner_list ) ;
2018-07-23 10:23:05 +03:00
INIT_LIST_HEAD ( & block - > chain0 . filter_chain_list ) ;
2017-10-19 16:50:31 +03:00
2018-09-24 19:22:54 +03:00
refcount_set ( & block - > refcnt , 1 ) ;
2018-01-17 13:46:46 +03:00
block - > net = net ;
2018-02-13 14:00:16 +03:00
block - > index = block_index ;
/* Don't store q pointer for blocks which are shared */
if ( ! tcf_block_shared ( block ) )
block - > q = q ;
2018-01-17 13:46:46 +03:00
return block ;
}
/* Return whatever @net's block IDR holds at @block_index (idr_find()
 * result). No reference is taken.
 */
static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
{
	struct tcf_net *tn;

	tn = net_generic(net, tcf_net_id);
	return idr_find(&tn->idr, block_index);
}
2018-09-24 19:22:57 +03:00
/* Look up the block at @block_index and take a reference on it.
 *
 * The lookup and refcount_inc_not_zero() run inside an RCU read-side
 * section. Returns NULL if there is no such block or its reference count
 * had already dropped to zero.
 */
static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
{
	struct tcf_block *blk;

	rcu_read_lock();
	blk = tcf_block_lookup(net, block_index);
	if (blk) {
		if (!refcount_inc_not_zero(&blk->refcnt))
			blk = NULL;
	}
	rcu_read_unlock();

	return blk;
}
2018-09-24 19:22:55 +03:00
static void tcf_block_flush_all_chains ( struct tcf_block * block )
{
struct tcf_chain * chain ;
/* Hold a refcnt for all chains, so that they don't disappear
* while we are iterating .
*/
list_for_each_entry ( chain , & block - > chain_list , list )
tcf_chain_hold ( chain ) ;
list_for_each_entry ( chain , & block - > chain_list , list )
tcf_chain_flush ( chain ) ;
}
static void tcf_block_put_all_chains ( struct tcf_block * block )
{
struct tcf_chain * chain , * tmp ;
/* At this point, all the chains should have refcnt >= 1. */
list_for_each_entry_safe ( chain , tmp , & block - > chain_list , list ) {
tcf_chain_put_explicitly_created ( chain ) ;
tcf_chain_put ( chain ) ;
}
}
2018-09-24 19:22:57 +03:00
static void __tcf_block_put ( struct tcf_block * block , struct Qdisc * q ,
struct tcf_block_ext_info * ei )
{
if ( refcount_dec_and_test ( & block - > refcnt ) ) {
/* Flushing/putting all chains will cause the block to be
* deallocated when last chain is freed . However , if chain_list
* is empty , block has to be manually deallocated . After block
* reference counter reached 0 , it is no longer possible to
* increment it or add new chains to block .
*/
bool free_block = list_empty ( & block - > chain_list ) ;
if ( tcf_block_shared ( block ) )
tcf_block_remove ( block , block - > net ) ;
if ( ! free_block )
tcf_block_flush_all_chains ( block ) ;
if ( q )
tcf_block_offload_unbind ( block , q , ei ) ;
if ( free_block )
kfree_rcu ( block , rcu ) ;
else
tcf_block_put_all_chains ( block ) ;
} else if ( q ) {
tcf_block_offload_unbind ( block , q , ei ) ;
}
}
/* Drop a block reference with no qdisc involved, so no offload unbind is
 * attempted.
 */
static void tcf_block_refcnt_put(struct tcf_block *block)
{
	struct Qdisc *no_qdisc = NULL;
	struct tcf_block_ext_info *no_ei = NULL;

	__tcf_block_put(block, no_qdisc, no_ei);
}
2018-05-31 09:52:53 +03:00
/* Find the tcf block a request refers to.
 *
 * If @ifindex is TCM_IFINDEX_MAGIC_BLOCK the block is looked up by
 * @block_index.  Otherwise the device, its qdisc and (when the minor
 * part of *@parent is set) the class are resolved, and *@q, *@parent
 * and *@cl are filled in where appropriate.
 *
 * Returns the block with a reference held on it, or an ERR_PTR().  On
 * the error paths that run after the qdisc lookup, a qdisc reference
 * taken here is dropped again and *@q is reset to NULL.
 */
static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
					u32 *parent, unsigned long *cl,
					int ifindex, u32 block_index,
					struct netlink_ext_ack *extack)
{
	const struct Qdisc_class_ops *cl_ops;
	struct net_device *ndev;
	struct tcf_block *blk;
	int ret = 0;

	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
		/* Addressed by block index; this lookup already hands
		 * back a referenced block.
		 */
		blk = tcf_block_refcnt_get(net, block_index);
		if (!blk) {
			NL_SET_ERR_MSG(extack, "Block of given index was not found");
			return ERR_PTR(-EINVAL);
		}
		return blk;
	}

	rcu_read_lock();

	/* Find link */
	ndev = dev_get_by_index_rcu(net, ifindex);
	if (!ndev) {
		rcu_read_unlock();
		return ERR_PTR(-ENODEV);
	}

	/* Find qdisc */
	if (*parent) {
		*q = qdisc_lookup_rcu(ndev, TC_H_MAJ(*parent));
		if (!*q) {
			NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
			ret = -EINVAL;
			goto errout_rcu;
		}
	} else {
		*q = ndev->qdisc;
		*parent = (*q)->handle;
	}

	*q = qdisc_refcount_inc_nz(*q);
	if (!*q) {
		NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
		ret = -EINVAL;
		goto errout_rcu;
	}

	/* Is it classful? */
	cl_ops = (*q)->ops->cl_ops;
	if (!cl_ops) {
		NL_SET_ERR_MSG(extack, "Qdisc not classful");
		ret = -EINVAL;
		goto errout_rcu;
	}

	if (!cl_ops->tcf_block) {
		NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
		ret = -EOPNOTSUPP;
		goto errout_rcu;
	}

	/* The qdisc is known not to be noop_qdisc here, so it holds a
	 * reference to the net_device, and we hold a reference to the
	 * qdisc: the rcu read lock is no longer needed.
	 */
	rcu_read_unlock();

	/* Do we search for filter, attached to class? */
	if (TC_H_MIN(*parent)) {
		*cl = cl_ops->find(*q, *parent);
		if (*cl == 0) {
			NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
			ret = -ENOENT;
			goto errout_qdisc;
		}
	}

	/* And the last stroke */
	blk = cl_ops->tcf_block(*q, *cl, extack);
	if (!blk) {
		ret = -EINVAL;
		goto errout_qdisc;
	}

	if (tcf_block_shared(blk)) {
		NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
		ret = -EOPNOTSUPP;
		goto errout_qdisc;
	}

	/* Always take a block reference so that the rules update path
	 * of the cls API can run without the rtnl lock.  The caller
	 * must release the block once done with it.
	 */
	refcount_inc(&blk->refcnt);
	return blk;

errout_rcu:
	rcu_read_unlock();
errout_qdisc:
	if (*q) {
		qdisc_put(*q);
		*q = NULL;
	}
	return ERR_PTR(ret);
}
static void tcf_block_release ( struct Qdisc * q , struct tcf_block * block )
{
2018-09-24 19:22:58 +03:00
if ( ! IS_ERR_OR_NULL ( block ) )
tcf_block_refcnt_put ( block ) ;
2018-09-24 19:22:53 +03:00
if ( q )
qdisc_put ( q ) ;
2018-05-31 09:52:53 +03:00
}
2018-01-17 13:46:48 +03:00
/* One owner of a tcf block: a (qdisc, binder type) pair that
 * tcf_block_owner_add() links on the block's owner_list and
 * tcf_block_owner_del() looks up and frees again.
 */
struct tcf_block_owner_item {
struct list_head list ; /* entry on block->owner_list */
struct Qdisc * q ; /* qdisc recorded for this owner */
enum tcf_block_binder_type binder_type ; /* binder type recorded for this owner */
} ;
static void
tcf_block_owner_netif_keep_dst ( struct tcf_block * block ,
struct Qdisc * q ,
enum tcf_block_binder_type binder_type )
{
if ( block - > keep_dst & &
binder_type ! = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS & &
binder_type ! = TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS )
netif_keep_dst ( qdisc_dev ( q ) ) ;
}
void tcf_block_netif_keep_dst ( struct tcf_block * block )
{
struct tcf_block_owner_item * item ;
block - > keep_dst = true ;
list_for_each_entry ( item , & block - > owner_list , list )
tcf_block_owner_netif_keep_dst ( block , item - > q ,
item - > binder_type ) ;
}
EXPORT_SYMBOL ( tcf_block_netif_keep_dst ) ;
static int tcf_block_owner_add ( struct tcf_block * block ,
struct Qdisc * q ,
enum tcf_block_binder_type binder_type )
{
struct tcf_block_owner_item * item ;
item = kmalloc ( sizeof ( * item ) , GFP_KERNEL ) ;
if ( ! item )
return - ENOMEM ;
item - > q = q ;
item - > binder_type = binder_type ;
list_add ( & item - > list , & block - > owner_list ) ;
return 0 ;
}
static void tcf_block_owner_del ( struct tcf_block * block ,
struct Qdisc * q ,
enum tcf_block_binder_type binder_type )
{
struct tcf_block_owner_item * item ;
list_for_each_entry ( item , & block - > owner_list , list ) {
if ( item - > q = = q & & item - > binder_type = = binder_type ) {
list_del ( & item - > list ) ;
kfree ( item ) ;
return ;
}
}
WARN_ON ( 1 ) ;
}
2018-01-17 13:46:46 +03:00
int tcf_block_get_ext ( struct tcf_block * * p_block , struct Qdisc * q ,
struct tcf_block_ext_info * ei ,
struct netlink_ext_ack * extack )
{
struct net * net = qdisc_net ( q ) ;
struct tcf_block * block = NULL ;
int err ;
2018-09-24 19:22:58 +03:00
if ( ei - > block_index )
2018-01-17 13:46:46 +03:00
/* block_index not 0 means the shared block is requested */
2018-09-24 19:22:58 +03:00
block = tcf_block_refcnt_get ( net , ei - > block_index ) ;
2018-01-17 13:46:46 +03:00
if ( ! block ) {
2018-02-13 14:00:16 +03:00
block = tcf_block_create ( net , q , ei - > block_index , extack ) ;
2018-01-17 13:46:46 +03:00
if ( IS_ERR ( block ) )
return PTR_ERR ( block ) ;
2018-02-13 14:00:16 +03:00
if ( tcf_block_shared ( block ) ) {
err = tcf_block_insert ( block , net , extack ) ;
2018-01-17 13:46:46 +03:00
if ( err )
goto err_block_insert ;
}
}
2018-01-17 13:46:48 +03:00
err = tcf_block_owner_add ( block , q , ei - > binder_type ) ;
if ( err )
goto err_block_owner_add ;
tcf_block_owner_netif_keep_dst ( block , q , ei - > binder_type ) ;
2018-07-23 10:23:05 +03:00
err = tcf_chain0_head_change_cb_add ( block , ei , extack ) ;
2018-01-17 13:46:45 +03:00
if ( err )
2018-07-23 10:23:05 +03:00
goto err_chain0_head_change_cb_add ;
2018-01-17 13:46:50 +03:00
2018-06-26 00:30:04 +03:00
err = tcf_block_offload_bind ( block , q , ei , extack ) ;
2018-01-17 13:46:50 +03:00
if ( err )
goto err_block_offload_bind ;
2017-05-17 12:07:55 +03:00
* p_block = block ;
return 0 ;
2017-05-17 12:07:59 +03:00
2018-01-17 13:46:50 +03:00
err_block_offload_bind :
2018-07-23 10:23:05 +03:00
tcf_chain0_head_change_cb_del ( block , ei ) ;
err_chain0_head_change_cb_add :
2018-01-17 13:46:48 +03:00
tcf_block_owner_del ( block , q , ei - > binder_type ) ;
err_block_owner_add :
2018-01-17 13:46:46 +03:00
err_block_insert :
2018-09-24 19:22:58 +03:00
tcf_block_refcnt_put ( block ) ;
2017-05-17 12:07:59 +03:00
return err ;
2017-05-17 12:07:55 +03:00
}
2017-10-19 16:50:29 +03:00
EXPORT_SYMBOL ( tcf_block_get_ext ) ;
2017-11-03 13:46:24 +03:00
static void tcf_chain_head_change_dflt ( struct tcf_proto * tp_head , void * priv )
{
struct tcf_proto __rcu * * p_filter_chain = priv ;
rcu_assign_pointer ( * p_filter_chain , tp_head ) ;
}
2017-10-19 16:50:29 +03:00
int tcf_block_get ( struct tcf_block * * p_block ,
2017-12-20 20:35:19 +03:00
struct tcf_proto __rcu * * p_filter_chain , struct Qdisc * q ,
struct netlink_ext_ack * extack )
2017-10-19 16:50:29 +03:00
{
2017-11-03 13:46:24 +03:00
struct tcf_block_ext_info ei = {
. chain_head_change = tcf_chain_head_change_dflt ,
. chain_head_change_priv = p_filter_chain ,
} ;
2017-10-19 16:50:29 +03:00
2017-11-03 13:46:24 +03:00
WARN_ON ( ! p_filter_chain ) ;
2017-12-20 20:35:19 +03:00
return tcf_block_get_ext ( p_block , q , & ei , extack ) ;
2017-10-19 16:50:29 +03:00
}
2017-05-17 12:07:55 +03:00
EXPORT_SYMBOL ( tcf_block_get ) ;
2017-10-27 04:24:28 +03:00
/* XXX: Standalone actions are not allowed to jump to any chain, and bound
2017-11-24 14:27:58 +03:00
* actions should be all removed after flushing .
2017-10-27 04:24:28 +03:00
*/
2017-11-03 13:46:24 +03:00
void tcf_block_put_ext ( struct tcf_block * block , struct Qdisc * q ,
2017-10-30 08:10:01 +03:00
struct tcf_block_ext_info * ei )
2017-10-27 04:24:28 +03:00
{
2017-12-17 06:11:55 +03:00
if ( ! block )
return ;
2018-07-23 10:23:05 +03:00
tcf_chain0_head_change_cb_del ( block , ei ) ;
2018-01-17 13:46:48 +03:00
tcf_block_owner_del ( block , q , ei - > binder_type ) ;
2017-11-24 14:27:58 +03:00
2018-09-24 19:22:57 +03:00
__tcf_block_put ( block , q , ei ) ;
2017-05-17 12:07:55 +03:00
}
2017-10-19 16:50:29 +03:00
EXPORT_SYMBOL ( tcf_block_put_ext ) ;
void tcf_block_put ( struct tcf_block * block )
{
struct tcf_block_ext_info ei = { 0 , } ;
2017-12-21 15:13:59 +03:00
if ( ! block )
return ;
2017-11-03 13:46:24 +03:00
tcf_block_put_ext ( block , block - > q , & ei ) ;
2017-10-19 16:50:29 +03:00
}
2017-10-30 08:10:01 +03:00
2017-05-17 12:07:55 +03:00
EXPORT_SYMBOL ( tcf_block_put ) ;
2017-02-09 16:38:56 +03:00
2017-10-19 16:50:31 +03:00
/* A callback registered on a tcf block; entries live on the block's
 * cb_list and are identified by the (cb, cb_ident) pair.
 */
struct tcf_block_cb {
struct list_head list ; /* entry on block->cb_list */
tc_setup_cb_t * cb ; /* callback function invoked with cb_priv */
void * cb_ident ; /* identity compared together with cb on lookup */
void * cb_priv ; /* opaque pointer passed to cb */
unsigned int refcnt ; /* plain counter changed by tcf_block_cb_incref()/_decref() */
} ;
void * tcf_block_cb_priv ( struct tcf_block_cb * block_cb )
{
return block_cb - > cb_priv ;
}
EXPORT_SYMBOL ( tcf_block_cb_priv ) ;
/* Find the callback entry on @block whose callback function and
 * identity both match (@cb, @cb_ident).  Returns NULL if none does.
 */
struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block,
					 tc_setup_cb_t *cb, void *cb_ident)
{
	struct tcf_block_cb *pos;

	list_for_each_entry(pos, &block->cb_list, list) {
		if (pos->cb != cb)
			continue;
		if (pos->cb_ident != cb_ident)
			continue;
		return pos;
	}

	return NULL;
}
EXPORT_SYMBOL(tcf_block_cb_lookup);
void tcf_block_cb_incref ( struct tcf_block_cb * block_cb )
{
block_cb - > refcnt + + ;
}
EXPORT_SYMBOL ( tcf_block_cb_incref ) ;
unsigned int tcf_block_cb_decref ( struct tcf_block_cb * block_cb )
{
return - - block_cb - > refcnt ;
}
EXPORT_SYMBOL ( tcf_block_cb_decref ) ;
2018-06-26 00:30:10 +03:00
/* Replay every filter of every chain on @block to callback @cb.
 *
 * @add selects whether filters are being added (true) or removed
 * (false).  Classifiers without a reoffload op are skipped, except
 * that adding fails with -EOPNOTSUPP when @offload_in_use is set.
 *
 * Errors are only acted upon when adding: the whole block is then
 * replayed once more with add == false before the error is returned.
 * Returns 0 otherwise.
 */
static int
tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
			    void *cb_priv, bool add, bool offload_in_use,
			    struct netlink_ext_ack *extack)
{
	struct tcf_chain *chain;
	struct tcf_proto *tp;
	int err;

	list_for_each_entry(chain, &block->chain_list, list) {
		tp = rtnl_dereference(chain->filter_chain);
		while (tp) {
			if (tp->ops->reoffload) {
				err = tp->ops->reoffload(tp, add, cb, cb_priv,
							 extack);
				if (err && add)
					goto err_playback_remove;
			} else if (add && offload_in_use) {
				err = -EOPNOTSUPP;
				NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support");
				goto err_playback_remove;
			}
			tp = rtnl_dereference(tp->next);
		}
	}

	return 0;

err_playback_remove:
	tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
				    extack);
	return err;
}
2017-10-19 16:50:31 +03:00
/* Register callback (@cb, @cb_ident, @cb_priv) on @block.
 *
 * Rules already present on the block are replayed to the new callback
 * before it is linked on the block's cb_list.
 *
 * Returns the new callback entry, ERR_PTR(-ENOMEM) if the entry cannot
 * be allocated, or an ERR_PTR() carrying the replay error.
 */
struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block,
					     tc_setup_cb_t *cb, void *cb_ident,
					     void *cb_priv,
					     struct netlink_ext_ack *extack)
{
	struct tcf_block_cb *block_cb;
	int err;

	/* Allocate before replaying: an allocation failure after a
	 * successful replay would leave the rules offloaded to a
	 * callback that never gets registered, and therefore can never
	 * be unregistered to remove them again.
	 */
	block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL);
	if (!block_cb)
		return ERR_PTR(-ENOMEM);

	/* Replay any already present rules */
	err = tcf_block_playback_offloads(block, cb, cb_priv, true,
					  tcf_block_offload_in_use(block),
					  extack);
	if (err) {
		kfree(block_cb);
		return ERR_PTR(err);
	}

	block_cb->cb = cb;
	block_cb->cb_ident = cb_ident;
	block_cb->cb_priv = cb_priv;
	list_add(&block_cb->list, &block->cb_list);
	return block_cb;
}
EXPORT_SYMBOL(__tcf_block_cb_register);
/* Same as __tcf_block_cb_register(), but reports the outcome as an
 * errno: 0 on success, the negative error carried by the returned
 * pointer otherwise.
 */
int tcf_block_cb_register(struct tcf_block *block,
			  tc_setup_cb_t *cb, void *cb_ident,
			  void *cb_priv, struct netlink_ext_ack *extack)
{
	struct tcf_block_cb *entry;

	entry = __tcf_block_cb_register(block, cb, cb_ident, cb_priv, extack);
	if (IS_ERR(entry))
		return PTR_ERR(entry);

	return 0;
}
EXPORT_SYMBOL(tcf_block_cb_register);
2018-06-26 00:30:10 +03:00
void __tcf_block_cb_unregister ( struct tcf_block * block ,
struct tcf_block_cb * block_cb )
2017-10-19 16:50:31 +03:00
{
2018-06-26 00:30:10 +03:00
tcf_block_playback_offloads ( block , block_cb - > cb , block_cb - > cb_priv ,
false , tcf_block_offload_in_use ( block ) ,
NULL ) ;
2017-10-19 16:50:31 +03:00
list_del ( & block_cb - > list ) ;
kfree ( block_cb ) ;
}
EXPORT_SYMBOL ( __tcf_block_cb_unregister ) ;
/* Look up the callback entry matching (@cb, @cb_ident) on @block and
 * unregister it.  Does nothing if no such entry exists.
 */
void tcf_block_cb_unregister(struct tcf_block *block,
			     tc_setup_cb_t *cb, void *cb_ident)
{
	struct tcf_block_cb *found = tcf_block_cb_lookup(block, cb, cb_ident);

	if (found)
		__tcf_block_cb_unregister(block, found);
}
EXPORT_SYMBOL(tcf_block_cb_unregister);
/* Invoke every callback registered on @block with (@type, @type_data).
 *
 * With @err_stop set, the first callback error is returned at once,
 * and -EOPNOTSUPP is returned up front if block->nooffloaddevcnt is
 * non-zero.  Otherwise errors are ignored.  Returns the number of
 * callbacks that returned 0.
 */
static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type,
			     void *type_data, bool err_stop)
{
	struct tcf_block_cb *pos;
	int nr_ok = 0;
	int ret;

	/* Make sure all netdevs sharing this block are offload-capable. */
	if (err_stop && block->nooffloaddevcnt)
		return -EOPNOTSUPP;

	list_for_each_entry(pos, &block->cb_list, list) {
		ret = pos->cb(type, type_data, pos->cb_priv);
		if (!ret) {
			nr_ok++;
			continue;
		}
		if (err_stop)
			return ret;
	}

	return nr_ok;
}
2017-05-17 12:07:54 +03:00
/* Main classifier routine: walks the classifier chain starting at @tp,
 * skips entries whose protocol matches neither the skb's protocol nor
 * ETH_P_ALL, and asks each remaining classifier in turn.
 *
 * The first non-negative classifier result is returned; TC_ACT_UNSPEC
 * is returned when the chain is exhausted.  With CONFIG_NET_CLS_ACT a
 * reclassify (unless @compat_mode) or goto-chain result restarts the
 * walk; once the restart limit is exceeded TC_ACT_SHOT is returned.
 */
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		 struct tcf_result *res, bool compat_mode)
{
	__be16 protocol = tc_skb_protocol(skb);
#ifdef CONFIG_NET_CLS_ACT
	const int max_restarts = 4;
	const struct tcf_proto *chain_head = tp;
	const struct tcf_proto *restart_tp;
	int restarts = 0;

reclassify:
#endif
	while (tp) {
		int verdict;

		if (tp->protocol == protocol ||
		    tp->protocol == htons(ETH_P_ALL)) {
			verdict = tp->classify(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
			if (unlikely(verdict == TC_ACT_RECLASSIFY &&
				     !compat_mode)) {
				/* Start over from the original head. */
				restart_tp = chain_head;
				goto reset;
			}
			if (unlikely(TC_ACT_EXT_CMP(verdict,
						    TC_ACT_GOTO_CHAIN))) {
				/* Continue at the goto target. */
				restart_tp = res->goto_tp;
				goto reset;
			}
#endif
			if (verdict >= 0)
				return verdict;
		}

		tp = rcu_dereference_bh(tp->next);
	}

	return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
	if (unlikely(restarts++ >= max_restarts)) {
		net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n",
				       tp->chain->block->index,
				       tp->prio & 0xffff,
				       ntohs(tp->protocol));
		return TC_ACT_SHOT;
	}

	tp = restart_tp;
	protocol = tc_skb_protocol(skb);
	goto reclassify;
#endif
}
EXPORT_SYMBOL(tcf_classify);
2017-05-17 12:07:59 +03:00
/* Position within a filter chain, as filled in by tcf_chain_tp_find()
 * and consumed by tcf_chain_tp_insert()/tcf_chain_tp_remove().
 */
struct tcf_chain_info {
struct tcf_proto __rcu * * pprev ; /* link slot where the search stopped */
struct tcf_proto __rcu * next ; /* successor of the found tp, NULL if none was found */
} ;
static struct tcf_proto * tcf_chain_tp_prev ( struct tcf_chain_info * chain_info )
{
return rtnl_dereference ( * chain_info - > pprev ) ;
}
static void tcf_chain_tp_insert ( struct tcf_chain * chain ,
struct tcf_chain_info * chain_info ,
struct tcf_proto * tp )
{
2017-11-03 13:46:24 +03:00
if ( * chain_info - > pprev = = chain - > filter_chain )
2018-07-23 10:23:05 +03:00
tcf_chain0_head_change ( chain , tp ) ;
2017-05-17 12:07:59 +03:00
RCU_INIT_POINTER ( tp - > next , tcf_chain_tp_prev ( chain_info ) ) ;
rcu_assign_pointer ( * chain_info - > pprev , tp ) ;
2017-09-12 02:33:31 +03:00
tcf_chain_hold ( chain ) ;
2017-05-17 12:07:59 +03:00
}
static void tcf_chain_tp_remove ( struct tcf_chain * chain ,
struct tcf_chain_info * chain_info ,
struct tcf_proto * tp )
{
struct tcf_proto * next = rtnl_dereference ( chain_info - > next ) ;
2017-11-03 13:46:24 +03:00
if ( tp = = chain - > filter_chain )
2018-07-23 10:23:05 +03:00
tcf_chain0_head_change ( chain , next ) ;
2017-05-17 12:07:59 +03:00
RCU_INIT_POINTER ( * chain_info - > pprev , next ) ;
2017-09-12 02:33:31 +03:00
tcf_chain_put ( chain ) ;
2017-05-17 12:07:59 +03:00
}
/* Search @chain for a tcf_proto with priority @prio.
 *
 * The walk stops at the first entry whose priority is >= @prio.  An
 * exact match is returned, unless @prio_allocate is set or a non-zero
 * @protocol differs from the entry's protocol, in which case
 * ERR_PTR(-EINVAL) is returned.  If no exact match exists NULL is
 * returned.
 *
 * Except on the -EINVAL path, @chain_info is filled with the link slot
 * where the walk stopped and with the successor of the found entry
 * (NULL when nothing was found).
 */
static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
					   struct tcf_chain_info *chain_info,
					   u32 protocol, u32 prio,
					   bool prio_allocate)
{
	struct tcf_proto **link = &chain->filter_chain;
	struct tcf_proto *cur;

	/* Check the chain for existence of proto-tcf with this priority */
	while ((cur = rtnl_dereference(*link)) != NULL) {
		if (cur->prio < prio) {
			link = &cur->next;
			continue;
		}

		if (cur->prio > prio) {
			/* Walked past the slot: nothing with this prio. */
			cur = NULL;
			break;
		}

		/* Exact priority match. */
		if (prio_allocate || (protocol && cur->protocol != protocol))
			return ERR_PTR(-EINVAL);
		break;
	}

	chain_info->pprev = link;
	if (cur)
		chain_info->next = cur->next;
	else
		chain_info->next = NULL;

	return cur;
}
2017-08-08 01:26:50 +03:00
/* Append a netlink message describing filter @tp (and, when @fh is
 * set, the filter handle dumped by the classifier) to @skb.
 *
 * With @q set the message carries the qdisc device ifindex and
 * @parent; otherwise it carries TCM_IFINDEX_MAGIC_BLOCK and the index
 * of @block.
 *
 * Returns skb->len on success.  On failure the skb is trimmed back to
 * where it was on entry and -1 is returned.
 */
static int tcf_fill_node(struct net *net, struct sk_buff *skb,
			 struct tcf_proto *tp, struct tcf_block *block,
			 struct Qdisc *q, u32 parent, void *fh,
			 u32 portid, u32 seq, u16 flags, int event)
{
	unsigned char *start = skb_tail_pointer(skb);
	struct nlmsghdr *hdr;
	struct tcmsg *msg;

	hdr = nlmsg_put(skb, portid, seq, event, sizeof(*msg), flags);
	if (!hdr)
		goto trim;

	msg = nlmsg_data(hdr);
	msg->tcm_family = AF_UNSPEC;
	msg->tcm__pad1 = 0;
	msg->tcm__pad2 = 0;
	if (!q) {
		msg->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
		msg->tcm_block_index = block->index;
	} else {
		msg->tcm_ifindex = qdisc_dev(q)->ifindex;
		msg->tcm_parent = parent;
	}
	msg->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);

	if (nla_put_string(skb, TCA_KIND, tp->ops->kind) ||
	    nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
		goto trim;

	if (!fh)
		msg->tcm_handle = 0;
	else if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, msg) < 0)
		goto trim;

	hdr->nlmsg_len = skb_tail_pointer(skb) - start;
	return skb->len;

trim:
	nlmsg_trim(skb, start);
	return -1;
}
/* Build a filter notification for @tp / @fh with message type @event
 * and send it, either unicast to the requester's portid or via
 * rtnetlink_send() to RTNLGRP_TC.
 *
 * Returns -ENOBUFS if no skb can be allocated, -EINVAL if the message
 * cannot be built, otherwise the result of the send call.
 */
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
			  struct nlmsghdr *n, struct tcf_proto *tp,
			  struct tcf_block *block, struct Qdisc *q,
			  u32 parent, void *fh, int event, bool unicast)
{
	struct sk_buff *msg;
	u32 portid = 0;
	int filled;

	if (oskb)
		portid = NETLINK_CB(oskb).portid;

	msg = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!msg)
		return -ENOBUFS;

	filled = tcf_fill_node(net, msg, tp, block, q, parent, fh, portid,
			       n->nlmsg_seq, n->nlmsg_flags, event);
	if (filled <= 0) {
		kfree_skb(msg);
		return -EINVAL;
	}

	if (!unicast)
		return rtnetlink_send(msg, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

	return netlink_unicast(net->rtnl, msg, portid, MSG_DONTWAIT);
}
static int tfilter_del_notify ( struct net * net , struct sk_buff * oskb ,
struct nlmsghdr * n , struct tcf_proto * tp ,
2018-01-17 13:46:51 +03:00
struct tcf_block * block , struct Qdisc * q ,
2018-01-18 19:20:50 +03:00
u32 parent , void * fh , bool unicast , bool * last ,
struct netlink_ext_ack * extack )
2017-08-08 01:26:50 +03:00
{
struct sk_buff * skb ;
u32 portid = oskb ? NETLINK_CB ( oskb ) . portid : 0 ;
int err ;
skb = alloc_skb ( NLMSG_GOODSIZE , GFP_KERNEL ) ;
if ( ! skb )
return - ENOBUFS ;
2018-01-17 13:46:51 +03:00
if ( tcf_fill_node ( net , skb , tp , block , q , parent , fh , portid ,
n - > nlmsg_seq , n - > nlmsg_flags , RTM_DELTFILTER ) < = 0 ) {
2018-01-18 19:20:50 +03:00
NL_SET_ERR_MSG ( extack , " Failed to build del event notification " ) ;
2017-08-08 01:26:50 +03:00
kfree_skb ( skb ) ;
return - EINVAL ;
}
2018-01-18 19:20:53 +03:00
err = tp - > ops - > delete ( tp , fh , last , extack ) ;
2017-08-08 01:26:50 +03:00
if ( err ) {
kfree_skb ( skb ) ;
return err ;
}
if ( unicast )
return netlink_unicast ( net - > rtnl , skb , portid , MSG_DONTWAIT ) ;
2018-01-18 19:20:50 +03:00
err = rtnetlink_send ( skb , net , portid , RTNLGRP_TC ,
n - > nlmsg_flags & NLM_F_ECHO ) ;
if ( err < 0 )
NL_SET_ERR_MSG ( extack , " Failed to send filter delete notification " ) ;
return err ;
2017-08-08 01:26:50 +03:00
}
/* Send an @event notification (no filter handle, not unicast) for
 * every tcf_proto on @chain.  Notification failures are ignored.
 */
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
				 struct tcf_block *block, struct Qdisc *q,
				 u32 parent, struct nlmsghdr *n,
				 struct tcf_chain *chain, int event)
{
	struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);

	while (tp) {
		tfilter_notify(net, oskb, n, tp, block,
			       q, parent, NULL, event, false);
		tp = rtnl_dereference(tp->next);
	}
}
2018-05-31 09:52:53 +03:00
static int tc_new_tfilter ( struct sk_buff * skb , struct nlmsghdr * n ,
2017-04-16 19:48:24 +03:00
struct netlink_ext_ack * extack )
2005-04-17 02:20:36 +04:00
{
2008-03-25 20:26:21 +03:00
struct net * net = sock_net ( skb - > sk ) ;
2008-01-23 09:11:33 +03:00
struct nlattr * tca [ TCA_MAX + 1 ] ;
2005-04-17 02:20:36 +04:00
struct tcmsg * t ;
u32 protocol ;
u32 prio ;
2017-05-17 12:07:57 +03:00
bool prio_allocate ;
2005-04-17 02:20:36 +04:00
u32 parent ;
2017-05-17 12:08:01 +03:00
u32 chain_index ;
2018-01-17 13:46:51 +03:00
struct Qdisc * q = NULL ;
2017-05-17 12:07:59 +03:00
struct tcf_chain_info chain_info ;
2017-05-17 12:08:01 +03:00
struct tcf_chain * chain = NULL ;
2017-05-17 12:07:55 +03:00
struct tcf_block * block ;
2005-04-17 02:20:36 +04:00
struct tcf_proto * tp ;
unsigned long cl ;
2017-08-05 07:31:43 +03:00
void * fh ;
2005-04-17 02:20:36 +04:00
int err ;
net, sched: fix soft lockup in tc_classify
Shahar reported a soft lockup in tc_classify(), where we run into an
endless loop when walking the classifier chain due to tp->next == tp
which is a state we should never run into. The issue only seems to
trigger under load in the tc control path.
What happens is that in tc_ctl_tfilter(), thread A allocates a new
tp, initializes it, sets tp_created to 1, and calls into tp->ops->change()
with it. In that classifier callback we had to unlock/lock the rtnl
mutex and returned with -EAGAIN. One reason why we need to drop there
is, for example, that we need to request an action module to be loaded.
This happens via tcf_exts_validate() -> tcf_action_init/_1() meaning
after we loaded and found the requested action, we need to redo the
whole request so we don't race against others. While we had to unlock
rtnl in that time, thread B's request was processed next on that CPU.
Thread B added a new tp instance successfully to the classifier chain.
When thread A returned grabbing the rtnl mutex again, propagating -EAGAIN
and destroying its tp instance which never got linked, we goto replay
and redo A's request.
This time when walking the classifier chain in tc_ctl_tfilter() for
checking for existing tp instances we had a priority match and found
the tp instance that was created and linked by thread B. Now calling
again into tp->ops->change() with that tp was successful and returned
without error.
tp_created was never cleared in the second round, thus kernel thinks
that we need to link it into the classifier chain (once again). tp and
*back point to the same object due to the match we had earlier on. Thus
for thread B's already public tp, we reset tp->next to tp itself and
link it into the chain, which eventually causes the mentioned endless
loop in tc_classify() once a packet hits the data path.
Fix is to clear tp_created at the beginning of each request, also when
we replay it. On the paths that can cause -EAGAIN we already destroy
the original tp instance we had and on replay we really need to start
from scratch. It seems that this issue was first introduced in commit
12186be7d2e1 ("net_cls: fix unconfigured struct tcf_proto keeps chaining
and avoid kernel panic when we use cls_cgroup").
Fixes: 12186be7d2e1 ("net_cls: fix unconfigured struct tcf_proto keeps chaining and avoid kernel panic when we use cls_cgroup")
Reported-by: Shahar Klein <shahark@mellanox.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Tested-by: Shahar Klein <shahark@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-12-21 20:04:11 +03:00
int tp_created ;
2005-04-17 02:20:36 +04:00
2018-05-31 09:52:53 +03:00
if ( ! netlink_ns_capable ( skb , net - > user_ns , CAP_NET_ADMIN ) )
2012-11-16 07:03:00 +04:00
return - EPERM ;
2013-03-25 21:36:33 +04:00
2005-04-17 02:20:36 +04:00
replay :
net, sched: fix soft lockup in tc_classify
Shahar reported a soft lockup in tc_classify(), where we run into an
endless loop when walking the classifier chain due to tp->next == tp
which is a state we should never run into. The issue only seems to
trigger under load in the tc control path.
What happens is that in tc_ctl_tfilter(), thread A allocates a new
tp, initializes it, sets tp_created to 1, and calls into tp->ops->change()
with it. In that classifier callback we had to unlock/lock the rtnl
mutex and returned with -EAGAIN. One reason why we need to drop there
is, for example, that we need to request an action module to be loaded.
This happens via tcf_exts_validate() -> tcf_action_init/_1() meaning
after we loaded and found the requested action, we need to redo the
whole request so we don't race against others. While we had to unlock
rtnl in that time, thread B's request was processed next on that CPU.
Thread B added a new tp instance successfully to the classifier chain.
When thread A returned grabbing the rtnl mutex again, propagating -EAGAIN
and destroying its tp instance which never got linked, we goto replay
and redo A's request.
This time when walking the classifier chain in tc_ctl_tfilter() for
checking for existing tp instances we had a priority match and found
the tp instance that was created and linked by thread B. Now calling
again into tp->ops->change() with that tp was successful and returned
without error.
tp_created was never cleared in the second round, thus kernel thinks
that we need to link it into the classifier chain (once again). tp and
*back point to the same object due to the match we had earlier on. Thus
for thread B's already public tp, we reset tp->next to tp itself and
link it into the chain, which eventually causes the mentioned endless
loop in tc_classify() once a packet hits the data path.
Fix is to clear tp_created at the beginning of each request, also when
we replay it. On the paths that can cause -EAGAIN we already destroy
the original tp instance we had and on replay we really need to start
from scratch. It seems that this issue was first introduced in commit
12186be7d2e1 ("net_cls: fix unconfigured struct tcf_proto keeps chaining
and avoid kernel panic when we use cls_cgroup").
Fixes: 12186be7d2e1 ("net_cls: fix unconfigured struct tcf_proto keeps chaining and avoid kernel panic when we use cls_cgroup")
Reported-by: Shahar Klein <shahark@mellanox.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Tested-by: Shahar Klein <shahark@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-12-21 20:04:11 +03:00
tp_created = 0 ;
2018-10-10 23:00:58 +03:00
err = nlmsg_parse ( n , sizeof ( * t ) , tca , TCA_MAX , rtm_tca_policy , extack ) ;
2013-03-25 21:36:33 +04:00
if ( err < 0 )
return err ;
2012-06-27 08:48:50 +04:00
t = nlmsg_data ( n ) ;
2005-04-17 02:20:36 +04:00
protocol = TC_H_MIN ( t - > tcm_info ) ;
prio = TC_H_MAJ ( t - > tcm_info ) ;
2017-05-17 12:07:57 +03:00
prio_allocate = false ;
2005-04-17 02:20:36 +04:00
parent = t - > tcm_parent ;
cl = 0 ;
if ( prio = = 0 ) {
2018-05-31 09:52:53 +03:00
/* If no priority is provided by the user,
* we allocate one .
*/
if ( n - > nlmsg_flags & NLM_F_CREATE ) {
prio = TC_H_MAKE ( 0x80000000U , 0U ) ;
prio_allocate = true ;
} else {
2018-01-18 19:20:50 +03:00
NL_SET_ERR_MSG ( extack , " Invalid filter command with priority of zero " ) ;
2005-04-17 02:20:36 +04:00
return - ENOENT ;
net, cls: allow for deleting all filters for given parent
Add a possibility where the user can just specify the parent and
all filters under that parent are then being purged. Currently,
for example for scripting, one needs to specify pref/prio to have
a well-defined number for 'tc filter del' command for addressing
the previously created instance or additionally filter handle in
case of priorities being the same. Improve usage by allowing the
option for tc to specify the parent and removing the whole chain
for that given parent.
Example usage after patch, no tc changes required:
# tc qdisc replace dev foo clsact
# tc filter add dev foo egress bpf da obj ./bpf.o
# tc filter add dev foo egress bpf da obj ./bpf.o
# tc filter show dev foo egress
filter protocol all pref 49151 bpf
filter protocol all pref 49151 bpf handle 0x1 bpf.o:[classifier] direct-action
filter protocol all pref 49152 bpf
filter protocol all pref 49152 bpf handle 0x1 bpf.o:[classifier] direct-action
# tc filter del dev foo egress
# tc filter show dev foo egress
#
Previously, RTM_DELTFILTER requests with invalid prio of 0 were
rejected, so only netlink requests with RTM_NEWTFILTER and NLM_F_CREATE
flag were allowed where the kernel would auto-generate a pref/prio.
We can piggyback on that and use prio of 0 as a wildcard for
requests of RTM_DELTFILTER.
For notifying tc netlink monitoring users (e.g. libnl uses this
for caching), there are two options, that is, sending individual
tfilter_notify() notifications for each tcf_proto, or sending a
single one indicating wildcard removal. I tried both and there
are pros and cons for each, eventually I decided for sending
individual tfilter_notify(), so that user space can support this
seamlessly and there won't be a mess of changing each and every
application to make sure expectations from the kernel won't break
when they don't understand single notification. Since linear chains
don't really scale, I expect only a handful of classifiers to be
attached at max for a given parent anyway.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-06-11 00:10:22 +03:00
}
2005-04-17 02:20:36 +04:00
}
/* Find head of filter chain. */
2018-05-31 09:52:53 +03:00
block = tcf_block_find ( net , & q , & parent , & cl ,
t - > tcm_ifindex , t - > tcm_block_index , extack ) ;
if ( IS_ERR ( block ) ) {
err = PTR_ERR ( block ) ;
goto errout ;
2017-02-09 16:38:58 +03:00
}
2017-05-17 12:08:01 +03:00
chain_index = tca [ TCA_CHAIN ] ? nla_get_u32 ( tca [ TCA_CHAIN ] ) : 0 ;
if ( chain_index > TC_ACT_EXT_VAL_MASK ) {
2018-01-18 19:20:50 +03:00
NL_SET_ERR_MSG ( extack , " Specified chain index exceeds upper limit " ) ;
2017-05-17 12:08:01 +03:00
err = - EINVAL ;
goto errout ;
}
2018-05-31 09:52:53 +03:00
chain = tcf_chain_get ( block , chain_index , true ) ;
2017-05-17 12:08:01 +03:00
if ( ! chain ) {
2018-08-27 21:58:43 +03:00
NL_SET_ERR_MSG ( extack , " Cannot create specified filter chain " ) ;
2018-05-31 09:52:53 +03:00
err = - ENOMEM ;
net, cls: allow for deleting all filters for given parent
Add a possibility where the user can just specify the parent and
all filters under that parent are then being purged. Currently,
for example for scripting, one needs to specify pref/prio to have
a well-defined number for 'tc filter del' command for addressing
the previously created instance or additionally filter handle in
case of priorities being the same. Improve usage by allowing the
option for tc to specify the parent and removing the whole chain
for that given parent.
Example usage after patch, no tc changes required:
# tc qdisc replace dev foo clsact
# tc filter add dev foo egress bpf da obj ./bpf.o
# tc filter add dev foo egress bpf da obj ./bpf.o
# tc filter show dev foo egress
filter protocol all pref 49151 bpf
filter protocol all pref 49151 bpf handle 0x1 bpf.o:[classifier] direct-action
filter protocol all pref 49152 bpf
filter protocol all pref 49152 bpf handle 0x1 bpf.o:[classifier] direct-action
# tc filter del dev foo egress
# tc filter show dev foo egress
#
Previously, RTM_DELTFILTER requests with invalid prio of 0 were
rejected, so only netlink requests with RTM_NEWTFILTER and NLM_F_CREATE
flag were allowed where the kernel would auto-generate a pref/prio.
We can piggyback on that and use prio of 0 as a wildcard for
requests of RTM_DELTFILTER.
For notifying tc netlink monitoring users (e.g. libnl uses this
for caching), there are two options, that is, sending individual
tfilter_notify() notifications for each tcf_proto, or sending a
single one indicating wildcard removal. I tried both and there
are pros and cons for each, eventually I decided for sending
individual tfilter_notify(), so that user space can support this
seamlessly and there won't be a mess of changing each and every
application to make sure expectations from the kernel won't break
when they don't understand single notification. Since linear chains
don't really scale, I expect only a handful of classifiers to be
attached at max for a given parent anyway.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2016-06-11 00:10:22 +03:00
goto errout ;
}
2005-04-17 02:20:36 +04:00
2017-05-17 12:07:59 +03:00
tp = tcf_chain_tp_find ( chain , & chain_info , protocol ,
prio , prio_allocate ) ;
if ( IS_ERR ( tp ) ) {
2018-01-18 19:20:50 +03:00
NL_SET_ERR_MSG ( extack , " Filter with specified priority/protocol not found " ) ;
2017-05-17 12:07:59 +03:00
err = PTR_ERR ( tp ) ;
goto errout ;
2005-04-17 02:20:36 +04:00
}
if ( tp = = NULL ) {
/* Proto-tcf does not exist, create new one */
2017-02-09 16:38:58 +03:00
if ( tca [ TCA_KIND ] = = NULL | | ! protocol ) {
2018-01-18 19:20:50 +03:00
NL_SET_ERR_MSG ( extack , " Filter kind and protocol must be specified " ) ;
2017-02-09 16:38:58 +03:00
err = - EINVAL ;
2005-04-17 02:20:36 +04:00
goto errout ;
2017-02-09 16:38:58 +03:00
}
2005-04-17 02:20:36 +04:00
2018-05-31 09:52:53 +03:00
if ( ! ( n - > nlmsg_flags & NLM_F_CREATE ) ) {
2018-01-18 19:20:50 +03:00
NL_SET_ERR_MSG ( extack , " Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter " ) ;
2017-02-09 16:38:58 +03:00
err = - ENOENT ;
2005-04-17 02:20:36 +04:00
goto errout ;
2017-02-09 16:38:58 +03:00
}
2005-04-17 02:20:36 +04:00
2017-05-17 12:07:57 +03:00
if ( prio_allocate )
2017-05-17 12:07:59 +03:00
prio = tcf_auto_prio ( tcf_chain_tp_prev ( & chain_info ) ) ;
2005-04-17 02:20:36 +04:00
2017-02-09 16:38:57 +03:00
tp = tcf_proto_create ( nla_data ( tca [ TCA_KIND ] ) ,
2018-01-18 19:20:50 +03:00
protocol , prio , chain , extack ) ;
2017-02-09 16:38:57 +03:00
if ( IS_ERR ( tp ) ) {
err = PTR_ERR ( tp ) ;
2005-04-17 02:20:36 +04:00
goto errout ;
}
2009-06-02 13:17:34 +04:00
tp_created = 1 ;
2017-02-09 16:38:58 +03:00
} else if ( tca [ TCA_KIND ] & & nla_strcmp ( tca [ TCA_KIND ] , tp - > ops - > kind ) ) {
2018-01-18 19:20:50 +03:00
NL_SET_ERR_MSG ( extack , " Specified filter kind does not match existing one " ) ;
2017-02-09 16:38:58 +03:00
err = - EINVAL ;
2005-04-17 02:20:36 +04:00
goto errout ;
2017-02-09 16:38:58 +03:00
}
2005-04-17 02:20:36 +04:00
fh = tp - > ops - > get ( tp , t - > tcm_handle ) ;
2017-08-05 07:31:43 +03:00
if ( ! fh ) {
2018-05-31 09:52:53 +03:00
if ( ! ( n - > nlmsg_flags & NLM_F_CREATE ) ) {
2018-01-18 19:20:50 +03:00
NL_SET_ERR_MSG ( extack , " Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter " ) ;
2017-02-09 16:38:58 +03:00
err = - ENOENT ;
2005-04-17 02:20:36 +04:00
goto errout ;
2017-02-09 16:38:58 +03:00
}
2018-05-31 09:52:53 +03:00
} else if ( n - > nlmsg_flags & NLM_F_EXCL ) {
NL_SET_ERR_MSG ( extack , " Filter already exists " ) ;
err = - EEXIST ;
goto errout ;
2005-04-17 02:20:36 +04:00
}
2018-07-23 10:23:07 +03:00
if ( chain - > tmplt_ops & & chain - > tmplt_ops ! = tp - > ops ) {
NL_SET_ERR_MSG ( extack , " Chain template is set to a different filter kind " ) ;
err = - EINVAL ;
goto errout ;
}
2014-04-26 00:54:06 +04:00
err = tp - > ops - > change ( net , skb , tp , cl , t - > tcm_handle , tca , & fh ,
2018-01-18 19:20:51 +03:00
n - > nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE ,
extack ) ;
2009-06-02 13:17:34 +04:00
if ( err = = 0 ) {
2017-05-17 12:07:59 +03:00
if ( tp_created )
tcf_chain_tp_insert ( chain , & chain_info , tp ) ;
2018-01-17 13:46:51 +03:00
tfilter_notify ( net , skb , n , tp , block , q , parent , fh ,
2017-10-13 15:01:05 +03:00
RTM_NEWTFILTER , false ) ;
2009-06-02 13:17:34 +04:00
} else {
if ( tp_created )
2018-01-24 23:54:13 +03:00
tcf_proto_destroy ( tp , NULL ) ;
2009-06-02 13:17:34 +04:00
}
2005-04-17 02:20:36 +04:00
errout :
2017-05-17 12:08:01 +03:00
if ( chain )
tcf_chain_put ( chain ) ;
2018-09-24 19:22:53 +03:00
tcf_block_release ( q , block ) ;
2005-04-17 02:20:36 +04:00
if ( err = = - EAGAIN )
/* Replay the request. */
goto replay ;
return err ;
}
2018-05-31 09:52:53 +03:00
static int tc_del_tfilter ( struct sk_buff * skb , struct nlmsghdr * n ,
struct netlink_ext_ack * extack )
{
struct net * net = sock_net ( skb - > sk ) ;
struct nlattr * tca [ TCA_MAX + 1 ] ;
struct tcmsg * t ;
u32 protocol ;
u32 prio ;
u32 parent ;
u32 chain_index ;
struct Qdisc * q = NULL ;
struct tcf_chain_info chain_info ;
struct tcf_chain * chain = NULL ;
struct tcf_block * block ;
struct tcf_proto * tp = NULL ;
unsigned long cl = 0 ;
void * fh = NULL ;
int err ;
if ( ! netlink_ns_capable ( skb , net - > user_ns , CAP_NET_ADMIN ) )
return - EPERM ;
2018-10-10 23:00:58 +03:00
err = nlmsg_parse ( n , sizeof ( * t ) , tca , TCA_MAX , rtm_tca_policy , extack ) ;
2018-05-31 09:52:53 +03:00
if ( err < 0 )
return err ;
t = nlmsg_data ( n ) ;
protocol = TC_H_MIN ( t - > tcm_info ) ;
prio = TC_H_MAJ ( t - > tcm_info ) ;
parent = t - > tcm_parent ;
if ( prio = = 0 & & ( protocol | | t - > tcm_handle | | tca [ TCA_KIND ] ) ) {
NL_SET_ERR_MSG ( extack , " Cannot flush filters with protocol, handle or kind set " ) ;
return - ENOENT ;
}
/* Find head of filter chain. */
block = tcf_block_find ( net , & q , & parent , & cl ,
t - > tcm_ifindex , t - > tcm_block_index , extack ) ;
if ( IS_ERR ( block ) ) {
err = PTR_ERR ( block ) ;
goto errout ;
}
chain_index = tca [ TCA_CHAIN ] ? nla_get_u32 ( tca [ TCA_CHAIN ] ) : 0 ;
if ( chain_index > TC_ACT_EXT_VAL_MASK ) {
NL_SET_ERR_MSG ( extack , " Specified chain index exceeds upper limit " ) ;
err = - EINVAL ;
goto errout ;
}
chain = tcf_chain_get ( block , chain_index , false ) ;
if ( ! chain ) {
2018-08-03 12:08:47 +03:00
/* User requested flush on non-existent chain. Nothing to do,
* so just return success .
*/
if ( prio = = 0 ) {
err = 0 ;
goto errout ;
}
2018-05-31 09:52:53 +03:00
NL_SET_ERR_MSG ( extack , " Cannot find specified filter chain " ) ;
2018-08-27 21:58:44 +03:00
err = - ENOENT ;
2018-05-31 09:52:53 +03:00
goto errout ;
}
if ( prio = = 0 ) {
tfilter_notify_chain ( net , skb , block , q , parent , n ,
chain , RTM_DELTFILTER ) ;
tcf_chain_flush ( chain ) ;
err = 0 ;
goto errout ;
}
tp = tcf_chain_tp_find ( chain , & chain_info , protocol ,
prio , false ) ;
if ( ! tp | | IS_ERR ( tp ) ) {
NL_SET_ERR_MSG ( extack , " Filter with specified priority/protocol not found " ) ;
2018-06-04 18:32:23 +03:00
err = tp ? PTR_ERR ( tp ) : - ENOENT ;
2018-05-31 09:52:53 +03:00
goto errout ;
} else if ( tca [ TCA_KIND ] & & nla_strcmp ( tca [ TCA_KIND ] , tp - > ops - > kind ) ) {
NL_SET_ERR_MSG ( extack , " Specified filter kind does not match existing one " ) ;
err = - EINVAL ;
goto errout ;
}
fh = tp - > ops - > get ( tp , t - > tcm_handle ) ;
if ( ! fh ) {
if ( t - > tcm_handle = = 0 ) {
tcf_chain_tp_remove ( chain , & chain_info , tp ) ;
tfilter_notify ( net , skb , n , tp , block , q , parent , fh ,
RTM_DELTFILTER , false ) ;
tcf_proto_destroy ( tp , extack ) ;
err = 0 ;
} else {
NL_SET_ERR_MSG ( extack , " Specified filter handle not found " ) ;
err = - ENOENT ;
}
} else {
bool last ;
err = tfilter_del_notify ( net , skb , n , tp , block ,
q , parent , fh , false , & last ,
extack ) ;
if ( err )
goto errout ;
if ( last ) {
tcf_chain_tp_remove ( chain , & chain_info , tp ) ;
tcf_proto_destroy ( tp , extack ) ;
}
}
errout :
if ( chain )
tcf_chain_put ( chain ) ;
2018-09-24 19:22:53 +03:00
tcf_block_release ( q , block ) ;
2018-05-31 09:52:53 +03:00
return err ;
}
static int tc_get_tfilter ( struct sk_buff * skb , struct nlmsghdr * n ,
struct netlink_ext_ack * extack )
{
struct net * net = sock_net ( skb - > sk ) ;
struct nlattr * tca [ TCA_MAX + 1 ] ;
struct tcmsg * t ;
u32 protocol ;
u32 prio ;
u32 parent ;
u32 chain_index ;
struct Qdisc * q = NULL ;
struct tcf_chain_info chain_info ;
struct tcf_chain * chain = NULL ;
struct tcf_block * block ;
struct tcf_proto * tp = NULL ;
unsigned long cl = 0 ;
void * fh = NULL ;
int err ;
2018-10-10 23:00:58 +03:00
err = nlmsg_parse ( n , sizeof ( * t ) , tca , TCA_MAX , rtm_tca_policy , extack ) ;
2018-05-31 09:52:53 +03:00
if ( err < 0 )
return err ;
t = nlmsg_data ( n ) ;
protocol = TC_H_MIN ( t - > tcm_info ) ;
prio = TC_H_MAJ ( t - > tcm_info ) ;
parent = t - > tcm_parent ;
if ( prio = = 0 ) {
NL_SET_ERR_MSG ( extack , " Invalid filter command with priority of zero " ) ;
return - ENOENT ;
}
/* Find head of filter chain. */
block = tcf_block_find ( net , & q , & parent , & cl ,
t - > tcm_ifindex , t - > tcm_block_index , extack ) ;
if ( IS_ERR ( block ) ) {
err = PTR_ERR ( block ) ;
goto errout ;
}
chain_index = tca [ TCA_CHAIN ] ? nla_get_u32 ( tca [ TCA_CHAIN ] ) : 0 ;
if ( chain_index > TC_ACT_EXT_VAL_MASK ) {
NL_SET_ERR_MSG ( extack , " Specified chain index exceeds upper limit " ) ;
err = - EINVAL ;
goto errout ;
}
chain = tcf_chain_get ( block , chain_index , false ) ;
if ( ! chain ) {
NL_SET_ERR_MSG ( extack , " Cannot find specified filter chain " ) ;
err = - EINVAL ;
goto errout ;
}
tp = tcf_chain_tp_find ( chain , & chain_info , protocol ,
prio , false ) ;
if ( ! tp | | IS_ERR ( tp ) ) {
NL_SET_ERR_MSG ( extack , " Filter with specified priority/protocol not found " ) ;
2018-06-04 18:32:23 +03:00
err = tp ? PTR_ERR ( tp ) : - ENOENT ;
2018-05-31 09:52:53 +03:00
goto errout ;
} else if ( tca [ TCA_KIND ] & & nla_strcmp ( tca [ TCA_KIND ] , tp - > ops - > kind ) ) {
NL_SET_ERR_MSG ( extack , " Specified filter kind does not match existing one " ) ;
err = - EINVAL ;
goto errout ;
}
fh = tp - > ops - > get ( tp , t - > tcm_handle ) ;
if ( ! fh ) {
NL_SET_ERR_MSG ( extack , " Specified filter handle not found " ) ;
err = - ENOENT ;
} else {
err = tfilter_notify ( net , skb , n , tp , block , q , parent ,
fh , RTM_NEWTFILTER , true ) ;
if ( err < 0 )
NL_SET_ERR_MSG ( extack , " Failed to send filter notify message " ) ;
}
errout :
if ( chain )
tcf_chain_put ( chain ) ;
2018-09-24 19:22:53 +03:00
tcf_block_release ( q , block ) ;
2018-05-31 09:52:53 +03:00
return err ;
}
2008-01-21 13:26:41 +03:00
/*
 * Per-walk state used while dumping the nodes of one tcf_proto.
 *
 * tcf_chain_dump() fills this in and hands &arg.w to the classifier's
 * ->walk() callback; tcf_node_dump() then casts the struct tcf_walker
 * pointer it receives straight back to struct tcf_dump_args. That cast
 * only works while 'w' stays the first member.
 *
 *  w      - walker control block (fn, stop, skip, count, cookie)
 *  skb    - message buffer the dumped nodes are written into
 *  cb     - netlink dump callback; source of the requester's portid and
 *           the request sequence number
 *  block  - block owning the chain being dumped, forwarded to tcf_fill_node()
 *  q      - qdisc forwarded to tcf_fill_node(); the dump entry point sets it
 *           to NULL for shared blocks
 *  parent - parent handle forwarded to tcf_fill_node()
 */
struct tcf_dump_args {
2005-04-17 02:20:36 +04:00
struct tcf_walker w ;
struct sk_buff * skb ;
struct netlink_callback * cb ;
2018-01-17 13:46:51 +03:00
struct tcf_block * block ;
2017-10-13 15:01:05 +03:00
struct Qdisc * q ;
u32 parent ;
2005-04-17 02:20:36 +04:00
} ;
2017-08-05 07:31:43 +03:00
static int tcf_node_dump ( struct tcf_proto * tp , void * n , struct tcf_walker * arg )
2005-04-17 02:20:36 +04:00
{
2008-01-21 13:26:41 +03:00
struct tcf_dump_args * a = ( void * ) arg ;
2014-01-10 04:14:01 +04:00
struct net * net = sock_net ( a - > skb - > sk ) ;
2005-04-17 02:20:36 +04:00
2018-01-17 13:46:51 +03:00
return tcf_fill_node ( net , a - > skb , tp , a - > block , a - > q , a - > parent ,
2017-10-13 15:01:05 +03:00
n , NETLINK_CB ( a - > cb - > skb ) . portid ,
2016-09-18 15:45:33 +03:00
a - > cb - > nlh - > nlmsg_seq , NLM_F_MULTI ,
RTM_NEWTFILTER ) ;
2005-04-17 02:20:36 +04:00
}
2017-10-13 15:01:05 +03:00
/*
 * Dump every tcf_proto of @chain (and, through ->walk(), each of its
 * nodes) into @skb.
 *
 * @index_start: index of the first tcf_proto that still has to be dumped.
 * @p_index:     running tcf_proto index shared across chains; advanced
 *               once per tcf_proto visited.
 *
 * Resume state kept in @cb:
 *   args[1] - 0 while the tcf_proto header has not been emitted yet,
 *             otherwise 1 + number of nodes already walked (used as the
 *             walker's skip count);
 *   args[2] - opaque walker cookie handed back to ->walk().
 *
 * Returns true when the chain was dumped completely, false when dumping
 * stopped early (tcf_fill_node() returned <= 0 or the walker set stop).
 */
static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
			   struct sk_buff *skb, struct netlink_callback *cb,
			   long index_start, long *p_index)
{
	struct tcmsg *tcm = nlmsg_data(cb->nlh);
	struct tcf_block *block = chain->block;
	struct net *net = sock_net(skb->sk);
	/* Optional filters from the request; zero means "match any". */
	u32 want_prio = TC_H_MAJ(tcm->tcm_info);
	u32 want_proto = TC_H_MIN(tcm->tcm_info);
	struct tcf_dump_args arg;
	struct tcf_proto *tp;
	int filled;

	for (tp = rtnl_dereference(chain->filter_chain);
	     tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
		/* Already dumped in an earlier pass. */
		if (*p_index < index_start)
			continue;
		if (want_prio && want_prio != tp->prio)
			continue;
		if (want_proto && want_proto != tp->protocol)
			continue;

		/* Past the resume point: forget per-tcf_proto resume state. */
		if (*p_index > index_start)
			memset(&cb->args[1], 0,
			       sizeof(cb->args) - sizeof(cb->args[0]));

		if (cb->args[1] == 0) {
			filled = tcf_fill_node(net, skb, tp, block, q, parent,
					       NULL,
					       NETLINK_CB(cb->skb).portid,
					       cb->nlh->nlmsg_seq, NLM_F_MULTI,
					       RTM_NEWTFILTER);
			if (filled <= 0)
				return false;
			cb->args[1] = 1;
		}

		if (!tp->ops->walk)
			continue;

		arg.skb = skb;
		arg.cb = cb;
		arg.block = block;
		arg.q = q;
		arg.parent = parent;
		arg.w.fn = tcf_node_dump;
		arg.w.stop = 0;
		arg.w.count = 0;
		arg.w.skip = cb->args[1] - 1;
		arg.w.cookie = cb->args[2];

		tp->ops->walk(tp, &arg.w);

		cb->args[2] = arg.w.cookie;
		cb->args[1] = arg.w.count + 1;
		if (arg.w.stop)
			return false;
	}

	return true;
}
2009-11-06 07:57:26 +03:00
/* called with RTNL */
2005-04-17 02:20:36 +04:00
static int tc_dump_tfilter ( struct sk_buff * skb , struct netlink_callback * cb )
{
2008-03-25 20:26:21 +03:00
struct net * net = sock_net ( skb - > sk ) ;
2017-05-17 12:08:01 +03:00
struct nlattr * tca [ TCA_MAX + 1 ] ;
2018-01-17 13:46:51 +03:00
struct Qdisc * q = NULL ;
2017-05-17 12:07:55 +03:00
struct tcf_block * block ;
2017-05-17 12:07:59 +03:00
struct tcf_chain * chain ;
2012-06-27 08:48:50 +04:00
struct tcmsg * tcm = nlmsg_data ( cb - > nlh ) ;
2017-05-17 12:08:00 +03:00
long index_start ;
long index ;
2017-10-13 15:01:05 +03:00
u32 parent ;
2017-05-17 12:08:01 +03:00
int err ;
2005-04-17 02:20:36 +04:00
2013-03-27 10:47:04 +04:00
if ( nlmsg_len ( cb - > nlh ) < sizeof ( * tcm ) )
2005-04-17 02:20:36 +04:00
return skb - > len ;
2017-05-17 12:08:01 +03:00
2018-10-08 06:16:24 +03:00
err = nlmsg_parse ( cb - > nlh , sizeof ( * tcm ) , tca , TCA_MAX , NULL ,
cb - > extack ) ;
2017-05-17 12:08:01 +03:00
if ( err )
return err ;
2018-01-17 13:46:51 +03:00
if ( tcm - > tcm_ifindex = = TCM_IFINDEX_MAGIC_BLOCK ) {
2018-09-24 19:22:58 +03:00
block = tcf_block_refcnt_get ( net , tcm - > tcm_block_index ) ;
2018-01-17 13:46:51 +03:00
if ( ! block )
goto out ;
2018-01-18 18:14:49 +03:00
/* If we work with block index, q is NULL and parent value
* will never be used in the following code . The check
* in tcf_fill_node prevents it . However , compiler does not
* see that far , so set parent to zero to silence the warning
* about parent being uninitialized .
*/
parent = 0 ;
2017-10-13 15:01:05 +03:00
} else {
2018-01-17 13:46:51 +03:00
const struct Qdisc_class_ops * cops ;
struct net_device * dev ;
unsigned long cl = 0 ;
dev = __dev_get_by_index ( net , tcm - > tcm_ifindex ) ;
if ( ! dev )
return skb - > len ;
parent = tcm - > tcm_parent ;
if ( ! parent ) {
q = dev - > qdisc ;
parent = q - > handle ;
} else {
q = qdisc_lookup ( dev , TC_H_MAJ ( tcm - > tcm_parent ) ) ;
}
if ( ! q )
goto out ;
cops = q - > ops - > cl_ops ;
if ( ! cops )
net_sched: remove tc class reference counting
For TC classes, their ->get() and ->put() are always paired, and the
reference counting is completely useless, because:
1) For class modification and dumping paths, we already hold RTNL lock,
so all of these ->get(),->change(),->put() are atomic.
2) For filter bindiing/unbinding, we use other reference counter than
this one, and they should have RTNL lock too.
3) For ->qlen_notify(), it is special because it is called on ->enqueue()
path, but we already hold qdisc tree lock there, and we hold this
tree lock when graft or delete the class too, so it should not be gone
or changed until we release the tree lock.
Therefore, this patch removes ->get() and ->put(), but:
1) Adds a new ->find() to find the pointer to a class by classid, no
refcnt.
2) Move the original class destroy upon the last refcnt into ->delete(),
right after releasing tree lock. This is fine because the class is
already removed from hash when holding the lock.
For those who also use ->put() as ->unbind(), just rename them to reflect
this change.
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2017-08-25 02:51:29 +03:00
goto out ;
2018-01-17 13:46:51 +03:00
if ( ! cops - > tcf_block )
goto out ;
if ( TC_H_MIN ( tcm - > tcm_parent ) ) {
cl = cops - > find ( q , tcm - > tcm_parent ) ;
if ( cl = = 0 )
goto out ;
}
block = cops - > tcf_block ( q , cl , NULL ) ;
if ( ! block )
goto out ;
if ( tcf_block_shared ( block ) )
q = NULL ;
2005-04-17 02:20:36 +04:00
}
2017-05-17 12:08:00 +03:00
index_start = cb - > args [ 0 ] ;
index = 0 ;
2017-05-17 12:08:01 +03:00
list_for_each_entry ( chain , & block - > chain_list , list ) {
if ( tca [ TCA_CHAIN ] & &
nla_get_u32 ( tca [ TCA_CHAIN ] ) ! = chain - > index )
continue ;
2017-10-13 15:01:05 +03:00
if ( ! tcf_chain_dump ( chain , q , parent , skb , cb ,
2018-02-19 23:32:51 +03:00
index_start , & index ) ) {
err = - EMSGSIZE ;
2017-05-17 12:08:01 +03:00
break ;
2018-02-19 23:32:51 +03:00
}
2017-05-17 12:08:01 +03:00
}
2018-09-24 19:22:58 +03:00
if ( tcm - > tcm_ifindex = = TCM_IFINDEX_MAGIC_BLOCK )
tcf_block_refcnt_put ( block ) ;
2017-05-17 12:08:00 +03:00
cb - > args [ 0 ] = index ;
2005-04-17 02:20:36 +04:00
out :
2018-02-19 23:32:51 +03:00
/* If we did no progress, the error (EMSGSIZE) is real */
if ( skb - > len = = 0 & & err )
return err ;
2005-04-17 02:20:36 +04:00
return skb - > len ;
}
2018-07-23 10:23:06 +03:00
/*
 * Build one chain message (netlink header + struct tcmsg + attributes)
 * describing @chain at the tail of @skb.
 *
 * The tcmsg addresses the chain's block either by device/qdisc handle
 * (block->q set) or by block index. TCA_CHAIN is always added; when the
 * chain has a template, TCA_KIND and the classifier's template dump follow.
 *
 * Returns skb->len on success. On failure the partially written message is
 * trimmed off again and -EMSGSIZE is returned.
 */
static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
			      struct sk_buff *skb, struct tcf_block *block,
			      u32 portid, u32 seq, u16 flags, int event)
{
	unsigned char *b = skb_tail_pointer(skb);
	const struct tcf_proto_ops *ops;
	struct nlmsghdr *nlh;
	struct tcmsg *tcm;
	void *priv;

	ops = chain->tmplt_ops;
	priv = chain->tmplt_priv;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_handle = 0;
	/* A chain has no priority/protocol. Write the field explicitly so the
	 * message never carries stale skb bytes out to user space
	 * (NOTE(review): nlmsg_put() is not expected to clear the payload).
	 */
	tcm->tcm_info = 0;
	if (block->q) {
		tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex;
		tcm->tcm_parent = block->q->handle;
	} else {
		tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
		tcm->tcm_block_index = block->index;
	}

	if (nla_put_u32(skb, TCA_CHAIN, chain->index))
		goto nla_put_failure;

	if (ops) {
		if (nla_put_string(skb, TCA_KIND, ops->kind))
			goto nla_put_failure;
		if (ops->tmplt_dump(skb, net, priv) < 0)
			goto nla_put_failure;
	}

	/* Final length covers the header plus everything appended since. */
	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -EMSGSIZE;
}
/*
 * Allocate a message describing @chain and send it.
 *
 * @oskb:    request skb the notification answers, or NULL; supplies the
 *           destination/originator portid (0 when NULL).
 * @unicast: true to send only to that portid, false to go through
 *           rtnetlink_send() on RTNLGRP_TC (echoing if NLM_F_ECHO is set
 *           in @flags).
 *
 * Returns the send function's result, -ENOBUFS when no skb could be
 * allocated, or -EINVAL when the message could not be built.
 */
static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
			   u32 seq, u16 flags, int event, bool unicast)
{
	struct tcf_block *block = chain->block;
	struct net *net = block->net;
	struct sk_buff *skb;
	u32 portid = 0;
	int filled;

	if (oskb)
		portid = NETLINK_CB(oskb).portid;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	filled = tc_chain_fill_node(chain, net, skb, block, portid,
				    seq, flags, event);
	if (filled <= 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return unicast ?
		netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT) :
		rtnetlink_send(skb, net, portid, RTNLGRP_TC,
			       flags & NLM_F_ECHO);
}
2018-07-23 10:23:07 +03:00
static int tc_chain_tmplt_add ( struct tcf_chain * chain , struct net * net ,
struct nlattr * * tca ,
struct netlink_ext_ack * extack )
{
const struct tcf_proto_ops * ops ;
void * tmplt_priv ;
/* If kind is not set, user did not specify template. */
if ( ! tca [ TCA_KIND ] )
return 0 ;
ops = tcf_proto_lookup_ops ( nla_data ( tca [ TCA_KIND ] ) , extack ) ;
if ( IS_ERR ( ops ) )
return PTR_ERR ( ops ) ;
if ( ! ops - > tmplt_create | | ! ops - > tmplt_destroy | | ! ops - > tmplt_dump ) {
NL_SET_ERR_MSG ( extack , " Chain templates are not supported with specified classifier " ) ;
return - EOPNOTSUPP ;
}
tmplt_priv = ops - > tmplt_create ( net , chain , tca , extack ) ;
if ( IS_ERR ( tmplt_priv ) ) {
module_put ( ops - > owner ) ;
return PTR_ERR ( tmplt_priv ) ;
}
chain - > tmplt_ops = ops ;
chain - > tmplt_priv = tmplt_priv ;
return 0 ;
}
static void tc_chain_tmplt_del ( struct tcf_chain * chain )
{
const struct tcf_proto_ops * ops = chain - > tmplt_ops ;
/* If template ops are set, no work to do for us. */
if ( ! ops )
return ;
ops - > tmplt_destroy ( chain - > tmplt_priv ) ;
module_put ( ops - > owner ) ;
}
2018-07-23 10:23:06 +03:00
/* Add/delete/get a chain */
static int tc_ctl_chain ( struct sk_buff * skb , struct nlmsghdr * n ,
struct netlink_ext_ack * extack )
{
struct net * net = sock_net ( skb - > sk ) ;
struct nlattr * tca [ TCA_MAX + 1 ] ;
struct tcmsg * t ;
u32 parent ;
u32 chain_index ;
struct Qdisc * q = NULL ;
struct tcf_chain * chain = NULL ;
struct tcf_block * block ;
unsigned long cl ;
int err ;
if ( n - > nlmsg_type ! = RTM_GETCHAIN & &
! netlink_ns_capable ( skb , net - > user_ns , CAP_NET_ADMIN ) )
return - EPERM ;
replay :
2018-10-10 23:00:58 +03:00
err = nlmsg_parse ( n , sizeof ( * t ) , tca , TCA_MAX , rtm_tca_policy , extack ) ;
2018-07-23 10:23:06 +03:00
if ( err < 0 )
return err ;
t = nlmsg_data ( n ) ;
parent = t - > tcm_parent ;
cl = 0 ;
block = tcf_block_find ( net , & q , & parent , & cl ,
t - > tcm_ifindex , t - > tcm_block_index , extack ) ;
if ( IS_ERR ( block ) )
return PTR_ERR ( block ) ;
chain_index = tca [ TCA_CHAIN ] ? nla_get_u32 ( tca [ TCA_CHAIN ] ) : 0 ;
if ( chain_index > TC_ACT_EXT_VAL_MASK ) {
NL_SET_ERR_MSG ( extack , " Specified chain index exceeds upper limit " ) ;
2018-09-24 19:22:53 +03:00
err = - EINVAL ;
goto errout_block ;
2018-07-23 10:23:06 +03:00
}
chain = tcf_chain_lookup ( block , chain_index ) ;
if ( n - > nlmsg_type = = RTM_NEWCHAIN ) {
if ( chain ) {
2018-08-01 13:36:55 +03:00
if ( tcf_chain_held_by_acts_only ( chain ) ) {
2018-07-27 10:45:05 +03:00
/* The chain exists only because there is
2018-08-01 13:36:55 +03:00
* some action referencing it .
2018-07-27 10:45:05 +03:00
*/
tcf_chain_hold ( chain ) ;
} else {
NL_SET_ERR_MSG ( extack , " Filter chain already exists " ) ;
2018-09-24 19:22:53 +03:00
err = - EEXIST ;
goto errout_block ;
2018-07-27 10:45:05 +03:00
}
} else {
if ( ! ( n - > nlmsg_flags & NLM_F_CREATE ) ) {
NL_SET_ERR_MSG ( extack , " Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain " ) ;
2018-09-24 19:22:53 +03:00
err = - ENOENT ;
goto errout_block ;
2018-07-27 10:45:05 +03:00
}
chain = tcf_chain_create ( block , chain_index ) ;
if ( ! chain ) {
NL_SET_ERR_MSG ( extack , " Failed to create filter chain " ) ;
2018-09-24 19:22:53 +03:00
err = - ENOMEM ;
goto errout_block ;
2018-07-27 10:45:05 +03:00
}
2018-07-23 10:23:06 +03:00
}
} else {
2018-08-01 13:36:55 +03:00
if ( ! chain | | tcf_chain_held_by_acts_only ( chain ) ) {
2018-07-23 10:23:06 +03:00
NL_SET_ERR_MSG ( extack , " Cannot find specified filter chain " ) ;
2018-09-24 19:22:53 +03:00
err = - EINVAL ;
goto errout_block ;
2018-07-23 10:23:06 +03:00
}
tcf_chain_hold ( chain ) ;
}
switch ( n - > nlmsg_type ) {
case RTM_NEWCHAIN :
2018-07-23 10:23:07 +03:00
err = tc_chain_tmplt_add ( chain , net , tca , extack ) ;
if ( err )
goto errout ;
2018-07-23 10:23:06 +03:00
/* In case the chain was successfully added, take a reference
* to the chain . This ensures that an empty chain
* does not disappear at the end of this function .
*/
tcf_chain_hold ( chain ) ;
chain - > explicitly_created = true ;
tc_chain_notify ( chain , NULL , 0 , NLM_F_CREATE | NLM_F_EXCL ,
RTM_NEWCHAIN , false ) ;
break ;
case RTM_DELCHAIN :
2018-09-12 00:22:23 +03:00
tfilter_notify_chain ( net , skb , block , q , parent , n ,
chain , RTM_DELTFILTER ) ;
2018-07-23 10:23:06 +03:00
/* Flush the chain first as the user requested chain removal. */
tcf_chain_flush ( chain ) ;
/* In case the chain was successfully deleted, put a reference
* to the chain previously taken during addition .
*/
tcf_chain_put_explicitly_created ( chain ) ;
2018-07-26 19:27:58 +03:00
chain - > explicitly_created = false ;
2018-07-23 10:23:06 +03:00
break ;
case RTM_GETCHAIN :
err = tc_chain_notify ( chain , skb , n - > nlmsg_seq ,
n - > nlmsg_seq , n - > nlmsg_type , true ) ;
if ( err < 0 )
NL_SET_ERR_MSG ( extack , " Failed to send chain notify message " ) ;
break ;
default :
err = - EOPNOTSUPP ;
NL_SET_ERR_MSG ( extack , " Unsupported message type " ) ;
goto errout ;
}
errout :
tcf_chain_put ( chain ) ;
2018-09-24 19:22:53 +03:00
errout_block :
tcf_block_release ( q , block ) ;
2018-07-23 10:23:06 +03:00
if ( err = = - EAGAIN )
/* Replay the request. */
goto replay ;
return err ;
}
/* called with RTNL */
static int tc_dump_chain ( struct sk_buff * skb , struct netlink_callback * cb )
{
struct net * net = sock_net ( skb - > sk ) ;
struct nlattr * tca [ TCA_MAX + 1 ] ;
struct Qdisc * q = NULL ;
struct tcf_block * block ;
struct tcf_chain * chain ;
struct tcmsg * tcm = nlmsg_data ( cb - > nlh ) ;
long index_start ;
long index ;
u32 parent ;
int err ;
if ( nlmsg_len ( cb - > nlh ) < sizeof ( * tcm ) )
return skb - > len ;
2018-10-10 23:00:58 +03:00
err = nlmsg_parse ( cb - > nlh , sizeof ( * tcm ) , tca , TCA_MAX , rtm_tca_policy ,
2018-10-08 06:16:24 +03:00
cb - > extack ) ;
2018-07-23 10:23:06 +03:00
if ( err )
return err ;
if ( tcm - > tcm_ifindex = = TCM_IFINDEX_MAGIC_BLOCK ) {
2018-09-24 19:22:58 +03:00
block = tcf_block_refcnt_get ( net , tcm - > tcm_block_index ) ;
2018-07-23 10:23:06 +03:00
if ( ! block )
goto out ;
/* If we work with block index, q is NULL and parent value
* will never be used in the following code . The check
* in tcf_fill_node prevents it . However , compiler does not
* see that far , so set parent to zero to silence the warning
* about parent being uninitialized .
*/
parent = 0 ;
} else {
const struct Qdisc_class_ops * cops ;
struct net_device * dev ;
unsigned long cl = 0 ;
dev = __dev_get_by_index ( net , tcm - > tcm_ifindex ) ;
if ( ! dev )
return skb - > len ;
parent = tcm - > tcm_parent ;
if ( ! parent ) {
q = dev - > qdisc ;
parent = q - > handle ;
} else {
q = qdisc_lookup ( dev , TC_H_MAJ ( tcm - > tcm_parent ) ) ;
}
if ( ! q )
goto out ;
cops = q - > ops - > cl_ops ;
if ( ! cops )
goto out ;
if ( ! cops - > tcf_block )
goto out ;
if ( TC_H_MIN ( tcm - > tcm_parent ) ) {
cl = cops - > find ( q , tcm - > tcm_parent ) ;
if ( cl = = 0 )
goto out ;
}
block = cops - > tcf_block ( q , cl , NULL ) ;
if ( ! block )
goto out ;
if ( tcf_block_shared ( block ) )
q = NULL ;
}
index_start = cb - > args [ 0 ] ;
index = 0 ;
list_for_each_entry ( chain , & block - > chain_list , list ) {
if ( ( tca [ TCA_CHAIN ] & &
nla_get_u32 ( tca [ TCA_CHAIN ] ) ! = chain - > index ) )
continue ;
if ( index < index_start ) {
index + + ;
continue ;
}
2018-08-01 13:36:55 +03:00
if ( tcf_chain_held_by_acts_only ( chain ) )
2018-07-27 10:45:05 +03:00
continue ;
2018-07-23 10:23:06 +03:00
err = tc_chain_fill_node ( chain , net , skb , block ,
NETLINK_CB ( cb - > skb ) . portid ,
cb - > nlh - > nlmsg_seq , NLM_F_MULTI ,
RTM_NEWCHAIN ) ;
if ( err < = 0 )
break ;
index + + ;
}
2018-09-24 19:22:58 +03:00
if ( tcm - > tcm_ifindex = = TCM_IFINDEX_MAGIC_BLOCK )
tcf_block_refcnt_put ( block ) ;
2018-07-23 10:23:06 +03:00
cb - > args [ 0 ] = index ;
out :
/* If we did no progress, the error (EMSGSIZE) is real */
if ( skb - > len = = 0 & & err )
return err ;
return skb - > len ;
}
2014-09-25 21:26:37 +04:00
/* Release everything @exts owns: unbind the actions stored in the actions
 * array, free the array itself and reset the action count.
 *
 * Does nothing when CONFIG_NET_CLS_ACT is disabled.
 */
void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	/* The array pointer can be NULL (for instance when it was never
	 * allocated); tcf_action_destroy() is handed the array itself and
	 * is presumed to walk it unconditionally, so skip it in that case.
	 */
	if (exts->actions) {
		tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
		kfree(exts->actions);
		/* Do not leave a dangling pointer behind. */
		exts->actions = NULL;
	}
	exts->nr_actions = 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_destroy);
2005-04-17 02:20:36 +04:00
2013-01-14 09:15:39 +04:00
/* Build the actions requested in the netlink attributes @tb and store them
 * in @exts.
 *
 * With CONFIG_NET_CLS_ACT: if the classifier's police attribute is present,
 * a single "police" action is created and @exts is marked TCA_OLD_COMPAT;
 * otherwise, if the action attribute is present, the whole action list is
 * created. In both cases the actions are initialised with TCA_ACT_BIND.
 * @exts->net is set to @net on success.
 *
 * Without CONFIG_NET_CLS_ACT: the presence of either attribute is rejected.
 *
 * Returns 0 on success, a negative error from action initialisation, or
 * -EOPNOTSUPP when actions are requested but not compiled in.
 */
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
		      struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
		      struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
	size_t attr_size = 0;

	if (exts->police && tb[exts->police]) {
		struct tc_action *police;

		police = tcf_action_init_1(net, tp, tb[exts->police], rate_tlv,
					   "police", ovr, TCA_ACT_BIND, true,
					   extack);
		if (IS_ERR(police))
			return PTR_ERR(police);

		/* Old-style policer: exactly one action, flagged as such. */
		exts->type = TCA_OLD_COMPAT;
		police->type = exts->type;
		exts->actions[0] = police;
		exts->nr_actions = 1;
	} else if (exts->action && tb[exts->action]) {
		int nr;

		/* A non-negative result is the number of actions created. */
		nr = tcf_action_init(net, tp, tb[exts->action], rate_tlv, NULL,
				     ovr, TCA_ACT_BIND, exts->actions,
				     &attr_size, true, extack);
		if (nr < 0)
			return nr;
		exts->nr_actions = nr;
	}
	exts->net = net;
	return 0;
#else
	if (!(exts->action && tb[exts->action]) &&
	    !(exts->police && tb[exts->police]))
		return 0;

	NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)");
	return -EOPNOTSUPP;
#endif
}
EXPORT_SYMBOL(tcf_exts_validate);
2005-04-17 02:20:36 +04:00
2017-08-04 15:29:15 +03:00
/* Replace the contents of @dst with those of @src and destroy what @dst
 * held before. The copy is a plain structure assignment, so @dst takes
 * over the pointers stored in @src.
 *
 * Does nothing when CONFIG_NET_CLS_ACT is disabled.
 */
void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_exts stale;

	/* Keep the previous contents aside so they can be released once
	 * @dst has been overwritten.
	 */
	stale = *dst;
	*dst = *src;
	tcf_exts_destroy(&stale);
#endif
}
EXPORT_SYMBOL(tcf_exts_change);
2005-04-17 02:20:36 +04:00
2016-08-14 08:35:00 +03:00
#ifdef CONFIG_NET_CLS_ACT
/* Return the first action stored in @exts, or NULL when it holds none. */
static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
{
	return exts->nr_actions ? exts->actions[0] : NULL;
}
#endif
2013-12-16 08:15:05 +04:00
2013-12-16 08:15:07 +04:00
/* Dump the actions of @exts into @skb as a nested attribute.
 *
 * Nothing is written when the classifier has no action attribute type or
 * @exts holds no actions. For backward compatibility both the old and the
 * new way of entering tc data are supported: extensions not marked
 * TCA_OLD_COMPAT are dumped as an action list under the action attribute,
 * old-style ones are dumped as a single action under the police attribute
 * (if the classifier has one).
 *
 * Returns 0 on success or when there is nothing to dump, -1 on failure
 * after cancelling the nest. Always 0 without CONFIG_NET_CLS_ACT.
 */
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct nlattr *nest;
	int rc;

	if (!exts->action || !tcf_exts_has_actions(exts))
		return 0;

	if (exts->type != TCA_OLD_COMPAT) {
		nest = nla_nest_start(skb, exts->action);
		if (!nest)
			goto cancel;

		rc = tcf_action_dump(skb, exts->actions, 0, 0);
	} else if (exts->police) {
		struct tc_action *first = tcf_exts_first_act(exts);

		nest = nla_nest_start(skb, exts->police);
		if (!nest || !first)
			goto cancel;

		rc = tcf_action_dump_old(skb, first, 0, 0);
	} else {
		/* Old-style extension without a police attribute type. */
		return 0;
	}

	if (rc < 0)
		goto cancel;

	nla_nest_end(skb, nest);
	return 0;

cancel:
	nla_nest_cancel(skb, nest);
	return -1;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(tcf_exts_dump);
2005-04-17 02:20:36 +04:00
2008-01-21 13:26:41 +03:00
2013-12-16 08:15:07 +04:00
/* Copy the statistics of the first action of @exts into @skb.
 *
 * Returns -1 when copying the statistics fails, 0 otherwise (including
 * when @exts has no actions or CONFIG_NET_CLS_ACT is disabled).
 */
int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	struct tc_action *first = tcf_exts_first_act(exts);

	if (!first)
		return 0;
	if (tcf_action_copy_stats(skb, first, 1) < 0)
		return -1;
#endif
	return 0;
}
EXPORT_SYMBOL(tcf_exts_dump_stats);
2005-04-17 02:20:36 +04:00
2017-10-11 10:41:09 +03:00
/* Invoke the egress-device setup callbacks for every action of @exts that
 * can report a device through its get_dev operation.
 *
 * Returns the sum of the non-negative results of the individual calls, or
 * the first negative result encountered. Returns 0 when @exts has no
 * actions or CONFIG_NET_CLS_ACT is disabled.
 */
static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts,
				       enum tc_setup_type type,
				       void *type_data, bool err_stop)
{
	int ok_count = 0;
#ifdef CONFIG_NET_CLS_ACT
	int i;

	if (!tcf_exts_has_actions(exts))
		return 0;

	for (i = 0; i < exts->nr_actions; i++) {
		const struct tc_action *act = exts->actions[i];
		struct net_device *egdev;
		int ret;

		/* Only actions that can name a device take part. */
		if (!act->ops->get_dev)
			continue;

		egdev = act->ops->get_dev(act);
		if (!egdev)
			continue;

		ret = tc_setup_cb_egdev_call(egdev, type, type_data, err_stop);
		/* Hand the device back before looking at the result. */
		act->ops->put_dev(egdev);
		if (ret < 0)
			return ret;

		ok_count += ret;
	}
#endif
	return ok_count;
}
2017-10-11 10:41:09 +03:00
2017-10-19 16:50:32 +03:00
/* Run the setup callbacks for @block and, only if none of them reported
 * success and @exts is given, fall back to the egress-device callbacks of
 * the actions in @exts.
 *
 * Returns the non-negative count reported by the callbacks, or the first
 * negative error.
 */
int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
		     enum tc_setup_type type, void *type_data, bool err_stop)
{
	int block_ok;
	int egdev_ok;

	block_ok = tcf_block_cb_call(block, type, type_data, err_stop);
	if (block_ok < 0)
		return block_ok;

	/* The block callbacks handled it, or there are no extensions to
	 * try the egress devices of.
	 */
	if (block_ok || !exts)
		return block_ok;

	egdev_ok = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop);
	if (egdev_ok < 0)
		return egdev_ok;

	return block_ok + egdev_ok;
}
EXPORT_SYMBOL(tc_setup_cb_call);
2017-10-11 10:41:08 +03:00
2018-01-17 13:46:46 +03:00
static __net_init int tcf_net_init ( struct net * net )
{
struct tcf_net * tn = net_generic ( net , tcf_net_id ) ;
2018-09-24 19:22:56 +03:00
spin_lock_init ( & tn - > idr_lock ) ;
2018-01-17 13:46:46 +03:00
idr_init ( & tn - > idr ) ;
return 0 ;
}
/* Per-namespace exit: tear down the IDR of this namespace's struct tcf_net. */
static void __net_exit tcf_net_exit(struct net *net)
{
	struct tcf_net *tcf_ns = net_generic(net, tcf_net_id);

	idr_destroy(&tcf_ns->idr);
}
/* Per-network-namespace operations for the classifier API: tcf_net_init()
 * and tcf_net_exit() are the init/exit hooks, tcf_net_id is the generic
 * per-net id and the per-net area has the size of struct tcf_net.
 * Registered from tc_filter_init() via register_pernet_subsys().
 */
static struct pernet_operations tcf_net_ops = {
. init = tcf_net_init ,
. exit = tcf_net_exit ,
. id = & tcf_net_id ,
. size = sizeof ( struct tcf_net ) ,
} ;
2005-04-17 02:20:36 +04:00
/* Subsystem init for the classifier API: create the ordered filter
 * workqueue, register the per-namespace operations and hook up the
 * rtnetlink handlers for filter and chain messages.
 *
 * Returns 0 on success, -ENOMEM if the workqueue cannot be allocated, or
 * the error from register_pernet_subsys() (the workqueue is destroyed
 * again in that case).
 */
static int __init tc_filter_init(void)
{
	int ret;

	tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
	if (!tc_filter_wq)
		return -ENOMEM;

	ret = register_pernet_subsys(&tcf_net_ops);
	if (ret) {
		destroy_workqueue(tc_filter_wq);
		return ret;
	}

	/* Filter messages; only GET has a dump handler. */
	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
		      tc_dump_tfilter, 0);

	/* Chain messages all go through tc_ctl_chain(). */
	rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
		      tc_dump_chain, 0);

	return 0;
}

subsys_initcall(tc_filter_init);