net: sched: register callbacks for indirect tc block binds

Currently drivers can register to receive TC block bind/unbind callbacks
by implementing the setup_tc ndo in any of their given netdevs. However,
drivers may also be interested in binds to higher level devices (e.g.
tunnel drivers) to potentially offload filters applied to them.

Introduce indirect block devs which allows drivers to register callbacks
for block binds on other devices. The callback is triggered when the
device is bound to a block, allowing the driver to register for rules
applied to that block using already available functions.

Freeing an indirect block callback will trigger an unbind event (if
necessary) to direct the driver to remove any offloaded rules and unreg
any block rule callbacks. It is the responsibility of the implementing
driver to clean any registered indirect block callbacks before exiting,
if the block it still active at such a time.

Allow registering an indirect block dev callback for a device that is
already bound to a block. In this case (if it is an ingress block),
register and also trigger the callback meaning that any already installed
rules can be replayed to the calling driver.

Signed-off-by: John Hurley <john.hurley@netronome.com>
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
John Hurley 2018-11-09 21:21:26 -08:00 committed by David S. Miller
parent d1ce01144e
commit 7f76fa3675
3 changed files with 292 additions and 1 deletions

View File

@ -81,6 +81,14 @@ void __tcf_block_cb_unregister(struct tcf_block *block,
struct tcf_block_cb *block_cb); struct tcf_block_cb *block_cb);
void tcf_block_cb_unregister(struct tcf_block *block, void tcf_block_cb_unregister(struct tcf_block *block,
tc_setup_cb_t *cb, void *cb_ident); tc_setup_cb_t *cb, void *cb_ident);
int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
tc_indr_block_bind_cb_t *cb, void *cb_ident);
int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
tc_indr_block_bind_cb_t *cb, void *cb_ident);
void __tc_indr_block_cb_unregister(struct net_device *dev,
tc_indr_block_bind_cb_t *cb, void *cb_ident);
void tc_indr_block_cb_unregister(struct net_device *dev,
tc_indr_block_bind_cb_t *cb, void *cb_ident);
int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode); struct tcf_result *res, bool compat_mode);
@ -183,6 +191,32 @@ void tcf_block_cb_unregister(struct tcf_block *block,
{ {
} }
static inline
int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
return 0;
}
static inline
int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
return 0;
}
static inline
void __tc_indr_block_cb_unregister(struct net_device *dev,
tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
}
static inline
void tc_indr_block_cb_unregister(struct net_device *dev,
tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
}
static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res, bool compat_mode) struct tcf_result *res, bool compat_mode)
{ {

View File

@ -24,6 +24,9 @@ struct bpf_flow_keys;
typedef int tc_setup_cb_t(enum tc_setup_type type, typedef int tc_setup_cb_t(enum tc_setup_type type,
void *type_data, void *cb_priv); void *type_data, void *cb_priv);
typedef int tc_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv,
enum tc_setup_type type, void *type_data);
struct qdisc_rate_table { struct qdisc_rate_table {
struct tc_ratespec rate; struct tc_ratespec rate;
u32 data[256]; u32 data[256];

View File

@ -25,6 +25,7 @@
#include <linux/kmod.h> #include <linux/kmod.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/idr.h> #include <linux/idr.h>
#include <linux/rhashtable.h>
#include <net/net_namespace.h> #include <net/net_namespace.h>
#include <net/sock.h> #include <net/sock.h>
#include <net/netlink.h> #include <net/netlink.h>
@ -365,6 +366,245 @@ static void tcf_chain_flush(struct tcf_chain *chain)
} }
} }
static struct tcf_block *tc_dev_ingress_block(struct net_device *dev)
{
const struct Qdisc_class_ops *cops;
struct Qdisc *qdisc;
if (!dev_ingress_queue(dev))
return NULL;
qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
if (!qdisc)
return NULL;
cops = qdisc->ops->cl_ops;
if (!cops)
return NULL;
if (!cops->tcf_block)
return NULL;
return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL);
}
static struct rhashtable indr_setup_block_ht;
struct tc_indr_block_dev {
struct rhash_head ht_node;
struct net_device *dev;
unsigned int refcnt;
struct list_head cb_list;
struct tcf_block *block;
};
struct tc_indr_block_cb {
struct list_head list;
void *cb_priv;
tc_indr_block_bind_cb_t *cb;
void *cb_ident;
};
static const struct rhashtable_params tc_indr_setup_block_ht_params = {
.key_offset = offsetof(struct tc_indr_block_dev, dev),
.head_offset = offsetof(struct tc_indr_block_dev, ht_node),
.key_len = sizeof(struct net_device *),
};
static struct tc_indr_block_dev *
tc_indr_block_dev_lookup(struct net_device *dev)
{
return rhashtable_lookup_fast(&indr_setup_block_ht, &dev,
tc_indr_setup_block_ht_params);
}
static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev)
{
struct tc_indr_block_dev *indr_dev;
indr_dev = tc_indr_block_dev_lookup(dev);
if (indr_dev)
goto inc_ref;
indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL);
if (!indr_dev)
return NULL;
INIT_LIST_HEAD(&indr_dev->cb_list);
indr_dev->dev = dev;
indr_dev->block = tc_dev_ingress_block(dev);
if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
tc_indr_setup_block_ht_params)) {
kfree(indr_dev);
return NULL;
}
inc_ref:
indr_dev->refcnt++;
return indr_dev;
}
static void tc_indr_block_dev_put(struct tc_indr_block_dev *indr_dev)
{
if (--indr_dev->refcnt)
return;
rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node,
tc_indr_setup_block_ht_params);
kfree(indr_dev);
}
static struct tc_indr_block_cb *
tc_indr_block_cb_lookup(struct tc_indr_block_dev *indr_dev,
tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
struct tc_indr_block_cb *indr_block_cb;
list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
if (indr_block_cb->cb == cb &&
indr_block_cb->cb_ident == cb_ident)
return indr_block_cb;
return NULL;
}
static struct tc_indr_block_cb *
tc_indr_block_cb_add(struct tc_indr_block_dev *indr_dev, void *cb_priv,
tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
struct tc_indr_block_cb *indr_block_cb;
indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
if (indr_block_cb)
return ERR_PTR(-EEXIST);
indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL);
if (!indr_block_cb)
return ERR_PTR(-ENOMEM);
indr_block_cb->cb_priv = cb_priv;
indr_block_cb->cb = cb;
indr_block_cb->cb_ident = cb_ident;
list_add(&indr_block_cb->list, &indr_dev->cb_list);
return indr_block_cb;
}
static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb)
{
list_del(&indr_block_cb->list);
kfree(indr_block_cb);
}
static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev,
struct tc_indr_block_cb *indr_block_cb,
enum tc_block_command command)
{
struct tc_block_offload bo = {
.command = command,
.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
.block = indr_dev->block,
};
if (!indr_dev->block)
return;
indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
&bo);
}
int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
struct tc_indr_block_cb *indr_block_cb;
struct tc_indr_block_dev *indr_dev;
int err;
indr_dev = tc_indr_block_dev_get(dev);
if (!indr_dev)
return -ENOMEM;
indr_block_cb = tc_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident);
err = PTR_ERR_OR_ZERO(indr_block_cb);
if (err)
goto err_dev_put;
tc_indr_block_ing_cmd(indr_dev, indr_block_cb, TC_BLOCK_BIND);
return 0;
err_dev_put:
tc_indr_block_dev_put(indr_dev);
return err;
}
EXPORT_SYMBOL_GPL(__tc_indr_block_cb_register);
int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv,
tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
int err;
rtnl_lock();
err = __tc_indr_block_cb_register(dev, cb_priv, cb, cb_ident);
rtnl_unlock();
return err;
}
EXPORT_SYMBOL_GPL(tc_indr_block_cb_register);
void __tc_indr_block_cb_unregister(struct net_device *dev,
tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
struct tc_indr_block_cb *indr_block_cb;
struct tc_indr_block_dev *indr_dev;
indr_dev = tc_indr_block_dev_lookup(dev);
if (!indr_dev)
return;
indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident);
if (!indr_block_cb)
return;
/* Send unbind message if required to free any block cbs. */
tc_indr_block_ing_cmd(indr_dev, indr_block_cb, TC_BLOCK_UNBIND);
tc_indr_block_cb_del(indr_block_cb);
tc_indr_block_dev_put(indr_dev);
}
EXPORT_SYMBOL_GPL(__tc_indr_block_cb_unregister);
void tc_indr_block_cb_unregister(struct net_device *dev,
tc_indr_block_bind_cb_t *cb, void *cb_ident)
{
rtnl_lock();
__tc_indr_block_cb_unregister(dev, cb, cb_ident);
rtnl_unlock();
}
EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister);
static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev,
struct tcf_block_ext_info *ei,
enum tc_block_command command,
struct netlink_ext_ack *extack)
{
struct tc_indr_block_cb *indr_block_cb;
struct tc_indr_block_dev *indr_dev;
struct tc_block_offload bo = {
.command = command,
.binder_type = ei->binder_type,
.block = block,
.extack = extack,
};
indr_dev = tc_indr_block_dev_lookup(dev);
if (!indr_dev)
return;
indr_dev->block = command == TC_BLOCK_BIND ? block : NULL;
list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
&bo);
}
static bool tcf_block_offload_in_use(struct tcf_block *block) static bool tcf_block_offload_in_use(struct tcf_block *block)
{ {
return block->offloadcnt; return block->offloadcnt;
@ -406,12 +646,17 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND, extack); err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_BIND, extack);
if (err == -EOPNOTSUPP) if (err == -EOPNOTSUPP)
goto no_offload_dev_inc; goto no_offload_dev_inc;
return err; if (err)
return err;
tc_indr_block_call(block, dev, ei, TC_BLOCK_BIND, extack);
return 0;
no_offload_dev_inc: no_offload_dev_inc:
if (tcf_block_offload_in_use(block)) if (tcf_block_offload_in_use(block))
return -EOPNOTSUPP; return -EOPNOTSUPP;
block->nooffloaddevcnt++; block->nooffloaddevcnt++;
tc_indr_block_call(block, dev, ei, TC_BLOCK_BIND, extack);
return 0; return 0;
} }
@ -421,6 +666,8 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
struct net_device *dev = q->dev_queue->dev; struct net_device *dev = q->dev_queue->dev;
int err; int err;
tc_indr_block_call(block, dev, ei, TC_BLOCK_UNBIND, NULL);
if (!dev->netdev_ops->ndo_setup_tc) if (!dev->netdev_ops->ndo_setup_tc)
goto no_offload_dev_dec; goto no_offload_dev_dec;
err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND, NULL); err = tcf_block_offload_cmd(block, dev, ei, TC_BLOCK_UNBIND, NULL);
@ -2355,6 +2602,11 @@ static int __init tc_filter_init(void)
if (err) if (err)
goto err_register_pernet_subsys; goto err_register_pernet_subsys;
err = rhashtable_init(&indr_setup_block_ht,
&tc_indr_setup_block_ht_params);
if (err)
goto err_rhash_setup_block_ht;
rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0); rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0); rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter, rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
@ -2366,6 +2618,8 @@ static int __init tc_filter_init(void)
return 0; return 0;
err_rhash_setup_block_ht:
unregister_pernet_subsys(&tcf_net_ops);
err_register_pernet_subsys: err_register_pernet_subsys:
destroy_workqueue(tc_filter_wq); destroy_workqueue(tc_filter_wq);
return err; return err;