/*
 * linux/include/net/pkt_cls.h
 */

#ifndef __NET_PKT_CLS_H
#define __NET_PKT_CLS_H
#include <linux/pkt_cls.h>
#include <net/sch_generic.h>
#include <net/act_api.h>
/* Basic packet classifier frontend definitions. */
/*
 * Walker state passed to a classifier together with a per-node callback.
 * NOTE(review): the code that drives a walk is not in this header; the
 * meanings of stop/skip/count below are inferred from their names only —
 * confirm against the classifier ->walk() implementations.
 */
struct tcf_walker {
/* presumably set non-zero to terminate the walk early — confirm */
int stop;
/* presumably the number of leading nodes to pass over — confirm */
int skip;
/* presumably a running count of nodes seen so far — confirm */
int count;
/* callback taking the classifier, an unsigned long node value and this walker */
int (*fn)(struct tcf_proto *, unsigned long node, struct tcf_walker *);
};
/*
 * Registration hooks for classifier implementations described by a
 * struct tcf_proto_ops. Only the prototypes live in this header.
 * NOTE(review): return convention is presumably 0 on success and a
 * negative errno on failure — confirm at the definitions.
 */
int register_tcf_proto_ops(struct tcf_proto_ops *ops);
int unregister_tcf_proto_ops(struct tcf_proto_ops *ops);
/*
 * __cls_set_class - atomically replace a bound class value
 * @clp: location holding the current class value
 * @cl:  new class value to store (0 clears the binding)
 *
 * Stores @cl into *@clp with xchg() and returns the value that was there
 * before. No lock is taken here; cls_set_class() is the variant that wraps
 * this call in the tree lock.
 *
 * Why the lockless exchange is acceptable (from the change "net_sched:
 * avoid calling tcf_unbind_filter() in call_rcu callback"): any running
 * scheduler will either read the valid class field or see it zeroed, and
 * all schedulers, when the class is 0, do a lookup using the same call
 * used at bind time, so they reach the same result. This is fragile: the
 * only way to verify it is to audit the schedulers' call sites.
 */
static inline unsigned long
__cls_set_class(unsigned long *clp, unsigned long cl)
{
	return xchg(clp, cl);
}
/*
 * cls_set_class - replace a bound class value under the tree lock
 * @tp:  classifier whose tree lock is taken around the update
 * @clp: location holding the current class value
 * @cl:  new class value to store
 *
 * Locked wrapper around __cls_set_class(). Returns the value previously
 * stored in *@clp.
 */
static inline unsigned long
cls_set_class(struct tcf_proto *tp, unsigned long *clp,
	      unsigned long cl)
{
	unsigned long prev;

	tcf_tree_lock(tp);
	prev = __cls_set_class(clp, cl);
	tcf_tree_unlock(tp);

	return prev;
}
/*
 * tcf_bind_filter - bind a filter result to the class named by its classid
 * @tp:   classifier owning the result
 * @r:    result whose ->classid selects the class and whose ->class is updated
 * @base: value forwarded unchanged to the qdisc's bind_tcf() hook
 *
 * Asks the qdisc class ops to bind r->classid, publishes the returned value
 * in r->class via cls_set_class(), and, if a non-zero value was stored there
 * before, hands that old value to unbind_tcf().
 */
static inline void
tcf_bind_filter(struct tcf_proto *tp, struct tcf_result *r, unsigned long base)
{
	unsigned long bound;
	unsigned long stale;

	bound = tp->q->ops->cl_ops->bind_tcf(tp->q, base, r->classid);
	stale = cls_set_class(tp, &r->class, bound);
	if (stale == 0)
		return;

	/* A previous binding was displaced: release it. */
	tp->q->ops->cl_ops->unbind_tcf(tp->q, stale);
}
/*
 * tcf_unbind_filter - drop the class binding held by a filter result
 * @tp: classifier owning the result
 * @r:  result whose ->class is cleared
 *
 * Swaps r->class with 0 using the unlocked __cls_set_class() (not
 * cls_set_class()) and, when the old value was non-zero, passes it to the
 * qdisc's unbind_tcf() hook.
 */
static inline void
tcf_unbind_filter(struct tcf_proto *tp, struct tcf_result *r)
{
	unsigned long stale = __cls_set_class(&r->class, 0);

	if (stale != 0)
		tp->q->ops->cl_ops->unbind_tcf(tp->q, stale);
}
/*
 * Per-filter extension state. The action list and the compat type exist
 * only when CONFIG_NET_CLS_ACT is enabled; the two TLV-type mappings are
 * always present. Initialise with tcf_exts_init().
 */
struct tcf_exts {
#ifdef CONFIG_NET_CLS_ACT
__u32 type; /* for backward compat(TCA_OLD_COMPAT) */
/* list head of attached actions; an empty list means no actions */
struct list_head actions;
#endif
/* Map to export classifier specific extension TLV types to the
 * generic extensions API. Unsupported extensions must be set to 0.
 */
int action;
int police;
};
/*
 * tcf_exts_init - put a tcf_exts into its empty state
 * @exts:   extensions handle to initialise
 * @action: classifier-specific TLV type recorded in exts->action
 * @police: classifier-specific TLV type recorded in exts->police
 *
 * With CONFIG_NET_CLS_ACT the compat type is zeroed and the action list
 * is initialised empty as well.
 */
static inline void tcf_exts_init(struct tcf_exts *exts, int action, int police)
{
	exts->action = action;
	exts->police = police;
#ifdef CONFIG_NET_CLS_ACT
	INIT_LIST_HEAD(&exts->actions);
	exts->type = 0;
#endif
}
/**
 * tcf_exts_is_predicative - check if a predicative extension is present
 * @exts: tc filter extensions handle
 *
 * A predicative extension is one that might cause further actions and
 * thus overrule the regular tcf_result. Returns 1 when the action list
 * is non-empty; returns 0 when it is empty or when CONFIG_NET_CLS_ACT
 * is not enabled.
 */
static inline int
tcf_exts_is_predicative(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
	if (list_empty(&exts->actions))
		return 0;
	return 1;
#else
	return 0;
#endif
}
/**
 * tcf_exts_is_available - check if at least one extension is present
 * @exts: tc filter extensions handle
 *
 * Returns 1 if at least one extension is present. Currently this is
 * exactly the predicative check.
 */
static inline int
tcf_exts_is_available(struct tcf_exts *exts)
{
	int present;

	/* All non-predicative extensions must be added here. */
	present = tcf_exts_is_predicative(exts);

	return present;
}
/**
 * tcf_exts_exec - execute tc filter extensions
 * @skb: socket buffer
 * @exts: tc filter extensions handle
 * @res: desired result
 *
 * Runs every configured extension. The return value is 0 for a normal
 * execution, negative if the filter must be considered unmatched, or a
 * positive action code (TC_ACT_*) which must be returned to the
 * underlying layer. Without CONFIG_NET_CLS_ACT, or with an empty action
 * list, nothing is executed and 0 is returned.
 */
static inline int
tcf_exts_exec(struct sk_buff *skb, struct tcf_exts *exts,
	      struct tcf_result *res)
{
#ifdef CONFIG_NET_CLS_ACT
	if (list_empty(&exts->actions))
		return 0;

	return tcf_action_exec(skb, &exts->actions, res);
#else
	return 0;
#endif
}
/*
 * Out-of-line tcf_exts operations; only the prototypes live in this header.
 * NOTE(review): behaviour is inferred from names (validate netlink
 * attributes into @exts, destroy, replace @dst with @src, dump to an skb)
 * — confirm at the definitions.
 */
int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
struct nlattr **tb, struct nlattr *rate_tlv,
struct tcf_exts *exts, bool ovr);
void tcf_exts_destroy(struct tcf_exts *exts);
void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst,
struct tcf_exts *src);
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts);
int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts);
/**
 * struct tcf_pkt_info - packet information
 *
 * @ptr: byte pointer supplied by the classifier
 *       (NOTE(review): presumably its current position in the packet — confirm)
 * @nexthdr: NOTE(review): presumably identifies/locates the next header —
 *           confirm against the classifiers that fill it in
 *
 * Passed to ematch ->match() callbacks and to tcf_em_tree_match().
 */
struct tcf_pkt_info {
unsigned char * ptr;
int nexthdr;
};
#ifdef CONFIG_NET_EMATCH
struct tcf_ematch_ops;
/**
 * struct tcf_ematch - extended match (ematch)
 *
 * @matchid: identifier to allow userspace to reidentify a match
 * @flags: flags specifying attributes and the relation to other matches
 * @ops: the operations lookup table of the corresponding ematch module;
 *       a NULL @ops marks a container (see tcf_em_is_container())
 * @datalen: length of the ematch specific configuration data
 * @data: ematch specific data
 * @net: network namespace pointer
 *       (NOTE(review): presumably the namespace the ematch belongs to — confirm)
 */
struct tcf_ematch {
struct tcf_ematch_ops * ops;
unsigned long data;
unsigned int datalen;
u16 matchid;
u16 flags;
struct net *net;
};
/* Returns 1 when @em has no ops table (i.e. it is a container), else 0. */
static inline int tcf_em_is_container(struct tcf_ematch *em)
{
	return em->ops == NULL;
}
/*
 * Returns the TCF_EM_SIMPLE bit of @em's flags: non-zero when set, 0 when
 * clear. The result is the masked value, not a normalised 0/1.
 */
static inline int tcf_em_is_simple(struct tcf_ematch *em)
{
	int simple_bit = em->flags & TCF_EM_SIMPLE;

	return simple_bit;
}
/*
 * Returns the TCF_EM_INVERT bit of @em's flags: non-zero when set, 0 when
 * clear. The result is the masked value, not a normalised 0/1.
 */
static inline int tcf_em_is_inverted(struct tcf_ematch *em)
{
	int invert_bit = em->flags & TCF_EM_INVERT;

	return invert_bit;
}
/*
 * Returns 1 when the relation bits of @em's flags (TCF_EM_REL_MASK) equal
 * TCF_EM_REL_END, else 0.
 */
static inline int tcf_em_last_match(struct tcf_ematch *em)
{
	int relation = em->flags & TCF_EM_REL_MASK;

	return relation == TCF_EM_REL_END;
}
/*
 * tcf_em_early_end - decide whether evaluation can stop after this ematch
 * @em:     ematch that has just been evaluated
 * @result: its match result (0 = no match, non-zero = match)
 *
 * Returns 1 if @em is the last match, if @result is 0 and @em has
 * TCF_EM_REL_AND set, or if @result is non-zero and @em has
 * TCF_EM_REL_OR set. Returns 0 otherwise.
 */
static inline int tcf_em_early_end(struct tcf_ematch *em, int result)
{
	if (tcf_em_last_match(em))
		return 1;

	if (result == 0)
		return (em->flags & TCF_EM_REL_AND) ? 1 : 0;

	return (em->flags & TCF_EM_REL_OR) ? 1 : 0;
}
/**
 * struct tcf_ematch_tree - ematch tree handle
 *
 * @hdr: ematch tree header supplied by userspace; its nmatches field is
 *       what tcf_em_tree_match() checks before evaluating the tree
 * @matches: array of ematches
 */
struct tcf_ematch_tree {
struct tcf_ematch_tree_hdr hdr;
struct tcf_ematch * matches;
};
/**
 * struct tcf_ematch_ops - ematch module operations
 *
 * @kind: identifier (kind) of this ematch module
 * @datalen: length of expected configuration data (optional)
 * @change: called during validation (optional)
 * @match: called during ematch tree evaluation, must return 1/0
 * @destroy: called when the ematch is destroyed (optional)
 * @dump: called during dumping process (optional)
 * @owner: owner, must be set to THIS_MODULE
 * @link: link to previous/next ematch module (internal use)
 */
struct tcf_ematch_ops {
int kind;
int datalen;
/* NOTE(review): the unnamed void * / int arguments are presumably the
 * configuration data and its length — confirm against the ematch core. */
int (*change)(struct net *net, void *,
int, struct tcf_ematch *);
int (*match)(struct sk_buff *, struct tcf_ematch *,
struct tcf_pkt_info *);
void (*destroy)(struct tcf_ematch *);
int (*dump)(struct sk_buff *, struct tcf_ematch *);
struct module *owner;
struct list_head link;
};
/*
 * Ematch core entry points; only the prototypes live in this header.
 * __tcf_em_tree_match() is the worker that the inline tcf_em_tree_match()
 * calls when the tree contains at least one match.
 * NOTE(review): the trailing int of tcf_em_tree_dump() is presumably the
 * TLV type to dump under — confirm at the definition.
 */
int tcf_em_register(struct tcf_ematch_ops *);
void tcf_em_unregister(struct tcf_ematch_ops *);
int tcf_em_tree_validate(struct tcf_proto *, struct nlattr *,
struct tcf_ematch_tree *);
void tcf_em_tree_destroy(struct tcf_ematch_tree *);
int tcf_em_tree_dump(struct sk_buff *, struct tcf_ematch_tree *, int);
int __tcf_em_tree_match(struct sk_buff *, struct tcf_ematch_tree *,
struct tcf_pkt_info *);
/**
 * tcf_em_tree_change - replace ematch tree of a running classifier
 *
 * @tp: classifier kind handle
 * @dst: destination ematch tree variable
 * @src: source ematch tree (temporary tree from tcf_em_tree_validate)
 *
 * This function replaces the ematch tree in @dst with the ematch tree
 * in @src. The copy is made while holding the tree lock of @tp, because
 * the classifier in charge of the ematch tree may be running.
 */
static inline void tcf_em_tree_change(struct tcf_proto *tp,
				      struct tcf_ematch_tree *dst,
				      struct tcf_ematch_tree *src)
{
	tcf_tree_lock(tp);
	memcpy(dst, src, sizeof(struct tcf_ematch_tree));
	tcf_tree_unlock(tp);
}
/**
 * tcf_em_tree_match - evaluate an ematch tree
 *
 * @skb: socket buffer of the packet in question
 * @tree: ematch tree to be used for evaluation
 * @info: packet information examined by classifier
 *
 * This function matches @skb against the ematch tree in @tree by going
 * through all ematches respecting their logic relations, returning
 * as soon as the result is obvious.
 *
 * Returns 1 if the ematch tree as-one matches, no ematches are configured
 * or ematch is not enabled in the kernel, otherwise 0 is returned.
 */
static inline int tcf_em_tree_match(struct sk_buff *skb,
				    struct tcf_ematch_tree *tree,
				    struct tcf_pkt_info *info)
{
	/* An empty tree matches everything. */
	if (!tree->hdr.nmatches)
		return 1;

	return __tcf_em_tree_match(skb, tree, info);
}
#define MODULE_ALIAS_TCF_EMATCH(kind) MODULE_ALIAS("ematch-kind-" __stringify(kind))
#else /* CONFIG_NET_EMATCH */
/*
 * CONFIG_NET_EMATCH disabled: the tree type is an empty struct and every
 * ematch operation collapses to a stub macro. Validation and dumping
 * evaluate to 0, destroy and change do nothing, and matching always
 * evaluates to 1. The (void) casts only keep the otherwise unused
 * arguments referenced.
 */
struct tcf_ematch_tree {
};
#define tcf_em_tree_validate(tp, tb, t) ((void)(t), 0)
#define tcf_em_tree_destroy(t) do { (void)(t); } while(0)
#define tcf_em_tree_dump(skb, t, tlv) (0)
#define tcf_em_tree_change(tp, dst, src) do { } while(0)
#define tcf_em_tree_match(skb, t, info) ((void)(info), 1)
#endif /* CONFIG_NET_EMATCH */
/*
 * tcf_get_base_ptr - select the base pointer for a given packet layer
 * @skb:   packet
 * @layer: one of TCF_LAYER_LINK, TCF_LAYER_NETWORK, TCF_LAYER_TRANSPORT
 *
 * Returns skb->data for the link layer, the network header for the
 * network layer and the transport header for the transport layer.
 * Any other @layer value yields NULL.
 */
static inline unsigned char *tcf_get_base_ptr(struct sk_buff *skb, int layer)
{
	unsigned char *base = NULL;

	switch (layer) {
	case TCF_LAYER_LINK:
		base = skb->data;
		break;
	case TCF_LAYER_NETWORK:
		base = skb_network_header(skb);
		break;
	case TCF_LAYER_TRANSPORT:
		base = skb_transport_header(skb);
		break;
	default:
		break;
	}

	return base;
}
/*
 * tcf_valid_offset - check that [ptr, ptr + len] lies inside the skb buffer
 * @skb: packet whose head/tail bound the valid region
 * @ptr: start of the range to check
 * @len: length of the range
 *
 * Returns non-zero when @ptr is not below skb->head, @ptr + @len does not
 * exceed the skb tail pointer, and @ptr + @len is not below @ptr (which
 * rejects a negative @len or a wrapped sum). Returns 0 otherwise.
 */
static inline int tcf_valid_offset(const struct sk_buff *skb,
				   const unsigned char *ptr, const int len)
{
	const unsigned char *end = ptr + len;

	return likely(ptr >= skb->head &&
		      ptr <= end &&
		      end <= skb_tail_pointer(skb));
}
#ifdef CONFIG_NET_CLS_IND
#include <net/net_namespace.h>
/*
 * tcf_change_indev - resolve an input-device name attribute to an ifindex
 * @net:       network namespace to look the device up in
 * @indev_tlv: netlink attribute carrying the device name
 *
 * Copies the name out of @indev_tlv and looks it up by name in @net.
 * Returns the device's ifindex, -EINVAL if the name does not fit in
 * IFNAMSIZ bytes, or -ENODEV if no such device is found.
 */
static inline int
tcf_change_indev(struct net *net, struct nlattr *indev_tlv)
{
	struct net_device *ndev;
	char name[IFNAMSIZ];

	if (nla_strlcpy(name, indev_tlv, IFNAMSIZ) >= IFNAMSIZ)
		return -EINVAL;

	ndev = __dev_get_by_name(net, name);

	return ndev ? ndev->ifindex : -ENODEV;
}
/*
 * tcf_match_indev - test a packet's input interface against an ifindex
 * @skb:     packet to test
 * @ifindex: interface index to match; 0 matches every packet
 *
 * Returns true when @ifindex is 0, false when the packet carries no
 * input interface (skb_iif == 0), and otherwise whether the two indexes
 * are equal.
 */
static inline bool
tcf_match_indev(struct sk_buff *skb, int ifindex)
{
	if (ifindex == 0)
		return true;

	return skb->skb_iif != 0 && skb->skb_iif == ifindex;
}
#endif /* CONFIG_NET_CLS_IND */
#endif