Merge branch 'net-sched-fix-ct-zone-matching-for-invalid-conntrack-state'
Paul Blakey says:

====================
net/sched: Fix ct zone matching for invalid conntrack state

Currently, when a packet is marked as invalid by conntrack_in in act_ct, post_ct will be set and the connection info (nf_conn) will be removed from the skb. Later, openvswitch and flower matching will parse this as ct_state=+trk+inv. But because the connection info is missing, there is also no zone info to match against, even though the packet is tracked.

This series fixes that by passing along the zone last executed by act_ct. The zone info is passed from act_ct to the ct flow dissector (used by flower to extract zone info) and to OVS the same way post_ct is passed: via the qdisc layer skb cb to the dissector, and via the skb extension to OVS.

Since adding any more data to the qdisc skb cb would leave no room for the BPF skb cb to extend it and stay under the skb->cb size, this series moves the tc-related info out of the qdisc skb cb into a tc-specific cb that also extends it.
====================

Link: https://lore.kernel.org/r/20211214172435.24207-1-paulb@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
14193d57c8
@ -286,6 +286,7 @@ struct nf_bridge_info {
|
||||
/*
 * TC state attached to an skb as an extension.
 *
 * In the hunks of this commit, tcf_classify() fills it when classification
 * misses on a chain: chain is set to the last executed chain, and mru,
 * post_ct and zone are copied from tc_skb_cb(skb). ovs_flow_key_extract()
 * reads it back: chain becomes the recirc_id, mru is stored in OVS_CB(skb),
 * and zone is only used when post_ct is true.
 */
struct tc_skb_ext {
|
||||
__u32 chain;
|
||||
__u16 mru;
|
||||
__u16 zone;
|
||||
bool post_ct;
|
||||
};
|
||||
#endif
|
||||
@ -1380,7 +1381,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb,
|
||||
struct flow_dissector *flow_dissector,
|
||||
void *target_container,
|
||||
u16 *ctinfo_map, size_t mapsize,
|
||||
bool post_ct);
|
||||
bool post_ct, u16 zone);
|
||||
void
|
||||
skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
|
||||
struct flow_dissector *flow_dissector,
|
||||
|
@ -193,4 +193,20 @@ static inline void skb_txtime_consumed(struct sk_buff *skb)
|
||||
skb->tstamp = ktime_set(0, 0);
|
||||
}
|
||||
|
||||
/*
 * TC-specific layout of skb->cb: the qdisc control block comes first,
 * followed by the per-packet TC state that this commit moves out of
 * struct qdisc_skb_cb.
 *
 * mru:     set from the IPv4/IPv6 frag_max_size after successful defrag in
 *          tcf_ct_handle_fragments(); read by sch_frag_xmit_hook().
 * post_ct: set to true by tcf_ct_act() on its out_push path and reset to
 *          false on the clear path and in sch_handle_ingress()/egress().
 * zone:    set by tcf_ct_act() together with post_ct = true; read by
 *          fl_classify() and copied into the skb extension by tcf_classify().
 */
struct tc_skb_cb {
|
||||
struct qdisc_skb_cb qdisc_cb;
|
||||
|
||||
u16 mru;
|
||||
bool post_ct;
|
||||
u16 zone; /* Only valid if post_ct = true */
|
||||
};
|
||||
|
||||
/*
 * Return the skb's control buffer (skb->cb) viewed as a struct tc_skb_cb.
 *
 * The skb parameter is const, but the returned pointer is not; callers in
 * this commit write through it (e.g. tc_skb_cb(skb)->mru = 0).
 */
static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
|
||||
{
|
||||
struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb;
|
||||
|
||||
/* Fail the build if struct tc_skb_cb no longer fits in skb->cb. */
BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
|
||||
return cb;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -447,8 +447,6 @@ struct qdisc_skb_cb {
|
||||
};
|
||||
#define QDISC_CB_PRIV_LEN 20
|
||||
unsigned char data[QDISC_CB_PRIV_LEN];
|
||||
u16 mru;
|
||||
bool post_ct;
|
||||
};
|
||||
|
||||
typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
|
||||
|
@ -3941,8 +3941,8 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
|
||||
return skb;
|
||||
|
||||
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
|
||||
qdisc_skb_cb(skb)->mru = 0;
|
||||
qdisc_skb_cb(skb)->post_ct = false;
|
||||
tc_skb_cb(skb)->mru = 0;
|
||||
tc_skb_cb(skb)->post_ct = false;
|
||||
mini_qdisc_bstats_cpu_update(miniq, skb);
|
||||
|
||||
switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
|
||||
@ -5103,8 +5103,8 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
|
||||
}
|
||||
|
||||
qdisc_skb_cb(skb)->pkt_len = skb->len;
|
||||
qdisc_skb_cb(skb)->mru = 0;
|
||||
qdisc_skb_cb(skb)->post_ct = false;
|
||||
tc_skb_cb(skb)->mru = 0;
|
||||
tc_skb_cb(skb)->post_ct = false;
|
||||
skb->tc_at_ingress = 1;
|
||||
mini_qdisc_bstats_cpu_update(miniq, skb);
|
||||
|
||||
|
@ -238,7 +238,7 @@ void
|
||||
skb_flow_dissect_ct(const struct sk_buff *skb,
|
||||
struct flow_dissector *flow_dissector,
|
||||
void *target_container, u16 *ctinfo_map,
|
||||
size_t mapsize, bool post_ct)
|
||||
size_t mapsize, bool post_ct, u16 zone)
|
||||
{
|
||||
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
|
||||
struct flow_dissector_key_ct *key;
|
||||
@ -260,6 +260,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb,
|
||||
if (!ct) {
|
||||
key->ct_state = TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
|
||||
TCA_FLOWER_KEY_CT_FLAGS_INVALID;
|
||||
key->ct_zone = zone;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include <net/mpls.h>
|
||||
#include <net/ndisc.h>
|
||||
#include <net/nsh.h>
|
||||
#include <net/netfilter/nf_conntrack_zones.h>
|
||||
|
||||
#include "conntrack.h"
|
||||
#include "datapath.h"
|
||||
@ -860,6 +861,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
|
||||
#endif
|
||||
bool post_ct = false;
|
||||
int res, err;
|
||||
u16 zone = 0;
|
||||
|
||||
/* Extract metadata from packet. */
|
||||
if (tun_info) {
|
||||
@ -898,6 +900,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
|
||||
key->recirc_id = tc_ext ? tc_ext->chain : 0;
|
||||
OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
|
||||
post_ct = tc_ext ? tc_ext->post_ct : false;
|
||||
zone = post_ct ? tc_ext->zone : 0;
|
||||
} else {
|
||||
key->recirc_id = 0;
|
||||
}
|
||||
@ -906,8 +909,11 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
|
||||
#endif
|
||||
|
||||
err = key_extract(skb, key);
|
||||
if (!err)
|
||||
if (!err) {
|
||||
ovs_ct_fill_key(skb, key, post_ct); /* Must be after key_extract(). */
|
||||
if (post_ct && !skb_get_nfct(skb))
|
||||
key->ct_zone = zone;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -690,10 +690,10 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
|
||||
u8 family, u16 zone, bool *defrag)
|
||||
{
|
||||
enum ip_conntrack_info ctinfo;
|
||||
struct qdisc_skb_cb cb;
|
||||
struct nf_conn *ct;
|
||||
int err = 0;
|
||||
bool frag;
|
||||
u16 mru;
|
||||
|
||||
/* Previously seen (loopback)? Ignore. */
|
||||
ct = nf_ct_get(skb, &ctinfo);
|
||||
@ -708,7 +708,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
|
||||
return err;
|
||||
|
||||
skb_get(skb);
|
||||
cb = *qdisc_skb_cb(skb);
|
||||
mru = tc_skb_cb(skb)->mru;
|
||||
|
||||
if (family == NFPROTO_IPV4) {
|
||||
enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
|
||||
@ -722,7 +722,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
|
||||
|
||||
if (!err) {
|
||||
*defrag = true;
|
||||
cb.mru = IPCB(skb)->frag_max_size;
|
||||
mru = IPCB(skb)->frag_max_size;
|
||||
}
|
||||
} else { /* NFPROTO_IPV6 */
|
||||
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
|
||||
@ -735,7 +735,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
|
||||
|
||||
if (!err) {
|
||||
*defrag = true;
|
||||
cb.mru = IP6CB(skb)->frag_max_size;
|
||||
mru = IP6CB(skb)->frag_max_size;
|
||||
}
|
||||
#else
|
||||
err = -EOPNOTSUPP;
|
||||
@ -744,7 +744,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
|
||||
}
|
||||
|
||||
if (err != -EINPROGRESS)
|
||||
*qdisc_skb_cb(skb) = cb;
|
||||
tc_skb_cb(skb)->mru = mru;
|
||||
skb_clear_hash(skb);
|
||||
skb->ignore_df = 1;
|
||||
return err;
|
||||
@ -963,7 +963,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
|
||||
tcf_action_update_bstats(&c->common, skb);
|
||||
|
||||
if (clear) {
|
||||
qdisc_skb_cb(skb)->post_ct = false;
|
||||
tc_skb_cb(skb)->post_ct = false;
|
||||
ct = nf_ct_get(skb, &ctinfo);
|
||||
if (ct) {
|
||||
nf_conntrack_put(&ct->ct_general);
|
||||
@ -1048,7 +1048,8 @@ do_nat:
|
||||
out_push:
|
||||
skb_push_rcsum(skb, nh_ofs);
|
||||
|
||||
qdisc_skb_cb(skb)->post_ct = true;
|
||||
tc_skb_cb(skb)->post_ct = true;
|
||||
tc_skb_cb(skb)->zone = p->zone;
|
||||
out_clear:
|
||||
if (defrag)
|
||||
qdisc_skb_cb(skb)->pkt_len = skb->len;
|
||||
|
@ -1617,12 +1617,15 @@ int tcf_classify(struct sk_buff *skb,
|
||||
|
||||
/* If we missed on some chain */
|
||||
if (ret == TC_ACT_UNSPEC && last_executed_chain) {
|
||||
struct tc_skb_cb *cb = tc_skb_cb(skb);
|
||||
|
||||
ext = tc_skb_ext_alloc(skb);
|
||||
if (WARN_ON_ONCE(!ext))
|
||||
return TC_ACT_SHOT;
|
||||
ext->chain = last_executed_chain;
|
||||
ext->mru = qdisc_skb_cb(skb)->mru;
|
||||
ext->post_ct = qdisc_skb_cb(skb)->post_ct;
|
||||
ext->mru = cb->mru;
|
||||
ext->post_ct = cb->post_ct;
|
||||
ext->zone = cb->zone;
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
@ -19,6 +19,7 @@
|
||||
|
||||
#include <net/sch_generic.h>
|
||||
#include <net/pkt_cls.h>
|
||||
#include <net/pkt_sched.h>
|
||||
#include <net/ip.h>
|
||||
#include <net/flow_dissector.h>
|
||||
#include <net/geneve.h>
|
||||
@ -309,7 +310,8 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
|
||||
struct tcf_result *res)
|
||||
{
|
||||
struct cls_fl_head *head = rcu_dereference_bh(tp->root);
|
||||
bool post_ct = qdisc_skb_cb(skb)->post_ct;
|
||||
bool post_ct = tc_skb_cb(skb)->post_ct;
|
||||
u16 zone = tc_skb_cb(skb)->zone;
|
||||
struct fl_flow_key skb_key;
|
||||
struct fl_flow_mask *mask;
|
||||
struct cls_fl_filter *f;
|
||||
@ -327,7 +329,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
|
||||
skb_flow_dissect_ct(skb, &mask->dissector, &skb_key,
|
||||
fl_ct_info_to_flower_map,
|
||||
ARRAY_SIZE(fl_ct_info_to_flower_map),
|
||||
post_ct);
|
||||
post_ct, zone);
|
||||
skb_flow_dissect_hash(skb, &mask->dissector, &skb_key);
|
||||
skb_flow_dissect(skb, &mask->dissector, &skb_key,
|
||||
FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);
|
||||
|
@ -1,6 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
|
||||
#include <net/netlink.h>
|
||||
#include <net/sch_generic.h>
|
||||
#include <net/pkt_sched.h>
|
||||
#include <net/dst.h>
|
||||
#include <net/ip.h>
|
||||
#include <net/ip6_fib.h>
|
||||
@ -137,7 +138,7 @@ err:
|
||||
|
||||
int sch_frag_xmit_hook(struct sk_buff *skb, int (*xmit)(struct sk_buff *skb))
|
||||
{
|
||||
u16 mru = qdisc_skb_cb(skb)->mru;
|
||||
u16 mru = tc_skb_cb(skb)->mru;
|
||||
int err;
|
||||
|
||||
if (mru && skb->len > mru + skb->dev->hard_header_len)
|
||||
|
Loading…
Reference in New Issue
Block a user