cls_flower: Fix inability to match GRE/IPIP packets
When a packet of a new flow arrives in openvswitch kernel module, it dissects the packet and passes the extracted flow key to ovs-vswtichd daemon. If hw- offload configuration is enabled, the daemon creates a new TC flower entry to bypass openvswitch kernel module for the flow (TC flower can also offload flows to NICs but this time that does not matter). In this processing flow, I found the following issue in cases of GRE/IPIP packets. When ovs_flow_key_extract() in openvswitch module parses a packet of a new GRE (or IPIP) flow received on non-tunneling vports, it extracts information of the outer IP header for ip_proto/src_ip/dst_ip match keys. This means ovs-vswitchd creates a TC flower entry with IP protocol/addresses match keys whose values are those of the outer IP header. OTOH, TC flower, which uses flow_dissector (different parser from openvswitch module), extracts information of the inner IP header. The following flow is an example to describe the issue in more detail. <----------- Outer IP -----------------> <---------- Inner IP ----------> +----------+--------------+--------------+----------+----------+----------+ | ip_proto | src_ip | dst_ip | ip_proto | src_ip | dst_ip | | 47 (GRE) | 192.168.10.1 | 192.168.10.2 | 6 (TCP) | 10.0.0.1 | 10.0.0.2 | +----------+--------------+--------------+----------+----------+----------+ In this case, TC flower entry and extracted information are shown as below: - ovs-vswitchd creates TC flower entry with: - ip_proto: 47 - src_ip: 192.168.10.1 - dst_ip: 192.168.10.2 - TC flower extracts below for IP header matches: - ip_proto: 6 - src_ip: 10.0.0.1 - dst_ip: 10.0.0.2 Thus, GRE or IPIP packets never match the TC flower entry, as each dissector behaves differently. IMHO, the behavior of TC flower (flow dissector) does not look correct, as ip_proto/src_ip/dst_ip in TC flower match means the outermost IP header information except for GRE/IPIP cases. This patch adds a new flow_dissector flag FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP which skips dissection of the encapsulated inner GRE/IPIP header in TC flower classifier. Signed-off-by: Yoshiki Komachi <komachi.yoshiki@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
34d7ecb3d4
commit
6de6e46d27
@ -287,6 +287,7 @@ enum flow_dissector_key_id {
|
||||
#define FLOW_DISSECTOR_F_PARSE_1ST_FRAG BIT(0)
|
||||
#define FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL BIT(1)
|
||||
#define FLOW_DISSECTOR_F_STOP_AT_ENCAP BIT(2)
|
||||
#define FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP BIT(3)
|
||||
|
||||
struct flow_dissector_key {
|
||||
enum flow_dissector_key_id key_id;
|
||||
|
@ -1307,6 +1307,11 @@ ip_proto_again:
|
||||
|
||||
switch (ip_proto) {
|
||||
case IPPROTO_GRE:
|
||||
if (flags & FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP) {
|
||||
fdret = FLOW_DISSECT_RET_OUT_GOOD;
|
||||
break;
|
||||
}
|
||||
|
||||
fdret = __skb_flow_dissect_gre(skb, key_control, flow_dissector,
|
||||
target_container, data,
|
||||
&proto, &nhoff, &hlen, flags);
|
||||
@ -1364,6 +1369,11 @@ ip_proto_again:
|
||||
break;
|
||||
}
|
||||
case IPPROTO_IPIP:
|
||||
if (flags & FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP) {
|
||||
fdret = FLOW_DISSECT_RET_OUT_GOOD;
|
||||
break;
|
||||
}
|
||||
|
||||
proto = htons(ETH_P_IP);
|
||||
|
||||
key_control->flags |= FLOW_DIS_ENCAPSULATION;
|
||||
@ -1376,6 +1386,11 @@ ip_proto_again:
|
||||
break;
|
||||
|
||||
case IPPROTO_IPV6:
|
||||
if (flags & FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP) {
|
||||
fdret = FLOW_DISSECT_RET_OUT_GOOD;
|
||||
break;
|
||||
}
|
||||
|
||||
proto = htons(ETH_P_IPV6);
|
||||
|
||||
key_control->flags |= FLOW_DIS_ENCAPSULATION;
|
||||
|
@ -329,7 +329,8 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
|
||||
ARRAY_SIZE(fl_ct_info_to_flower_map),
|
||||
post_ct);
|
||||
skb_flow_dissect_hash(skb, &mask->dissector, &skb_key);
|
||||
skb_flow_dissect(skb, &mask->dissector, &skb_key, 0);
|
||||
skb_flow_dissect(skb, &mask->dissector, &skb_key,
|
||||
FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);
|
||||
|
||||
f = fl_mask_lookup(mask, &skb_key);
|
||||
if (f && !tc_skip_sw(f->flags)) {
|
||||
|
Loading…
Reference in New Issue
Block a user