linux/tools/testing/selftests/bpf/progs/test_tc_dtime.c
Martin KaFai Lau e6ff92f41b selftests/bpf: Fix tc_redirect_dtime
tc_redirect_dtime has been reported as flaky from time to time.  When it
fails, it always fails in the udp test, complaining that bpf@tc-ingress
saw a skb->tstamp while handling a udp packet.  That is unexpected
because skb->tstamp should have been cleared when the skb crossed
into a different netns.

The most likely cause is that the skb is actually a tcp packet
from the earlier tcp test.  It could be the final TCP_FIN handling.

This patch tightens the skb->tstamp check in the bpf prog.  It ensures
the skb is the current testing traffic.  First, it checks that skb
matches the IPPROTO of the running test (i.e. tcp vs udp).
Second, it checks the server port (dst_ns_port).  The server
port is unique for each test (50000 + test_enum).

Also fixed a typo in test_udp_dtime(): s/P100/P101/

Fixes: c803475fd8dd ("bpf: selftests: test skb->tstamp in redirect_neigh")
Reported-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20220601234050.2572671-1-kafai@fb.com
2022-06-03 14:53:33 -07:00

399 lines
8.3 KiB
C

// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2022 Meta
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include <linux/bpf.h>
#include <linux/stddef.h>
#include <linux/pkt_cls.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include <sys/socket.h>
/* veth_src --- veth_src_fwd --- veth_dst_fwd --- veth_dst
 *           |                                 |
 *  ns_src   |              ns_fwd             |   ns_dst
 *
 * ns_src and ns_dst: ENDHOST namespace
 *            ns_fwd: Forwarding namespace
 */
/* Turn an integer context field (e.g. skb->data, skb->data_end) into a
 * pointer. The whole expansion is parenthesized so it can be used as an
 * operand inside larger expressions.
 */
#define ctx_ptr(field)		((void *)(long)(field))

/* IPv4 addresses, in network byte order, that skb_get_type() matches
 * against iphdr->saddr to recognize the sending end host.
 */
#define ip4_src			__bpf_htonl(0xac100164) /* 172.16.1.100 */
#define ip4_dst			__bpf_htonl(0xac100264) /* 172.16.2.100 */

/* 16-byte initializers for the IPv6 counterparts:
 * ::1:dead:beef:cafe and ::2:dead:beef:cafe. Cast to
 * (struct in6_addr) at the point of use.
 */
#define ip6_src			{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
				  0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
#define ip6_dst			{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
				  0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }

/* Compare two IPv6 addresses one 32-bit word at a time. Both arguments
 * are parenthesized so any expression of struct type works (for example
 * a dereferenced pointer). Each argument is evaluated up to four times,
 * so do not pass expressions with side effects.
 */
#define v6_equal(a, b)		((a).s6_addr32[0] == (b).s6_addr32[0] && \
				 (a).s6_addr32[1] == (b).s6_addr32[1] && \
				 (a).s6_addr32[2] == (b).s6_addr32[2] && \
				 (a).s6_addr32[3] == (b).s6_addr32[3])
/* Interface indexes handed to bpf_redirect_neigh() by
 * ingress_fwdns_prio101(): IFINDEX_DST for skbs sent by SRC_NS and
 * IFINDEX_SRC for skbs sent by DST_NS.
 * NOTE(review): presumably filled in by the user-space loader before the
 * programs are loaded; that code is not visible here.
 */
volatile const __u32 IFINDEX_SRC;
volatile const __u32 IFINDEX_DST;
/* Marker values written to skb->tstamp so that the next hook can tell
 * whether the delivery time survived:
 * EGRESS_ENDHOST_MAGIC is written by egress_host(),
 * INGRESS_FWDNS_MAGIC by ingress_fwdns_prio101() and
 * EGRESS_FWDNS_MAGIC by egress_fwdns_prio101().
 */
#define EGRESS_ENDHOST_MAGIC 0x0b9fbeef
#define INGRESS_FWDNS_MAGIC 0x1b9fbeef
#define EGRESS_FWDNS_MAGIC 0x2b9fbeef
/* Column indexes into dtimes[][] and errs[][]: one per check point. */
enum {
/* tc ingress in the forwarding netns, priority 100 and 101 */
INGRESS_FWDNS_P100,
INGRESS_FWDNS_P101,
/* tc egress in the forwarding netns, priority 100 and 101 */
EGRESS_FWDNS_P100,
EGRESS_FWDNS_P101,
/* tc ingress and egress in the end-host netns (ns_src / ns_dst) */
INGRESS_ENDHOST,
EGRESS_ENDHOST,
/* unexpected bpf_skb_set_tstamp() results */
SET_DTIME,
/* number of columns, not a check point */
__MAX_CNT,
};
/* Test ids, stored in the global 'test'. The id selects the row of
 * dtimes[][] / errs[][] and, in skb_get_type(), the expected server
 * port (50000 + id).
 */
enum {
/* the only test for which fwdns_clear_dtime() is true */
TCP_IP6_CLEAR_DTIME,
/* ids below TCP_IP4_RT_FWD make bpf_fwd() true */
TCP_IP4,
TCP_IP6,
UDP_IP4,
UDP_IP6,
/* from here on bpf_fwd() is false: ingress_fwdns_prio101() returns
 * TC_ACT_OK instead of calling bpf_redirect_neigh()
 */
TCP_IP4_RT_FWD,
TCP_IP6_RT_FWD,
UDP_IP4_RT_FWD,
UDP_IP6_RT_FWD,
/* fallback row used by inc_dtimes()/inc_errs() when 'test' is out of range */
UKN_TEST,
/* number of rows, not a test */
__NR_TESTS,
};
/* Sender netns as encoded in the second byte of skb_get_type()'s result.
 * Values start at 1 because skb_get_type() uses 0 for "sender not
 * recognized".
 */
enum {
SRC_NS = 1,
DST_NS,
};
/* Counters indexed as [test id][check point]: dtimes is incremented by
 * inc_dtimes() and errs by inc_errs().
 */
__u32 dtimes[__NR_TESTS][__MAX_CNT] = {};
__u32 errs[__NR_TESTS][__MAX_CNT] = {};
/* Id of the running test (one of the test-id enum values).
 * NOTE(review): presumably written by the user-space side of the
 * selftest before traffic is sent; no writer is visible in this file.
 */
__u32 test = 0;
/* Increment dtimes[][] at check point @idx for the running test. A test
 * id that is not below __NR_TESTS is accounted in the UKN_TEST row.
 */
static void inc_dtimes(__u32 idx)
{
	__u32 row = UKN_TEST;

	if (test < __NR_TESTS)
		row = test;

	dtimes[row][idx]++;
}
/* Increment errs[][] at check point @idx for the running test. A test
 * id that is not below __NR_TESTS is accounted in the UKN_TEST row.
 */
static void inc_errs(__u32 idx)
{
	__u32 row = UKN_TEST;

	if (test < __NR_TESTS)
		row = test;

	errs[row][idx]++;
}
/* Return the low byte of a skb_get_type() result, which holds the
 * inet protocol number.
 */
static int skb_proto(int type)
{
	const int low_byte = 0xff;

	return type & low_byte;
}
/* Return the second byte of a skb_get_type() result, which holds the
 * sender netns (SRC_NS or DST_NS).
 */
static int skb_ns(int type)
{
	int shifted = type >> 8;

	return shifted & 0xff;
}
/* True only while TCP_IP6_CLEAR_DTIME is the running test. The prio100
 * programs in the forwarding netns then zero skb->tstamp.
 */
static bool fwdns_clear_dtime(void)
{
	if (test != TCP_IP6_CLEAR_DTIME)
		return false;

	return true;
}
/* True for the test ids listed before TCP_IP4_RT_FWD. For those,
 * ingress_fwdns_prio101() forwards with bpf_redirect_neigh(); for the
 * remaining ids it returns TC_ACT_OK.
 */
static bool bpf_fwd(void)
{
	if (test >= TCP_IP4_RT_FWD)
		return false;

	return true;
}
/* Return the inet protocol exercised by the running test: IPPROTO_UDP
 * for the four UDP_* test ids, IPPROTO_TCP for every other value.
 */
static __u8 get_proto(void)
{
	bool udp_test = test == UDP_IP4 || test == UDP_IP6 ||
			test == UDP_IP4_RT_FWD || test == UDP_IP6_RT_FWD;

	if (udp_test)
		return IPPROTO_UDP;

	return IPPROTO_TCP;
}
/* -1: parse error: TC_ACT_SHOT
* 0: not testing traffic: TC_ACT_OK
* >0: first byte is the inet_proto, second byte has the netns
* of the sender
*/
static int skb_get_type(struct __sk_buff *skb)
{
__u16 dst_ns_port = __bpf_htons(50000 + test);
void *data_end = ctx_ptr(skb->data_end);
void *data = ctx_ptr(skb->data);
__u8 inet_proto = 0, ns = 0;
struct ipv6hdr *ip6h;
__u16 sport, dport;
struct iphdr *iph;
struct tcphdr *th;
struct udphdr *uh;
void *trans;
switch (skb->protocol) {
case __bpf_htons(ETH_P_IP):
iph = data + sizeof(struct ethhdr);
if (iph + 1 > data_end)
return -1;
if (iph->saddr == ip4_src)
ns = SRC_NS;
else if (iph->saddr == ip4_dst)
ns = DST_NS;
inet_proto = iph->protocol;
trans = iph + 1;
break;
case __bpf_htons(ETH_P_IPV6):
ip6h = data + sizeof(struct ethhdr);
if (ip6h + 1 > data_end)
return -1;
if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_src))
ns = SRC_NS;
else if (v6_equal(ip6h->saddr, (struct in6_addr)ip6_dst))
ns = DST_NS;
inet_proto = ip6h->nexthdr;
trans = ip6h + 1;
break;
default:
return 0;
}
/* skb is not from src_ns or dst_ns.
* skb is not the testing IPPROTO.
*/
if (!ns || inet_proto != get_proto())
return 0;
switch (inet_proto) {
case IPPROTO_TCP:
th = trans;
if (th + 1 > data_end)
return -1;
sport = th->source;
dport = th->dest;
break;
case IPPROTO_UDP:
uh = trans;
if (uh + 1 > data_end)
return -1;
sport = uh->source;
dport = uh->dest;
break;
default:
return 0;
}
/* The skb is the testing traffic */
if ((ns == SRC_NS && dport == dst_ns_port) ||
(ns == DST_NS && sport == dst_ns_port))
return (ns << 8 | inet_proto);
return 0;
}
/* format: direction@iface@netns
 * egress@veth_(src|dst)@ns_(src|dst)
 *
 * Checks the tstamp of testing traffic leaving an end host: TCP skbs
 * must carry a non-zero tstamp typed BPF_SKB_TSTAMP_DELIVERY_MONO, all
 * other testing skbs a non-zero tstamp typed BPF_SKB_TSTAMP_UNSPEC.
 * The tstamp is then overwritten with EGRESS_ENDHOST_MAGIC.
 */
SEC("tc")
int egress_host(struct __sk_buff *skb)
{
	int skb_type = skb_get_type(skb);
	__u8 want_tstamp_type;

	switch (skb_type) {
	case -1:
		return TC_ACT_SHOT;
	case 0:
		return TC_ACT_OK;
	default:
		break;
	}

	want_tstamp_type = skb_proto(skb_type) == IPPROTO_TCP ?
			   BPF_SKB_TSTAMP_DELIVERY_MONO :
			   BPF_SKB_TSTAMP_UNSPEC;

	if (skb->tstamp_type == want_tstamp_type && skb->tstamp)
		inc_dtimes(EGRESS_ENDHOST);
	else
		inc_errs(EGRESS_ENDHOST);

	skb->tstamp = EGRESS_ENDHOST_MAGIC;

	return TC_ACT_OK;
}
/* ingress@veth_(src|dst)@ns_(src|dst)
 *
 * Testing traffic arriving at an end host must still carry the
 * EGRESS_FWDNS_MAGIC tstamp typed BPF_SKB_TSTAMP_DELIVERY_MONO.
 */
SEC("tc")
int ingress_host(struct __sk_buff *skb)
{
	int skb_type = skb_get_type(skb);

	switch (skb_type) {
	case -1:
		return TC_ACT_SHOT;
	case 0:
		return TC_ACT_OK;
	default:
		break;
	}

	if (skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO ||
	    skb->tstamp != EGRESS_FWDNS_MAGIC)
		inc_errs(INGRESS_ENDHOST);
	else
		inc_dtimes(INGRESS_ENDHOST);

	return TC_ACT_OK;
}
/* ingress@veth_(src|dst)_fwd@ns_fwd priority 100
 *
 * This program deliberately never reads skb->tstamp_type.
 */
SEC("tc")
int ingress_fwdns_prio100(struct __sk_buff *skb)
{
	int skb_type = skb_get_type(skb);

	switch (skb_type) {
	case -1:
		return TC_ACT_SHOT;
	case 0:
		return TC_ACT_OK;
	default:
		break;
	}

	/* delivery_time is only available to the ingress
	 * if the tc-bpf checks the skb->tstamp_type, so seeing
	 * the end host's magic value here is an error.
	 */
	if (skb->tstamp == EGRESS_ENDHOST_MAGIC)
		inc_errs(INGRESS_FWDNS_P100);

	if (fwdns_clear_dtime())
		skb->tstamp = 0;

	/* TC_ACT_UNSPEC: leave the verdict to the prio101 program. */
	return TC_ACT_UNSPEC;
}
/* egress@veth_(src|dst)_fwd@ns_fwd priority 100
 *
 * This program deliberately never reads skb->tstamp_type.
 */
SEC("tc")
int egress_fwdns_prio100(struct __sk_buff *skb)
{
	int skb_type = skb_get_type(skb);

	switch (skb_type) {
	case -1:
		return TC_ACT_SHOT;
	case 0:
		return TC_ACT_OK;
	default:
		break;
	}

	/* delivery_time is always available to egress even
	 * the tc-bpf did not use the tstamp_type, so the value
	 * set at ingress must be visible here.
	 */
	if (skb->tstamp != INGRESS_FWDNS_MAGIC)
		inc_errs(EGRESS_FWDNS_P100);
	else
		inc_dtimes(EGRESS_FWDNS_P100);

	if (fwdns_clear_dtime())
		skb->tstamp = 0;

	/* TC_ACT_UNSPEC: leave the verdict to the prio101 program. */
	return TC_ACT_UNSPEC;
}
/* ingress@veth_(src|dst)_fwd@ns_fwd priority 101
 *
 * Verifies the tstamp seen at the forwarding netns ingress, stamps the
 * skb with INGRESS_FWDNS_MAGIC and, when bpf_fwd() is true, forwards it
 * towards the other end host with bpf_redirect_neigh().
 */
SEC("tc")
int ingress_fwdns_prio101(struct __sk_buff *skb)
{
	int skb_type = skb_get_type(skb);
	bool cleared = fwdns_clear_dtime();
	__u64 expected_dtime;

	switch (skb_type) {
	case -1:
	case 0:
		/* Should have handled in prio100 */
		return TC_ACT_SHOT;
	default:
		break;
	}

	/* A UDP skb is expected to arrive with tstamp 0. */
	expected_dtime = skb_proto(skb_type) == IPPROTO_UDP ?
			 0 : EGRESS_ENDHOST_MAGIC;

	if (!skb->tstamp_type) {
		/* No delivery time: only fine if prio100 cleared it or
		 * none was expected in the first place.
		 */
		if (!cleared && expected_dtime)
			inc_errs(INGRESS_FWDNS_P101);
	} else if (!cleared &&
		   skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO &&
		   skb->tstamp == expected_dtime) {
		inc_dtimes(INGRESS_FWDNS_P101);
	} else {
		inc_errs(INGRESS_FWDNS_P101);
	}

	if (skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO) {
		/* Setting a mono delivery time is expected to succeed... */
		if (bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
				       BPF_SKB_TSTAMP_DELIVERY_MONO))
			inc_errs(SET_DTIME);
		/* ...while this non-zero UNSPEC request is expected to fail. */
		if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
					BPF_SKB_TSTAMP_UNSPEC))
			inc_errs(SET_DTIME);
	} else {
		skb->tstamp = INGRESS_FWDNS_MAGIC;
	}

	if (!bpf_fwd())
		return TC_ACT_OK;

	/* Send the skb out of the interface facing the other end host. */
	if (skb_ns(skb_type) == SRC_NS)
		return bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0);

	return bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0);
}
/* egress@veth_(src|dst)_fwd@ns_fwd priority 101
 *
 * Verifies that the INGRESS_FWDNS_MAGIC delivery time set at ingress is
 * still attached to the skb (unless the running test clears it), then
 * stamps the skb with EGRESS_FWDNS_MAGIC for ingress_host() to check.
 *
 * Return: TC_ACT_SHOT for skbs that are not testing traffic (those
 * should have been handled by the prio100 program), TC_ACT_OK otherwise.
 */
SEC("tc")
int egress_fwdns_prio101(struct __sk_buff *skb)
{
	int skb_type;

	skb_type = skb_get_type(skb);
	if (skb_type == -1 || !skb_type)
		/* Should have handled in prio100 */
		return TC_ACT_SHOT;

	if (skb->tstamp_type) {
		if (fwdns_clear_dtime() ||
		    skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO ||
		    skb->tstamp != INGRESS_FWDNS_MAGIC)
			inc_errs(EGRESS_FWDNS_P101);
		else
			inc_dtimes(EGRESS_FWDNS_P101);
	} else {
		/* A missing delivery time is only fine if prio100 cleared it. */
		if (!fwdns_clear_dtime())
			inc_errs(EGRESS_FWDNS_P101);
	}

	if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) {
		skb->tstamp = EGRESS_FWDNS_MAGIC;
	} else {
		/* Setting a mono delivery time is expected to succeed... */
		if (bpf_skb_set_tstamp(skb, EGRESS_FWDNS_MAGIC,
				       BPF_SKB_TSTAMP_DELIVERY_MONO))
			inc_errs(SET_DTIME);
		/* ...while this non-zero UNSPEC request is expected to fail.
		 * It uses the egress magic like the rest of this program,
		 * so an unexpected success cannot leave the ingress marker
		 * on the skb.
		 */
		if (!bpf_skb_set_tstamp(skb, EGRESS_FWDNS_MAGIC,
					BPF_SKB_TSTAMP_UNSPEC))
			inc_errs(SET_DTIME);
	}

	return TC_ACT_OK;
}
/* License string placed in the "license" ELF section of the object. */
char __license[] SEC("license") = "GPL";