97396ff0bc
This XDP selftest also contains a small TC-bpf component. It provokes the generic-XDP bug fixed in the previous commit. The selftest itself shows how to do VLAN manipulation from XDP and TC. The test demonstrates how XDP ingress can remove a VLAN tag, and how TC egress can add back a VLAN tag. This use-case originates from a production need by an ISP (kviknet.dk), who gets DSL-lines terminated as VLAN Q-in-Q tagged packets, and wants to avoid having a net_device for every end-customer on the box doing the L2 to L3 termination. The test-setup is done via a veth-pair and by creating two network namespaces (ns1 and ns2). The 'ns1' simulates the ISP network that is loading the BPF-progs stripping and adding VLAN IDs. The 'ns2' simulates the DSL-customer that is using VLAN tagged packets. Running the script with --interactive will simply not call the cleanup function. This gives the effect of creating a testlab that the users can inspect and play with. The --verbose option will simply request that the shell print input lines as they are read; this includes comments, which in effect makes the comments visible docs. Reported-by: Yoel Caspersen <yoel@kviknet.dk> Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> Signed-off-by: Alexei Starovoitov <ast@kernel.org>
293 lines
7.3 KiB
C
293 lines
7.3 KiB
C
/* SPDX-License-Identifier: GPL-2.0
 *  Copyright(c) 2018 Jesper Dangaard Brouer.
 *
 * XDP/TC VLAN manipulation example
 *
 * GOTCHA: Remember to disable NIC hardware offloading of VLANs,
 * else the VLAN tags are NOT inlined in the packet payload:
 *
 *  # ethtool -K ixgbe2 rxvlan off
 *
 * Verify setting:
 *  # ethtool -k ixgbe2 | grep rx-vlan-offload
 *  rx-vlan-offload: off
 *
 */
|
|
#include <stddef.h>
|
|
#include <stdbool.h>
|
|
#include <string.h>
|
|
#include <linux/bpf.h>
|
|
#include <linux/if_ether.h>
|
|
#include <linux/if_vlan.h>
|
|
#include <linux/in.h>
|
|
#include <linux/pkt_cls.h>
|
|
|
|
#include "bpf_helpers.h"
|
|
#include "bpf_endian.h"
|
|
|
|
/* linux/if_vlan.h has not exposed this as UAPI, thus mirror some of it here
 *
 *	struct vlan_hdr - vlan header
 *	@h_vlan_TCI: priority and VLAN ID
 *	@h_vlan_encapsulated_proto: packet type ID or len
 */
|
|
struct _vlan_hdr {
	__be16 h_vlan_TCI;			/* priority and VLAN ID */
	__be16 h_vlan_encapsulated_proto;	/* packet type ID or len */
};
|
|
/* Bit masks for the 16-bit h_vlan_TCI field (host byte order) */
#define VLAN_PRIO_MASK		0xe000 /* Priority Code Point */
#define VLAN_PRIO_SHIFT		13
#define VLAN_CFI_MASK		0x1000 /* Canonical Format Indicator */
#define VLAN_TAG_PRESENT	VLAN_CFI_MASK
#define VLAN_VID_MASK		0x0fff /* VLAN Identifier */
#define VLAN_N_VID		4096
|
|
|
|
/*
 * Result of parsing the Ethernet + (up to two) VLAN headers of a frame.
 * parse_eth_frame() only writes the vlan_* members when the matching tag
 * is found, so callers zero-initialise the struct before parsing.
 */
struct parse_pkt {
	__u16 l3_proto;		/* EtherType after VLAN tags, host byte order */
	__u16 l3_offset;	/* bytes from start of frame to L3 header */
	__u16 vlan_outer;	/* outer VLAN ID (TCI & VLAN_VID_MASK) */
	__u16 vlan_inner;	/* inner VLAN ID (TCI & VLAN_VID_MASK) */
	__u8  vlan_outer_offset; /* bytes from start of frame to outer tag */
	__u8  vlan_inner_offset; /* bytes from start of frame to inner tag */
};
|
|
|
|
/* License string emitted into the "license" section of the object file */
char _license[] SEC("license") = "GPL";
|
|
|
|
static __always_inline
|
|
bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt)
|
|
{
|
|
__u16 eth_type;
|
|
__u8 offset;
|
|
|
|
offset = sizeof(*eth);
|
|
/* Make sure packet is large enough for parsing eth + 2 VLAN headers */
|
|
if ((void *)eth + offset + (2*sizeof(struct _vlan_hdr)) > data_end)
|
|
return false;
|
|
|
|
eth_type = eth->h_proto;
|
|
|
|
/* Handle outer VLAN tag */
|
|
if (eth_type == bpf_htons(ETH_P_8021Q)
|
|
|| eth_type == bpf_htons(ETH_P_8021AD)) {
|
|
struct _vlan_hdr *vlan_hdr;
|
|
|
|
vlan_hdr = (void *)eth + offset;
|
|
pkt->vlan_outer_offset = offset;
|
|
pkt->vlan_outer = bpf_ntohs(vlan_hdr->h_vlan_TCI)
|
|
& VLAN_VID_MASK;
|
|
eth_type = vlan_hdr->h_vlan_encapsulated_proto;
|
|
offset += sizeof(*vlan_hdr);
|
|
}
|
|
|
|
/* Handle inner (double) VLAN tag */
|
|
if (eth_type == bpf_htons(ETH_P_8021Q)
|
|
|| eth_type == bpf_htons(ETH_P_8021AD)) {
|
|
struct _vlan_hdr *vlan_hdr;
|
|
|
|
vlan_hdr = (void *)eth + offset;
|
|
pkt->vlan_inner_offset = offset;
|
|
pkt->vlan_inner = bpf_ntohs(vlan_hdr->h_vlan_TCI)
|
|
& VLAN_VID_MASK;
|
|
eth_type = vlan_hdr->h_vlan_encapsulated_proto;
|
|
offset += sizeof(*vlan_hdr);
|
|
}
|
|
|
|
pkt->l3_proto = bpf_ntohs(eth_type); /* Convert to host-byte-order */
|
|
pkt->l3_offset = offset;
|
|
|
|
return true;
|
|
}
|
|
|
|
/* Hint, VLANs are chosen to hit network-byte-order issues */
#define TESTVLAN 4011 /* 0xFAB */
// #define TO_VLAN 4000 /* 0xFA0 (hint 0x0A0 = 160) */
|
|
|
|
/*
 * Drop example: returns XDP_ABORTED when the frame cannot be parsed or when
 * its outer VLAN ID equals TESTVLAN; every other frame gets XDP_PASS.
 */
SEC("xdp_drop_vlan_4011")
int  xdp_prognum0(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct parse_pkt pkt = { 0 };

	if (!parse_eth_frame(data, data_end, &pkt))
		return XDP_ABORTED;

	/* Drop specific VLAN ID example */
	if (pkt.vlan_outer == TESTVLAN)
		return XDP_ABORTED;
	/*
	 * Using XDP_ABORTED makes it possible to record this event,
	 * via tracepoint xdp:xdp_exception like:
	 *  # perf record -a -e xdp:xdp_exception
	 *  # perf script
	 */
	return XDP_PASS;
}
|
|
/*
Commands to setup VLAN on Linux to test that packets get dropped:

 export ROOTDEV=ixgbe2
 export VLANID=4011
 ip link add link $ROOTDEV name $ROOTDEV.$VLANID type vlan id $VLANID
 ip link set dev  $ROOTDEV.$VLANID up

 ip link set dev $ROOTDEV mtu 1508
 ip addr add 100.64.40.11/24 dev $ROOTDEV.$VLANID

Load prog with ip tool:

 ip link set $ROOTDEV xdp off
 ip link set $ROOTDEV xdp object xdp_vlan01_kern.o section xdp_drop_vlan_4011

*/
|
|
|
|
/* Changing the VLAN ID to zero has the same practical effect as removing
 * the VLAN.
 */
#define TO_VLAN	0
|
|
|
|
/*
 * Rewrite example: when the outer VLAN ID equals TESTVLAN, replace the
 * 12-bit VLAN ID with TO_VLAN in place, keeping the top 4 TCI bits.
 *
 * Returns XDP_ABORTED when the frame cannot be parsed, XDP_PASS otherwise.
 */
SEC("xdp_vlan_change")
int _xdp_prognum1_unused_guard; /* placeholder removed below */
|
|
|
|
/*
 * Show XDP+TC can cooperate, on creating a VLAN rewriter.
 *  1. Create a XDP prog that can "pop"/remove a VLAN header.
 *  2. Create a TC-bpf prog that on egress can add a VLAN header.
 */
|
|
|
|
#ifndef ETH_ALEN /* Ethernet MAC address length */
#define ETH_ALEN	6	/* bytes */
#endif
#define VLAN_HDR_SZ	4	/* bytes */
|
|
|
|
/*
 * Pop the outer VLAN tag by moving the two MAC addresses VLAN_HDR_SZ bytes
 * forward and then advancing the packet start by the same amount.
 *
 * Returns XDP_ABORTED when the frame cannot be parsed or when the head
 * adjustment fails (the MAC addresses have already been moved by then, so
 * the frame must not be passed on), XDP_PASS otherwise. Frames without an
 * outer VLAN tag are passed untouched.
 */
SEC("xdp_vlan_remove_outer")
int  xdp_prognum2(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct parse_pkt pkt = { 0 };
	char *dest;

	if (!parse_eth_frame(data, data_end, &pkt))
		return XDP_ABORTED;

	/* Skip packet if no outer VLAN was detected */
	if (pkt.vlan_outer_offset == 0)
		return XDP_PASS;

	/* Moving Ethernet header, dest overlaps with src, memmove handles this */
	dest = data;
	dest += VLAN_HDR_SZ;
	/*
	 * Notice: Taking over vlan_hdr->h_vlan_encapsulated_proto, by
	 * only moving two MAC addrs (12 bytes), not overwriting last 2 bytes
	 */
	__builtin_memmove(dest, data, ETH_ALEN * 2);
	/* Note: LLVM built-in memmove inlining requires size to be constant */

	/* Move start of packet header seen by Linux kernel stack */
	if (bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ))
		return XDP_ABORTED;

	return XDP_PASS;
}
|
|
|
|
/*
 * Move the 12 bytes of destination + source MAC address at @data forward
 * by 4 bytes, using 16-bit stores. Words are copied from the highest index
 * down because source and destination overlap.
 *
 * @data: start of the Ethernet header; the first 16 bytes are accessed.
 */
static __always_inline
void shift_mac_4bytes_16bit(void *data)
{
	__u16 *w = data;

	w[7] = w[5]; /* overwrites w[7], which was vlan_hdr->h_vlan_TCI */
	w[6] = w[4]; /* overwrites w[6], which was ethhdr->h_proto */
	w[5] = w[3];
	w[4] = w[2];
	w[3] = w[1];
	w[2] = w[0];
}
|
|
|
|
/*
 * Move the 12 bytes of destination + source MAC address at @data forward
 * by 4 bytes, using three 32-bit stores. Words are copied from the highest
 * index down because source and destination overlap.
 *
 * @data: start of the Ethernet header; the first 16 bytes are accessed.
 */
static __always_inline
void shift_mac_4bytes_32bit(void *data)
{
	__u32 *w = data;

	/* Assuming VLAN hdr present. The 4 bytes in w[3] that get
	 * overwritten are ethhdr->h_proto and vlan_hdr->h_vlan_TCI.
	 * The vlan_hdr->h_vlan_encapsulated_proto takes over the role of
	 * ethhdr->h_proto.
	 */
	w[3] = w[2];
	w[2] = w[1];
	w[1] = w[0];
}
|
|
|
|
/*
 * Pop the outer VLAN tag, variant using shift_mac_4bytes_32bit() instead of
 * memmove to move the MAC addresses.
 *
 * Returns XDP_ABORTED when the frame cannot be parsed or when the head
 * adjustment fails (the MAC addresses have already been moved by then, so
 * the frame must not be passed on), XDP_PASS otherwise. Frames without an
 * outer VLAN tag are passed untouched.
 */
SEC("xdp_vlan_remove_outer2")
int  xdp_prognum3(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *orig_eth = data;
	struct parse_pkt pkt = { 0 };

	if (!parse_eth_frame(orig_eth, data_end, &pkt))
		return XDP_ABORTED;

	/* Skip packet if no outer VLAN was detected */
	if (pkt.vlan_outer_offset == 0)
		return XDP_PASS;

	/* Simply shift down MAC addrs 4 bytes, overwrite h_proto + TCI */
	shift_mac_4bytes_32bit(data);

	/* Move start of packet header seen by Linux kernel stack */
	if (bpf_xdp_adjust_head(ctx, VLAN_HDR_SZ))
		return XDP_ABORTED;

	return XDP_PASS;
}
|
|
|
|
/*=====================================
 *  BELOW: TC-hook based ebpf programs
 * ====================================
 * The TC-clsact eBPF programs (currently) need to be attached via TC commands
 */
|
|
|
|
/*
 * TC program that pushes an 802.1Q tag with VLAN ID TESTVLAN onto the skb.
 *
 * The helper's return value is ignored, so TC_ACT_OK is returned whether or
 * not the push succeeded.
 */
SEC("tc_vlan_push")
int _tc_progA(struct __sk_buff *ctx)
{
	bpf_skb_vlan_push(ctx, bpf_htons(ETH_P_8021Q), TESTVLAN);

	return TC_ACT_OK;
}
|
|
/*
Commands to setup TC to use above bpf prog:

export ROOTDEV=ixgbe2
export FILE=xdp_vlan01_kern.o

# Re-attach clsact to clear/flush existing rules
tc qdisc del dev $ROOTDEV clsact 2> /dev/null ;\
tc qdisc add dev $ROOTDEV clsact

# Attach BPF prog EGRESS
tc filter add dev $ROOTDEV egress \
 prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push

tc filter show dev $ROOTDEV egress
*/
|