netkit: Fix pkt_type override upon netkit pass verdict
When running the Cilium connectivity test suite with netkit in L2 mode, we
found that, compared to tcx, a few tests which pushed traffic into an L7
proxy sitting in the host namespace were failing. The problem in particular
is around the invocation of eth_type_trans() in netkit.
In case of tcx, this is run before the tcx ingress is triggered inside
host namespace and thus if the BPF program uses the bpf_skb_change_type()
helper the newly set type is retained. However, in case of netkit, the
late eth_type_trans() invocation overrides the earlier decision from the
BPF program which eventually leads to the test failure.
Instead of calling eth_type_trans(), split out the relevant parts, meaning,
the reset of the mac header and the call to eth_skb_pkt_type(), and run them
before the BPF program is run in order to have the same behavior as with tcx.
Also refactor out a small helper called eth_skb_pull_mac(), which is run in
case the skb is passed up the stack, where the mac header must be pulled.
With this, all connectivity tests pass.
Fixes: 35dfaad718 ("netkit, bpf: Add bpf programmable net device")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://lore.kernel.org/r/20240524163619.26001-2-daniel@iogearbox.net
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
parent
d6fe532b74
commit
3998d18426
@ -55,6 +55,7 @@ static void netkit_prep_forward(struct sk_buff *skb, bool xnet)
|
|||||||
skb_scrub_packet(skb, xnet);
|
skb_scrub_packet(skb, xnet);
|
||||||
skb->priority = 0;
|
skb->priority = 0;
|
||||||
nf_skip_egress(skb, true);
|
nf_skip_egress(skb, true);
|
||||||
|
skb_reset_mac_header(skb);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct netkit *netkit_priv(const struct net_device *dev)
|
static struct netkit *netkit_priv(const struct net_device *dev)
|
||||||
@ -78,6 +79,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
|
|||||||
skb_orphan_frags(skb, GFP_ATOMIC)))
|
skb_orphan_frags(skb, GFP_ATOMIC)))
|
||||||
goto drop;
|
goto drop;
|
||||||
netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer)));
|
netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer)));
|
||||||
|
eth_skb_pkt_type(skb, peer);
|
||||||
skb->dev = peer;
|
skb->dev = peer;
|
||||||
entry = rcu_dereference(nk->active);
|
entry = rcu_dereference(nk->active);
|
||||||
if (entry)
|
if (entry)
|
||||||
@ -85,7 +87,7 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
|
|||||||
switch (ret) {
|
switch (ret) {
|
||||||
case NETKIT_NEXT:
|
case NETKIT_NEXT:
|
||||||
case NETKIT_PASS:
|
case NETKIT_PASS:
|
||||||
skb->protocol = eth_type_trans(skb, skb->dev);
|
eth_skb_pull_mac(skb);
|
||||||
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
|
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
|
||||||
if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) {
|
if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) {
|
||||||
dev_sw_netstats_tx_add(dev, 1, len);
|
dev_sw_netstats_tx_add(dev, 1, len);
|
||||||
|
@ -636,6 +636,14 @@ static inline void eth_skb_pkt_type(struct sk_buff *skb,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline struct ethhdr *eth_skb_pull_mac(struct sk_buff *skb)
|
||||||
|
{
|
||||||
|
struct ethhdr *eth = (struct ethhdr *)skb->data;
|
||||||
|
|
||||||
|
skb_pull_inline(skb, ETH_HLEN);
|
||||||
|
return eth;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame
|
* eth_skb_pad - Pad buffer to mininum number of octets for Ethernet frame
|
||||||
* @skb: Buffer to pad
|
* @skb: Buffer to pad
|
||||||
|
@ -161,9 +161,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
|
|||||||
skb->dev = dev;
|
skb->dev = dev;
|
||||||
skb_reset_mac_header(skb);
|
skb_reset_mac_header(skb);
|
||||||
|
|
||||||
eth = (struct ethhdr *)skb->data;
|
eth = eth_skb_pull_mac(skb);
|
||||||
skb_pull_inline(skb, ETH_HLEN);
|
|
||||||
|
|
||||||
eth_skb_pkt_type(skb, dev);
|
eth_skb_pkt_type(skb, dev);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
Loading…
Reference in New Issue
Block a user