Merge branch 'add-bpf_xdp_get_xfrm_state-kfunc'
Daniel Xu says:
====================
Add bpf_xdp_get_xfrm_state() kfunc
This patchset adds two kfunc helpers, bpf_xdp_get_xfrm_state() and
bpf_xdp_xfrm_state_release(), that wrap xfrm_state_lookup() and
xfrm_state_put(). The intent is to support software RSS (via XDP) for
the ongoing/upcoming IPsec pcpu work [0]. Recent experiments performed
on (hopefully) reproducible AWS testbeds indicate that single-tunnel
pcpu IPsec can reach line rate on 100G ENA NICs.
Note this patchset only tests/shows generic xfrm_state access. The
"secret sauce" (if you can really even call it that) involves accessing
a soon-to-be-upstreamed pcpu_num field in xfrm_state. Early example is
available here [1].
[0]: https://datatracker.ietf.org/doc/draft-ietf-ipsecme-multi-sa-performance/03/
[1]: e89a1c617a/xdp-bench/xdp_redirect_cpumap.bpf.c (L385-L406)
Changes from v5:
* Improve kfunc doc comments
* Remove extraneous replay-window setting on selftest reverse path
* Squash two kfunc commits into one
* Rebase to bpf-next to pick up bitfield write patches
* Remove testing of opts.error in selftest prog
Changes from v4:
* Fixup commit message for selftest
* Set opts->error -ENOENT for !x
* Revert single file xfrm + bpf
Changes from v3:
* Place all xfrm bpf integrations in xfrm_bpf.c
* Avoid using nval as a temporary
* Rebase to bpf-next
* Remove extraneous __failure_unpriv annotation for verifier tests
Changes from v2:
* Fix/simplify BPF_CORE_WRITE_BITFIELD() algorithm
* Added verifier tests for bitfield writes
* Fix state leakage across test_tunnel subtests
Changes from v1:
* Move xfrm tunnel tests to test_progs
* Fix writing to opts->error when opts is invalid
* Use __bpf_kfunc_start_defs()
* Remove unused vxlanhdr definition
* Add and use BPF_CORE_WRITE_BITFIELD() macro
* Make series bisect clean
Changes from RFCv2:
* Rebased to ipsec-next
* Fix netns leak
Changes from RFCv1:
* Add Antony's commit tags
* Add KF_ACQUIRE and KF_RELEASE semantics
====================
Reviewed-by: Eyal Birger <eyal.birger@gmail.com>
Link: https://lore.kernel.org/r/cover.1702593901.git.dxu@dxuuu.xyz
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
commit
403f3e8fda
@ -2190,4 +2190,13 @@ static inline int register_xfrm_interface_bpf(void)
|
||||
|
||||
#endif
|
||||
|
||||
#if IS_ENABLED(CONFIG_DEBUG_INFO_BTF)
|
||||
int register_xfrm_state_bpf(void);
|
||||
#else
|
||||
static inline int register_xfrm_state_bpf(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _NET_XFRM_H */
|
||||
|
@ -21,3 +21,4 @@ obj-$(CONFIG_XFRM_USER_COMPAT) += xfrm_compat.o
|
||||
obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
|
||||
obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o
|
||||
obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o
|
||||
obj-$(CONFIG_DEBUG_INFO_BTF) += xfrm_state_bpf.o
|
||||
|
@ -4218,6 +4218,8 @@ void __init xfrm_init(void)
|
||||
#ifdef CONFIG_XFRM_ESPINTCP
|
||||
espintcp_init();
|
||||
#endif
|
||||
|
||||
register_xfrm_state_bpf();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_AUDITSYSCALL
|
||||
|
134
net/xfrm/xfrm_state_bpf.c
Normal file
134
net/xfrm/xfrm_state_bpf.c
Normal file
@ -0,0 +1,134 @@
|
||||
// SPDX-License-Identifier: GPL-2.0-only
|
||||
/* Unstable XFRM state BPF helpers.
|
||||
*
|
||||
* Note that it is allowed to break compatibility for these functions since the
|
||||
* interface they are exposed through to BPF programs is explicitly unstable.
|
||||
*/
|
||||
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/btf.h>
|
||||
#include <linux/btf_ids.h>
|
||||
#include <net/xdp.h>
|
||||
#include <net/xfrm.h>
|
||||
|
||||
/* bpf_xfrm_state_opts - Options for XFRM state lookup helpers
|
||||
*
|
||||
* Members:
|
||||
* @error - Out parameter, set for any errors encountered
|
||||
* Values:
|
||||
* -EINVAL - netns_id is less than -1
|
||||
* -EINVAL - opts__sz isn't BPF_XFRM_STATE_OPTS_SZ
|
||||
* -ENONET - No network namespace found for netns_id
|
||||
* -ENOENT - No xfrm_state found
|
||||
* @netns_id - Specify the network namespace for lookup
|
||||
* Values:
|
||||
* BPF_F_CURRENT_NETNS (-1)
|
||||
* Use namespace associated with ctx
|
||||
* [0, S32_MAX]
|
||||
* Network Namespace ID
|
||||
* @mark - XFRM mark to match on
|
||||
* @daddr - Destination address to match on
|
||||
* @spi - Security parameter index to match on
|
||||
* @proto - IP protocol to match on (eg. IPPROTO_ESP)
|
||||
* @family - Protocol family to match on (AF_INET/AF_INET6)
|
||||
*/
|
||||
struct bpf_xfrm_state_opts {
|
||||
s32 error;
|
||||
s32 netns_id;
|
||||
u32 mark;
|
||||
xfrm_address_t daddr;
|
||||
__be32 spi;
|
||||
u8 proto;
|
||||
u16 family;
|
||||
};
|
||||
|
||||
enum {
|
||||
BPF_XFRM_STATE_OPTS_SZ = sizeof(struct bpf_xfrm_state_opts),
|
||||
};
|
||||
|
||||
__bpf_kfunc_start_defs();
|
||||
|
||||
/* bpf_xdp_get_xfrm_state - Get XFRM state
|
||||
*
|
||||
* A `struct xfrm_state *`, if found, must be released with a corresponding
|
||||
* bpf_xdp_xfrm_state_release.
|
||||
*
|
||||
* Parameters:
|
||||
* @ctx - Pointer to ctx (xdp_md) in XDP program
|
||||
* Cannot be NULL
|
||||
* @opts - Options for lookup (documented above)
|
||||
* Cannot be NULL
|
||||
* @opts__sz - Length of the bpf_xfrm_state_opts structure
|
||||
* Must be BPF_XFRM_STATE_OPTS_SZ
|
||||
*/
|
||||
__bpf_kfunc struct xfrm_state *
|
||||
bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts, u32 opts__sz)
|
||||
{
|
||||
struct xdp_buff *xdp = (struct xdp_buff *)ctx;
|
||||
struct net *net = dev_net(xdp->rxq->dev);
|
||||
struct xfrm_state *x;
|
||||
|
||||
if (!opts || opts__sz < sizeof(opts->error))
|
||||
return NULL;
|
||||
|
||||
if (opts__sz != BPF_XFRM_STATE_OPTS_SZ) {
|
||||
opts->error = -EINVAL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS)) {
|
||||
opts->error = -EINVAL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (opts->netns_id >= 0) {
|
||||
net = get_net_ns_by_id(net, opts->netns_id);
|
||||
if (unlikely(!net)) {
|
||||
opts->error = -ENONET;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
x = xfrm_state_lookup(net, opts->mark, &opts->daddr, opts->spi,
|
||||
opts->proto, opts->family);
|
||||
|
||||
if (opts->netns_id >= 0)
|
||||
put_net(net);
|
||||
if (!x)
|
||||
opts->error = -ENOENT;
|
||||
|
||||
return x;
|
||||
}
|
||||
|
||||
/* bpf_xdp_xfrm_state_release - Release acquired xfrm_state object
|
||||
*
|
||||
* This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects
|
||||
* the program if any references remain in the program in all of the explored
|
||||
* states.
|
||||
*
|
||||
* Parameters:
|
||||
* @x - Pointer to referenced xfrm_state object, obtained using
|
||||
* bpf_xdp_get_xfrm_state.
|
||||
*/
|
||||
__bpf_kfunc void bpf_xdp_xfrm_state_release(struct xfrm_state *x)
|
||||
{
|
||||
xfrm_state_put(x);
|
||||
}
|
||||
|
||||
__bpf_kfunc_end_defs();
|
||||
|
||||
BTF_SET8_START(xfrm_state_kfunc_set)
|
||||
BTF_ID_FLAGS(func, bpf_xdp_get_xfrm_state, KF_RET_NULL | KF_ACQUIRE)
|
||||
BTF_ID_FLAGS(func, bpf_xdp_xfrm_state_release, KF_RELEASE)
|
||||
BTF_SET8_END(xfrm_state_kfunc_set)
|
||||
|
||||
static const struct btf_kfunc_id_set xfrm_state_xdp_kfunc_set = {
|
||||
.owner = THIS_MODULE,
|
||||
.set = &xfrm_state_kfunc_set,
|
||||
};
|
||||
|
||||
int __init register_xfrm_state_bpf(void)
|
||||
{
|
||||
return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP,
|
||||
&xfrm_state_xdp_kfunc_set);
|
||||
}
|
@ -50,6 +50,7 @@
|
||||
*/
|
||||
|
||||
#include <arpa/inet.h>
|
||||
#include <linux/if_link.h>
|
||||
#include <linux/if_tun.h>
|
||||
#include <linux/limits.h>
|
||||
#include <linux/sysctl.h>
|
||||
@ -92,6 +93,11 @@
|
||||
#define IPIP_TUNL_DEV0 "ipip00"
|
||||
#define IPIP_TUNL_DEV1 "ipip11"
|
||||
|
||||
#define XFRM_AUTH "0x1111111111111111111111111111111111111111"
|
||||
#define XFRM_ENC "0x22222222222222222222222222222222"
|
||||
#define XFRM_SPI_IN_TO_OUT 0x1
|
||||
#define XFRM_SPI_OUT_TO_IN 0x2
|
||||
|
||||
#define PING_ARGS "-i 0.01 -c 3 -w 10 -q"
|
||||
|
||||
static int config_device(void)
|
||||
@ -264,6 +270,92 @@ static void delete_ipip_tunnel(void)
|
||||
SYS_NOFAIL("ip fou del port 5555 2> /dev/null");
|
||||
}
|
||||
|
||||
static int add_xfrm_tunnel(void)
|
||||
{
|
||||
/* at_ns0 namespace
|
||||
* at_ns0 -> root
|
||||
*/
|
||||
SYS(fail,
|
||||
"ip netns exec at_ns0 "
|
||||
"ip xfrm state add src %s dst %s proto esp "
|
||||
"spi %d reqid 1 mode tunnel replay-window 42 "
|
||||
"auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s",
|
||||
IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC);
|
||||
SYS(fail,
|
||||
"ip netns exec at_ns0 "
|
||||
"ip xfrm policy add src %s/32 dst %s/32 dir out "
|
||||
"tmpl src %s dst %s proto esp reqid 1 "
|
||||
"mode tunnel",
|
||||
IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1, IP4_ADDR_VETH0, IP4_ADDR1_VETH1);
|
||||
|
||||
/* root -> at_ns0 */
|
||||
SYS(fail,
|
||||
"ip netns exec at_ns0 "
|
||||
"ip xfrm state add src %s dst %s proto esp "
|
||||
"spi %d reqid 2 mode tunnel "
|
||||
"auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s",
|
||||
IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN, XFRM_AUTH, XFRM_ENC);
|
||||
SYS(fail,
|
||||
"ip netns exec at_ns0 "
|
||||
"ip xfrm policy add src %s/32 dst %s/32 dir in "
|
||||
"tmpl src %s dst %s proto esp reqid 2 "
|
||||
"mode tunnel",
|
||||
IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0, IP4_ADDR1_VETH1, IP4_ADDR_VETH0);
|
||||
|
||||
/* address & route */
|
||||
SYS(fail, "ip netns exec at_ns0 ip addr add dev veth0 %s/32",
|
||||
IP4_ADDR_TUNL_DEV0);
|
||||
SYS(fail, "ip netns exec at_ns0 ip route add %s dev veth0 via %s src %s",
|
||||
IP4_ADDR_TUNL_DEV1, IP4_ADDR1_VETH1, IP4_ADDR_TUNL_DEV0);
|
||||
|
||||
/* root namespace
|
||||
* at_ns0 -> root
|
||||
*/
|
||||
SYS(fail,
|
||||
"ip xfrm state add src %s dst %s proto esp "
|
||||
"spi %d reqid 1 mode tunnel replay-window 42 "
|
||||
"auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s",
|
||||
IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC);
|
||||
SYS(fail,
|
||||
"ip xfrm policy add src %s/32 dst %s/32 dir in "
|
||||
"tmpl src %s dst %s proto esp reqid 1 "
|
||||
"mode tunnel",
|
||||
IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1, IP4_ADDR_VETH0, IP4_ADDR1_VETH1);
|
||||
|
||||
/* root -> at_ns0 */
|
||||
SYS(fail,
|
||||
"ip xfrm state add src %s dst %s proto esp "
|
||||
"spi %d reqid 2 mode tunnel "
|
||||
"auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s",
|
||||
IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN, XFRM_AUTH, XFRM_ENC);
|
||||
SYS(fail,
|
||||
"ip xfrm policy add src %s/32 dst %s/32 dir out "
|
||||
"tmpl src %s dst %s proto esp reqid 2 "
|
||||
"mode tunnel",
|
||||
IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0, IP4_ADDR1_VETH1, IP4_ADDR_VETH0);
|
||||
|
||||
/* address & route */
|
||||
SYS(fail, "ip addr add dev veth1 %s/32", IP4_ADDR_TUNL_DEV1);
|
||||
SYS(fail, "ip route add %s dev veth1 via %s src %s",
|
||||
IP4_ADDR_TUNL_DEV0, IP4_ADDR_VETH0, IP4_ADDR_TUNL_DEV1);
|
||||
|
||||
return 0;
|
||||
fail:
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void delete_xfrm_tunnel(void)
|
||||
{
|
||||
SYS_NOFAIL("ip xfrm policy delete dir out src %s/32 dst %s/32 2> /dev/null",
|
||||
IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0);
|
||||
SYS_NOFAIL("ip xfrm policy delete dir in src %s/32 dst %s/32 2> /dev/null",
|
||||
IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1);
|
||||
SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null",
|
||||
IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT);
|
||||
SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null",
|
||||
IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN);
|
||||
}
|
||||
|
||||
static int test_ping(int family, const char *addr)
|
||||
{
|
||||
SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr);
|
||||
@ -532,25 +624,85 @@ done:
|
||||
test_tunnel_kern__destroy(skel);
|
||||
}
|
||||
|
||||
static void test_xfrm_tunnel(void)
|
||||
{
|
||||
DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
|
||||
.attach_point = BPF_TC_INGRESS);
|
||||
LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
|
||||
struct test_tunnel_kern *skel = NULL;
|
||||
struct nstoken *nstoken;
|
||||
int xdp_prog_fd;
|
||||
int tc_prog_fd;
|
||||
int ifindex;
|
||||
int err;
|
||||
|
||||
err = add_xfrm_tunnel();
|
||||
if (!ASSERT_OK(err, "add_xfrm_tunnel"))
|
||||
return;
|
||||
|
||||
skel = test_tunnel_kern__open_and_load();
|
||||
if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
|
||||
goto done;
|
||||
|
||||
ifindex = if_nametoindex("veth1");
|
||||
if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex"))
|
||||
goto done;
|
||||
|
||||
/* attach tc prog to tunnel dev */
|
||||
tc_hook.ifindex = ifindex;
|
||||
tc_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state);
|
||||
if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__fd"))
|
||||
goto done;
|
||||
if (attach_tc_prog(&tc_hook, tc_prog_fd, -1))
|
||||
goto done;
|
||||
|
||||
/* attach xdp prog to tunnel dev */
|
||||
xdp_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state_xdp);
|
||||
if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__fd"))
|
||||
goto done;
|
||||
err = bpf_xdp_attach(ifindex, xdp_prog_fd, XDP_FLAGS_REPLACE, &opts);
|
||||
if (!ASSERT_OK(err, "bpf_xdp_attach"))
|
||||
goto done;
|
||||
|
||||
/* ping from at_ns0 namespace test */
|
||||
nstoken = open_netns("at_ns0");
|
||||
err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
|
||||
close_netns(nstoken);
|
||||
if (!ASSERT_OK(err, "test_ping"))
|
||||
goto done;
|
||||
|
||||
if (!ASSERT_EQ(skel->bss->xfrm_reqid, 1, "req_id"))
|
||||
goto done;
|
||||
if (!ASSERT_EQ(skel->bss->xfrm_spi, XFRM_SPI_IN_TO_OUT, "spi"))
|
||||
goto done;
|
||||
if (!ASSERT_EQ(skel->bss->xfrm_remote_ip, 0xac100164, "remote_ip"))
|
||||
goto done;
|
||||
if (!ASSERT_EQ(skel->bss->xfrm_replay_window, 42, "replay_window"))
|
||||
goto done;
|
||||
|
||||
done:
|
||||
delete_xfrm_tunnel();
|
||||
if (skel)
|
||||
test_tunnel_kern__destroy(skel);
|
||||
}
|
||||
|
||||
/* Run test_<name>(...) as a subtest called "<name>".
 *
 * When test__start_subtest() selects the subtest, the devices are
 * configured with config_device() before the test body and cleanup() is
 * called after it.
 */
#define RUN_TEST(name, ...)						\
	({								\
		if (test__start_subtest(#name)) {			\
			config_device();				\
			test_ ## name(__VA_ARGS__);			\
			cleanup();					\
		}							\
	})
|
||||
|
||||
static void *test_tunnel_run_tests(void *arg)
|
||||
{
|
||||
cleanup();
|
||||
config_device();
|
||||
|
||||
RUN_TEST(vxlan_tunnel);
|
||||
RUN_TEST(ip6vxlan_tunnel);
|
||||
RUN_TEST(ipip_tunnel, NONE);
|
||||
RUN_TEST(ipip_tunnel, FOU);
|
||||
RUN_TEST(ipip_tunnel, GUE);
|
||||
|
||||
cleanup();
|
||||
RUN_TEST(xfrm_tunnel);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
@ -26,6 +26,7 @@
|
||||
#define IPV6_AUTOFLOWLABEL 70
|
||||
|
||||
#define TC_ACT_UNSPEC (-1)
|
||||
#define TC_ACT_OK 0
|
||||
#define TC_ACT_SHOT 2
|
||||
|
||||
#define SOL_TCP 6
|
||||
|
@ -6,66 +6,34 @@
|
||||
* modify it under the terms of version 2 of the GNU General Public
|
||||
* License as published by the Free Software Foundation.
|
||||
*/
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <linux/bpf.h>
|
||||
#include <linux/if_ether.h>
|
||||
#include <linux/if_packet.h>
|
||||
#include <linux/if_tunnel.h>
|
||||
#include <linux/ip.h>
|
||||
#include <linux/ipv6.h>
|
||||
#include <linux/icmp.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/socket.h>
|
||||
#include <linux/pkt_cls.h>
|
||||
#include <linux/erspan.h>
|
||||
#include <linux/udp.h>
|
||||
#include "vmlinux.h"
|
||||
#include <bpf/bpf_core_read.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_endian.h>
|
||||
#include "bpf_kfuncs.h"
|
||||
#include "bpf_tracing_net.h"
|
||||
|
||||
#define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret)
|
||||
|
||||
#define VXLAN_UDP_PORT 4789
|
||||
#define VXLAN_UDP_PORT 4789
|
||||
#define ETH_P_IP 0x0800
|
||||
#define PACKET_HOST 0
|
||||
#define TUNNEL_CSUM bpf_htons(0x01)
|
||||
#define TUNNEL_KEY bpf_htons(0x04)
|
||||
|
||||
/* Only IPv4 address assigned to veth1.
|
||||
* 172.16.1.200
|
||||
*/
|
||||
#define ASSIGNED_ADDR_VETH1 0xac1001c8
|
||||
|
||||
struct geneve_opt {
|
||||
__be16 opt_class;
|
||||
__u8 type;
|
||||
__u8 length:5;
|
||||
__u8 r3:1;
|
||||
__u8 r2:1;
|
||||
__u8 r1:1;
|
||||
__u8 opt_data[8]; /* hard-coded to 8 byte */
|
||||
};
|
||||
|
||||
struct vxlanhdr {
|
||||
__be32 vx_flags;
|
||||
__be32 vx_vni;
|
||||
} __attribute__((packed));
|
||||
|
||||
struct vxlan_metadata {
|
||||
__u32 gbp;
|
||||
};
|
||||
|
||||
struct bpf_fou_encap {
|
||||
__be16 sport;
|
||||
__be16 dport;
|
||||
};
|
||||
|
||||
enum bpf_fou_encap_type {
|
||||
FOU_BPF_ENCAP_FOU,
|
||||
FOU_BPF_ENCAP_GUE,
|
||||
};
|
||||
|
||||
int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx,
|
||||
struct bpf_fou_encap *encap, int type) __ksym;
|
||||
int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx,
|
||||
struct bpf_fou_encap *encap) __ksym;
|
||||
struct xfrm_state *
|
||||
bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts,
|
||||
u32 opts__sz) __ksym;
|
||||
void bpf_xdp_xfrm_state_release(struct xfrm_state *x) __ksym;
|
||||
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
@ -205,9 +173,9 @@ int erspan_set_tunnel(struct __sk_buff *skb)
|
||||
__u8 hwid = 7;
|
||||
|
||||
md.version = 2;
|
||||
md.u.md2.dir = direction;
|
||||
md.u.md2.hwid = hwid & 0xf;
|
||||
md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
|
||||
BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, direction);
|
||||
BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf));
|
||||
BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3);
|
||||
#endif
|
||||
|
||||
ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
|
||||
@ -246,8 +214,9 @@ int erspan_get_tunnel(struct __sk_buff *skb)
|
||||
bpf_printk("\tindex %x\n", index);
|
||||
#else
|
||||
bpf_printk("\tdirection %d hwid %x timestamp %u\n",
|
||||
md.u.md2.dir,
|
||||
(md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
|
||||
BPF_CORE_READ_BITFIELD(&md.u.md2, dir),
|
||||
(BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) +
|
||||
BPF_CORE_READ_BITFIELD(&md.u.md2, hwid),
|
||||
bpf_ntohl(md.u.md2.timestamp));
|
||||
#endif
|
||||
|
||||
@ -284,9 +253,9 @@ int ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
|
||||
__u8 hwid = 17;
|
||||
|
||||
md.version = 2;
|
||||
md.u.md2.dir = direction;
|
||||
md.u.md2.hwid = hwid & 0xf;
|
||||
md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
|
||||
BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, direction);
|
||||
BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf));
|
||||
BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3);
|
||||
#endif
|
||||
|
||||
ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
|
||||
@ -326,8 +295,9 @@ int ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
|
||||
bpf_printk("\tindex %x\n", index);
|
||||
#else
|
||||
bpf_printk("\tdirection %d hwid %x timestamp %u\n",
|
||||
md.u.md2.dir,
|
||||
(md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
|
||||
BPF_CORE_READ_BITFIELD(&md.u.md2, dir),
|
||||
(BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) +
|
||||
BPF_CORE_READ_BITFIELD(&md.u.md2, hwid),
|
||||
bpf_ntohl(md.u.md2.timestamp));
|
||||
#endif
|
||||
|
||||
@ -963,6 +933,10 @@ int ip6ip6_get_tunnel(struct __sk_buff *skb)
|
||||
return TC_ACT_OK;
|
||||
}
|
||||
|
||||
volatile int xfrm_reqid = 0;
|
||||
volatile int xfrm_spi = 0;
|
||||
volatile int xfrm_remote_ip = 0;
|
||||
|
||||
SEC("tc")
|
||||
int xfrm_get_state(struct __sk_buff *skb)
|
||||
{
|
||||
@ -973,10 +947,58 @@ int xfrm_get_state(struct __sk_buff *skb)
|
||||
if (ret < 0)
|
||||
return TC_ACT_OK;
|
||||
|
||||
bpf_printk("reqid %d spi 0x%x remote ip 0x%x\n",
|
||||
x.reqid, bpf_ntohl(x.spi),
|
||||
bpf_ntohl(x.remote_ipv4));
|
||||
xfrm_reqid = x.reqid;
|
||||
xfrm_spi = bpf_ntohl(x.spi);
|
||||
xfrm_remote_ip = bpf_ntohl(x.remote_ipv4);
|
||||
|
||||
return TC_ACT_OK;
|
||||
}
|
||||
|
||||
volatile int xfrm_replay_window = 0;
|
||||
|
||||
/* XDP program: for packets whose IPv4 header says the payload is ESP, look
 * up the xfrm_state matching (daddr, spi, ESP, AF_INET) in the current
 * network namespace and, if that state has replay_esn set, copy its
 * replay_window into xfrm_replay_window. Always returns XDP_PASS.
 *
 * The IPv4 header is read as a fixed 20 bytes right after the Ethernet
 * header, and the first 8 bytes after that are read as the ESP header.
 */
SEC("xdp")
int xfrm_get_state_xdp(struct xdp_md *xdp)
{
	struct bpf_xfrm_state_opts opts = {};
	u8 ip_scratch[20] = {};
	u8 esp_scratch[8] = {};
	struct xfrm_state *state;
	struct ip_esp_hdr *esp;
	struct bpf_dynptr pkt;
	struct iphdr *ip;
	u32 offset;

	if (bpf_dynptr_from_xdp(xdp, 0, &pkt))
		return XDP_PASS;

	offset = sizeof(struct ethhdr);
	ip = bpf_dynptr_slice(&pkt, offset, ip_scratch, sizeof(ip_scratch));
	if (!ip || ip->protocol != IPPROTO_ESP)
		return XDP_PASS;

	offset += sizeof(struct iphdr);
	esp = bpf_dynptr_slice(&pkt, offset, esp_scratch, sizeof(esp_scratch));
	if (!esp)
		return XDP_PASS;

	opts.netns_id = BPF_F_CURRENT_NETNS;
	opts.daddr.a4 = ip->daddr;
	opts.spi = esp->spi;
	opts.proto = IPPROTO_ESP;
	opts.family = AF_INET;

	state = bpf_xdp_get_xfrm_state(xdp, &opts, sizeof(opts));
	if (!state)
		return XDP_PASS;

	if (state->replay_esn)
		xfrm_replay_window = state->replay_esn->replay_window;

	/* The acquired reference must be dropped before the program exits. */
	bpf_xdp_xfrm_state_release(state);
	return XDP_PASS;
}
|
||||
|
||||
char _license[] SEC("license") = "GPL";
|
||||
|
@ -517,90 +517,6 @@ test_ip6ip6()
|
||||
echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
|
||||
}
|
||||
|
||||
setup_xfrm_tunnel()
|
||||
{
|
||||
auth=0x$(printf '1%.0s' {1..40})
|
||||
enc=0x$(printf '2%.0s' {1..32})
|
||||
spi_in_to_out=0x1
|
||||
spi_out_to_in=0x2
|
||||
# at_ns0 namespace
|
||||
# at_ns0 -> root
|
||||
ip netns exec at_ns0 \
|
||||
ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
|
||||
spi $spi_in_to_out reqid 1 mode tunnel \
|
||||
auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
|
||||
ip netns exec at_ns0 \
|
||||
ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \
|
||||
tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
|
||||
mode tunnel
|
||||
# root -> at_ns0
|
||||
ip netns exec at_ns0 \
|
||||
ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
|
||||
spi $spi_out_to_in reqid 2 mode tunnel \
|
||||
auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
|
||||
ip netns exec at_ns0 \
|
||||
ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \
|
||||
tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
|
||||
mode tunnel
|
||||
# address & route
|
||||
ip netns exec at_ns0 \
|
||||
ip addr add dev veth0 10.1.1.100/32
|
||||
ip netns exec at_ns0 \
|
||||
ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \
|
||||
src 10.1.1.100
|
||||
|
||||
# root namespace
|
||||
# at_ns0 -> root
|
||||
ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
|
||||
spi $spi_in_to_out reqid 1 mode tunnel \
|
||||
auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
|
||||
ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \
|
||||
tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
|
||||
mode tunnel
|
||||
# root -> at_ns0
|
||||
ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
|
||||
spi $spi_out_to_in reqid 2 mode tunnel \
|
||||
auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
|
||||
ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \
|
||||
tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
|
||||
mode tunnel
|
||||
# address & route
|
||||
ip addr add dev veth1 10.1.1.200/32
|
||||
ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200
|
||||
}
|
||||
|
||||
test_xfrm_tunnel()
|
||||
{
|
||||
if [[ -e /sys/kernel/tracing/trace ]]; then
|
||||
TRACE=/sys/kernel/tracing/trace
|
||||
else
|
||||
TRACE=/sys/kernel/debug/tracing/trace
|
||||
fi
|
||||
config_device
|
||||
> ${TRACE}
|
||||
setup_xfrm_tunnel
|
||||
mkdir -p ${BPF_PIN_TUNNEL_DIR}
|
||||
bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR}
|
||||
tc qdisc add dev veth1 clsact
|
||||
tc filter add dev veth1 proto ip ingress bpf da object-pinned \
|
||||
${BPF_PIN_TUNNEL_DIR}/xfrm_get_state
|
||||
ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
|
||||
sleep 1
|
||||
grep "reqid 1" ${TRACE}
|
||||
check_err $?
|
||||
grep "spi 0x1" ${TRACE}
|
||||
check_err $?
|
||||
grep "remote ip 0xac100164" ${TRACE}
|
||||
check_err $?
|
||||
cleanup
|
||||
|
||||
if [ $ret -ne 0 ]; then
|
||||
echo -e ${RED}"FAIL: xfrm tunnel"${NC}
|
||||
return 1
|
||||
fi
|
||||
echo -e ${GREEN}"PASS: xfrm tunnel"${NC}
|
||||
}
|
||||
|
||||
attach_bpf()
|
||||
{
|
||||
DEV=$1
|
||||
@ -630,10 +546,6 @@ cleanup()
|
||||
ip link del ip6geneve11 2> /dev/null
|
||||
ip link del erspan11 2> /dev/null
|
||||
ip link del ip6erspan11 2> /dev/null
|
||||
ip xfrm policy delete dir out src 10.1.1.200/32 dst 10.1.1.100/32 2> /dev/null
|
||||
ip xfrm policy delete dir in src 10.1.1.100/32 dst 10.1.1.200/32 2> /dev/null
|
||||
ip xfrm state delete src 172.16.1.100 dst 172.16.1.200 proto esp spi 0x1 2> /dev/null
|
||||
ip xfrm state delete src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x2 2> /dev/null
|
||||
}
|
||||
|
||||
cleanup_exit()
|
||||
@ -716,10 +628,6 @@ bpf_tunnel_test()
|
||||
test_ip6ip6
|
||||
errors=$(( $errors + $? ))
|
||||
|
||||
echo "Testing IPSec tunnel..."
|
||||
test_xfrm_tunnel
|
||||
errors=$(( $errors + $? ))
|
||||
|
||||
return $errors
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user