From 781c53bc5d5628065a46c70f02f5a0450f5842f4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 11 Jan 2016 01:16:38 +0100 Subject: [PATCH 1/2] bpf: export helper function flags and reject invalid ones Export flags used by eBPF helper functions through UAPI, so they can be used by programs (instead of them redefining all flags each time or just using the hard-coded values). It also gives a better overview what flags are used where and we can further get rid of the extra macros defined in filter.c. Moreover, reject invalid flags. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 16 ++++++++++++++++ net/core/filter.c | 37 +++++++++++++++++++++++-------------- 2 files changed, 39 insertions(+), 14 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8bed7f1176b8..d94797ce9a5a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -273,6 +273,22 @@ enum bpf_func_id { __BPF_FUNC_MAX_ID, }; +/* All flags used by eBPF helper functions, placed here. */ + +/* BPF_FUNC_skb_store_bytes flags. */ +#define BPF_F_RECOMPUTE_CSUM (1ULL << 0) + +/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags. + * First 4 bits are for passing the header field size. + */ +#define BPF_F_HDR_FIELD_MASK 0xfULL + +/* BPF_FUNC_l4_csum_replace flags. */ +#define BPF_F_PSEUDO_HDR (1ULL << 4) + +/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ +#define BPF_F_INGRESS (1ULL << 0) + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ diff --git a/net/core/filter.c b/net/core/filter.c index 0db92b5e2cbf..7c55cadc0f38 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1328,8 +1328,7 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk) return 0; } -#define BPF_RECOMPUTE_CSUM(flags) ((flags) & 1) -#define BPF_LDST_LEN 16U +#define BPF_LDST_LEN 16U static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) { @@ -1340,6 +1339,9 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) char buf[BPF_LDST_LEN]; void *ptr; + if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM))) + return -EINVAL; + /* bpf verifier guarantees that: * 'from' pointer points to bpf program stack * 'len' bytes of it were initialized @@ -1359,7 +1361,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) if (unlikely(!ptr)) return -EFAULT; - if (BPF_RECOMPUTE_CSUM(flags)) + if (flags & BPF_F_RECOMPUTE_CSUM) skb_postpull_rcsum(skb, ptr, len); memcpy(ptr, from, len); @@ -1368,7 +1370,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) /* skb_store_bits cannot return -EFAULT here */ skb_store_bits(skb, offset, ptr, len); - if (BPF_RECOMPUTE_CSUM(flags)) + if (flags & BPF_F_RECOMPUTE_CSUM) skb_postpush_rcsum(skb, ptr, len); return 0; @@ -1415,15 +1417,14 @@ const struct bpf_func_proto bpf_skb_load_bytes_proto = { .arg4_type = ARG_CONST_STACK_SIZE, }; -#define BPF_HEADER_FIELD_SIZE(flags) ((flags) & 0x0f) -#define BPF_IS_PSEUDO_HEADER(flags) ((flags) & 0x10) - static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) { struct sk_buff *skb = (struct sk_buff *) (long) r1; int offset = (int) r2; __sum16 sum, *ptr; + if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK))) + return -EINVAL; if (unlikely((u32) offset > 0xffff)) return -EFAULT; @@ -1435,7 +1436,7 @@ static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) if (unlikely(!ptr)) return -EFAULT; - switch (BPF_HEADER_FIELD_SIZE(flags)) { + switch (flags & BPF_F_HDR_FIELD_MASK) { case 2: csum_replace2(ptr, from, to); break; @@ -1467,10 +1468,12 @@ const struct bpf_func_proto bpf_l3_csum_replace_proto = { static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) { struct sk_buff *skb = (struct sk_buff *) (long) r1; - bool is_pseudo = !!BPF_IS_PSEUDO_HEADER(flags); + bool is_pseudo = flags & BPF_F_PSEUDO_HDR; int offset = (int) r2; __sum16 sum, *ptr; + if (unlikely(flags & ~(BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK))) + return -EINVAL; if (unlikely((u32) offset > 0xffff)) return -EFAULT; @@ -1482,7 +1485,7 @@ static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) if (unlikely(!ptr)) return -EFAULT; - switch (BPF_HEADER_FIELD_SIZE(flags)) { + switch (flags & BPF_F_HDR_FIELD_MASK) { case 2: inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); break; @@ -1511,13 +1514,14 @@ const struct bpf_func_proto bpf_l4_csum_replace_proto = { .arg5_type = ARG_ANYTHING, }; -#define BPF_IS_REDIRECT_INGRESS(flags) ((flags) & 1) - static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) { struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2; struct net_device *dev; + if (unlikely(flags & ~(BPF_F_INGRESS))) + return -EINVAL; + dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex); if (unlikely(!dev)) return -EINVAL; @@ -1526,7 +1530,7 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) if (unlikely(!skb2)) return -ENOMEM; - if (BPF_IS_REDIRECT_INGRESS(flags)) { + if (flags & BPF_F_INGRESS) { if (skb_at_tc_ingress(skb2)) skb_postpush_rcsum(skb2, skb_mac_header(skb2), skb2->mac_len); @@ -1553,12 +1557,17 @@ struct redirect_info { }; static DEFINE_PER_CPU(struct redirect_info, redirect_info); + static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5) { struct redirect_info *ri = this_cpu_ptr(&redirect_info); + if (unlikely(flags & ~(BPF_F_INGRESS))) + return TC_ACT_SHOT; + ri->ifindex = ifindex; ri->flags = flags; + return TC_ACT_REDIRECT; } @@ -1574,7 +1583,7 @@ int skb_do_redirect(struct sk_buff *skb) return -EINVAL; } - if (BPF_IS_REDIRECT_INGRESS(ri->flags)) { + if (ri->flags & BPF_F_INGRESS) { if (skb_at_tc_ingress(skb)) skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len); From c6c33454072fc9fe961e2b25f22a619e4fa98838 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 11 Jan 2016 01:16:39 +0100 Subject: [PATCH 2/2] bpf: support ipv6 for bpf_skb_{set,get}_tunnel_key After IPv6 support has recently been added to metadata dst and related encaps, add support for populating/reading it from an eBPF program. Commit d3aa45ce6b ("bpf: add helpers to access tunnel metadata") started with initial IPv4-only support back then (due to IPv6 metadata support not being available yet). To stay compatible with older programs, we need to test for the passed structure size. Also TOS and TTL support from the ip_tunnel_info key has been added. Tested with vxlan devs in collect meta data mode with IPv4, IPv6 and in compat mode over different network namespaces. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 10 +++++- net/core/filter.c | 69 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 71 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d94797ce9a5a..aa6f8571de13 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -289,6 +289,9 @@ enum bpf_func_id { /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ #define BPF_F_INGRESS (1ULL << 0) +/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ +#define BPF_F_TUNINFO_IPV6 (1ULL << 0) + /* user accessible mirror of in-kernel sk_buff. * new fields can only be added to the end of this structure */ @@ -312,7 +315,12 @@ struct __sk_buff { struct bpf_tunnel_key { __u32 tunnel_id; - __u32 remote_ipv4; + union { + __u32 remote_ipv4; + __u32 remote_ipv6[4]; + }; + __u8 tunnel_tos; + __u8 tunnel_ttl; }; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/net/core/filter.c b/net/core/filter.c index 7c55cadc0f38..77cdfb455e7f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1680,19 +1680,49 @@ bool bpf_helper_changes_skb_data(void *func) return false; } +static unsigned short bpf_tunnel_key_af(u64 flags) +{ + return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET; +} + static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) { struct sk_buff *skb = (struct sk_buff *) (long) r1; struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2; - struct ip_tunnel_info *info = skb_tunnel_info(skb); + const struct ip_tunnel_info *info = skb_tunnel_info(skb); + u8 compat[sizeof(struct bpf_tunnel_key)]; - if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags || !info)) - return -EINVAL; - if (ip_tunnel_info_af(info) != AF_INET) + if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) return -EINVAL; + if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) + return -EPROTO; + if (unlikely(size != sizeof(struct bpf_tunnel_key))) { + switch (size) { + case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): + /* Fixup deprecated structure layouts here, so we have + * a common path later on. + */ + if (ip_tunnel_info_af(info) != AF_INET) + return -EINVAL; + to = (struct bpf_tunnel_key *)compat; + break; + default: + return -EINVAL; + } + } to->tunnel_id = be64_to_cpu(info->key.tun_id); - to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); + to->tunnel_tos = info->key.tos; + to->tunnel_ttl = info->key.ttl; + + if (flags & BPF_F_TUNINFO_IPV6) + memcpy(to->remote_ipv6, &info->key.u.ipv6.src, + sizeof(to->remote_ipv6)); + else + to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src); + + if (unlikely(size != sizeof(struct bpf_tunnel_key))) + memcpy((void *)(long) r2, to, size); return 0; } @@ -1714,10 +1744,25 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) struct sk_buff *skb = (struct sk_buff *) (long) r1; struct bpf_tunnel_key *from = (struct bpf_tunnel_key *) (long) r2; struct metadata_dst *md = this_cpu_ptr(md_dst); + u8 compat[sizeof(struct bpf_tunnel_key)]; struct ip_tunnel_info *info; - if (unlikely(size != sizeof(struct bpf_tunnel_key) || flags)) + if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6))) return -EINVAL; + if (unlikely(size != sizeof(struct bpf_tunnel_key))) { + switch (size) { + case offsetof(struct bpf_tunnel_key, remote_ipv6[1]): + /* Fixup deprecated structure layouts here, so we have + * a common path later on. + */ + memcpy(compat, from, size); + memset(compat + size, 0, sizeof(compat) - size); + from = (struct bpf_tunnel_key *)compat; + break; + default: + return -EINVAL; + } + } skb_dst_drop(skb); dst_hold((struct dst_entry *) md); @@ -1725,9 +1770,19 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) info = &md->u.tun_info; info->mode = IP_TUNNEL_INFO_TX; + info->key.tun_flags = TUNNEL_KEY; info->key.tun_id = cpu_to_be64(from->tunnel_id); - info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); + info->key.tos = from->tunnel_tos; + info->key.ttl = from->tunnel_ttl; + + if (flags & BPF_F_TUNINFO_IPV6) { + info->mode |= IP_TUNNEL_INFO_IPV6; + memcpy(&info->key.u.ipv6.dst, from->remote_ipv6, + sizeof(from->remote_ipv6)); + } else { + info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4); + } return 0; }