From 089b19a9204fc090793d389a265f54124eacb05d Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 22 Apr 2019 08:55:44 -0700 Subject: [PATCH 1/9] flow_dissector: switch kernel context to struct bpf_flow_dissector struct bpf_flow_dissector has a small subset of sk_buff fields that flow dissector BPF program is allowed to access and an optional pointer to real skb. Real skb is used only in bpf_skb_load_bytes helper to read non-linear data. The real motivation for this is to be able to call flow dissector from eth_get_headlen context where we don't have an skb and need to dissect raw bytes. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- include/linux/skbuff.h | 4 ++ include/net/flow_dissector.h | 7 +++ include/net/sch_generic.h | 11 ++-- net/bpf/test_run.c | 4 -- net/core/filter.c | 105 +++++++++++++++++++++++++++-------- net/core/flow_dissector.c | 45 +++++++-------- 6 files changed, 117 insertions(+), 59 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6f42942a443b..2b7b8228c5c3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1275,6 +1275,10 @@ static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) } #endif +struct bpf_flow_dissector; +bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, + __be16 proto, int nhoff, int hlen); + struct bpf_flow_keys; bool __skb_flow_bpf_dissect(struct bpf_prog *prog, const struct sk_buff *skb, diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index 2b26979efb48..7c5a8d9a8d2a 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -305,4 +305,11 @@ static inline void *skb_flow_dissector_target(struct flow_dissector *flow_dissec return ((char *)target_container) + flow_dissector->offset[key_id]; } +struct bpf_flow_dissector { + struct bpf_flow_keys *flow_keys; + const struct sk_buff *skb; + void *data; + void *data_end; +}; + #endif diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index e8f85cd2afce..21f434f3ac9e 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -364,13 +364,10 @@ struct tcf_proto { }; struct qdisc_skb_cb { - union { - struct { - unsigned int pkt_len; - u16 slave_dev_queue_mapping; - u16 tc_classid; - }; - struct bpf_flow_keys *flow_keys; + struct { + unsigned int pkt_len; + u16 slave_dev_queue_mapping; + u16 tc_classid; }; #define QDISC_CB_PRIV_LEN 20 unsigned char data[QDISC_CB_PRIV_LEN]; diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 2221573dacdb..006ad865f7fb 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -382,7 +382,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, u32 repeat = kattr->test.repeat; struct bpf_flow_keys flow_keys; u64 time_start, time_spent = 0; - struct bpf_skb_data_end *cb; u32 retval, duration; struct sk_buff *skb; struct sock *sk; @@ -423,9 +422,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, current->nsproxy->net_ns->loopback_dev); skb_reset_network_header(skb); - cb = (struct bpf_skb_data_end *)skb->cb; - cb->qdisc_cb.flow_keys = &flow_keys; - if (!repeat) repeat = 1; diff --git a/net/core/filter.c b/net/core/filter.c index fa8fb0548217..edb3a7c22f6c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1730,6 +1730,40 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = { .arg4_type = ARG_CONST_SIZE, }; +BPF_CALL_4(bpf_flow_dissector_load_bytes, + const struct bpf_flow_dissector *, ctx, u32, offset, + void *, to, u32, len) +{ + void *ptr; + + if (unlikely(offset > 0xffff)) + goto err_clear; + + if (unlikely(!ctx->skb)) + goto err_clear; + + ptr = skb_header_pointer(ctx->skb, offset, len, to); + if (unlikely(!ptr)) + goto err_clear; + if (ptr != to) + memcpy(to, ptr, len); + + return 0; +err_clear: + memset(to, 0, len); + return -EFAULT; +} + +static const struct bpf_func_proto bpf_flow_dissector_load_bytes_proto = { + .func = bpf_flow_dissector_load_bytes, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_UNINIT_MEM, + .arg4_type = ARG_CONST_SIZE, +}; + BPF_CALL_5(bpf_skb_load_bytes_relative, const struct sk_buff *, skb, u32, offset, void *, to, u32, len, u32, start_header) { @@ -6121,7 +6155,7 @@ flow_dissector_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_skb_load_bytes: - return &bpf_skb_load_bytes_proto; + return &bpf_flow_dissector_load_bytes_proto; default: return bpf_base_func_proto(func_id); } @@ -6248,9 +6282,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type return false; break; case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): - if (size != sizeof(__u64)) - return false; - break; + return false; case bpf_ctx_range(struct __sk_buff, tstamp): if (size != sizeof(__u64)) return false; @@ -6285,7 +6317,6 @@ static bool sk_filter_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data): case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, data_end): - case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, wire_len): @@ -6312,7 +6343,6 @@ static bool cg_skb_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range(struct __sk_buff, wire_len): return false; case bpf_ctx_range(struct __sk_buff, data): @@ -6358,7 +6388,6 @@ static bool lwt_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range_till(struct __sk_buff, family, local_port): case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, wire_len): return false; @@ -6601,7 +6630,6 @@ static bool tc_cls_act_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data_end): info->reg_type = PTR_TO_PACKET_END; break; - case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range_till(struct __sk_buff, family, local_port): return false; } @@ -6803,7 +6831,6 @@ static bool sk_skb_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data_meta): - case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): case bpf_ctx_range(struct __sk_buff, tstamp): case bpf_ctx_range(struct __sk_buff, wire_len): return false; @@ -6877,24 +6904,65 @@ static bool flow_dissector_is_valid_access(int off, int size, const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { + const int size_default = sizeof(__u32); + + if (off < 0 || off >= sizeof(struct __sk_buff)) + return false; + if (type == BPF_WRITE) return false; switch (off) { case bpf_ctx_range(struct __sk_buff, data): + if (size != size_default) + return false; info->reg_type = PTR_TO_PACKET; - break; + return true; case bpf_ctx_range(struct __sk_buff, data_end): + if (size != size_default) + return false; info->reg_type = PTR_TO_PACKET_END; - break; + return true; case bpf_ctx_range_ptr(struct __sk_buff, flow_keys): + if (size != sizeof(__u64)) + return false; info->reg_type = PTR_TO_FLOW_KEYS; - break; + return true; default: return false; } +} - return bpf_skb_is_valid_access(off, size, type, prog, info); +static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, + u32 *target_size) + +{ + struct bpf_insn *insn = insn_buf; + + switch (si->off) { + case offsetof(struct __sk_buff, data): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data), + si->dst_reg, si->src_reg, + offsetof(struct bpf_flow_dissector, data)); + break; + + case offsetof(struct __sk_buff, data_end): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, data_end), + si->dst_reg, si->src_reg, + offsetof(struct bpf_flow_dissector, data_end)); + break; + + case offsetof(struct __sk_buff, flow_keys): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_flow_dissector, flow_keys), + si->dst_reg, si->src_reg, + offsetof(struct bpf_flow_dissector, flow_keys)); + break; + } + + return insn - insn_buf; } static u32 bpf_convert_ctx_access(enum bpf_access_type type, @@ -7201,15 +7269,6 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, skc_num, 2, target_size)); break; - case offsetof(struct __sk_buff, flow_keys): - off = si->off; - off -= offsetof(struct __sk_buff, flow_keys); - off += offsetof(struct sk_buff, cb); - off += offsetof(struct qdisc_skb_cb, flow_keys); - *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, - si->src_reg, off); - break; - case offsetof(struct __sk_buff, tstamp): BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tstamp) != 8); @@ -8214,7 +8273,7 @@ const struct bpf_prog_ops sk_msg_prog_ops = { const struct bpf_verifier_ops flow_dissector_verifier_ops = { .get_func_proto = flow_dissector_func_proto, .is_valid_access = flow_dissector_is_valid_access, - .convert_ctx_access = bpf_convert_ctx_access, + .convert_ctx_access = flow_dissector_convert_ctx_access, }; const struct bpf_prog_ops flow_dissector_prog_ops = { diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 795449713ba4..ef6d9443cc75 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -688,39 +688,34 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog, struct flow_dissector *flow_dissector, struct bpf_flow_keys *flow_keys) { - struct bpf_skb_data_end cb_saved; - struct bpf_skb_data_end *cb; + struct bpf_flow_dissector ctx = { + .flow_keys = flow_keys, + .skb = skb, + .data = skb->data, + .data_end = skb->data + skb_headlen(skb), + }; + + return bpf_flow_dissect(prog, &ctx, skb->protocol, + skb_network_offset(skb), skb_headlen(skb)); +} + +bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, + __be16 proto, int nhoff, int hlen) +{ + struct bpf_flow_keys *flow_keys = ctx->flow_keys; u32 result; - /* Note that even though the const qualifier is discarded - * throughout the execution of the BPF program, all changes(the - * control block) are reverted after the BPF program returns. - * Therefore, __skb_flow_dissect does not alter the skb. - */ - - cb = (struct bpf_skb_data_end *)skb->cb; - - /* Save Control Block */ - memcpy(&cb_saved, cb, sizeof(cb_saved)); - memset(cb, 0, sizeof(*cb)); - /* Pass parameters to the BPF program */ memset(flow_keys, 0, sizeof(*flow_keys)); - cb->qdisc_cb.flow_keys = flow_keys; - flow_keys->n_proto = skb->protocol; - flow_keys->nhoff = skb_network_offset(skb); + flow_keys->n_proto = proto; + flow_keys->nhoff = nhoff; flow_keys->thoff = flow_keys->nhoff; - bpf_compute_data_pointers((struct sk_buff *)skb); - result = BPF_PROG_RUN(prog, skb); + result = BPF_PROG_RUN(prog, ctx); - /* Restore state */ - memcpy(cb, &cb_saved, sizeof(cb_saved)); - - flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, - skb_network_offset(skb), skb->len); + flow_keys->nhoff = clamp_t(u16, flow_keys->nhoff, nhoff, hlen); flow_keys->thoff = clamp_t(u16, flow_keys->thoff, - flow_keys->nhoff, skb->len); + flow_keys->nhoff, hlen); return result == BPF_OK; } From 7b8a1304323b35bbf060e0d29691031056836b73 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 22 Apr 2019 08:55:45 -0700 Subject: [PATCH 2/9] bpf: when doing BPF_PROG_TEST_RUN for flow dissector use no-skb mode Now that we have bpf_flow_dissect which can work on raw data, use it when doing BPF_PROG_TEST_RUN for flow dissector. Simplifies bpf_prog_test_run_flow_dissector and allows us to test no-skb mode. Note, that previously, with bpf_flow_dissect_skb we used to call eth_type_trans which pulled L2 (ETH_HLEN) header and we explicitly called skb_reset_network_header. That means flow_keys->nhoff would be initialized to 0 (skb_network_offset) in init_flow_keys. Now we call bpf_flow_dissect with nhoff set to ETH_HLEN and need to undo it once the dissection is done to preserve the existing behavior. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- net/bpf/test_run.c | 47 +++++++++++++++++----------------------------- 1 file changed, 17 insertions(+), 30 deletions(-) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 006ad865f7fb..db2ec88ab129 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -379,12 +379,12 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, union bpf_attr __user *uattr) { u32 size = kattr->test.data_size_in; + struct bpf_flow_dissector ctx = {}; u32 repeat = kattr->test.repeat; struct bpf_flow_keys flow_keys; u64 time_start, time_spent = 0; + const struct ethhdr *eth; u32 retval, duration; - struct sk_buff *skb; - struct sock *sk; void *data; int ret; u32 i; @@ -395,43 +395,31 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, if (kattr->test.ctx_in || kattr->test.ctx_out) return -EINVAL; - data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN, - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); + if (size < ETH_HLEN) + return -EINVAL; + + data = bpf_test_init(kattr, size, 0, 0); if (IS_ERR(data)) return PTR_ERR(data); - sk = kzalloc(sizeof(*sk), GFP_USER); - if (!sk) { - kfree(data); - return -ENOMEM; - } - sock_net_set(sk, current->nsproxy->net_ns); - sock_init_data(NULL, sk); - - skb = build_skb(data, 0); - if (!skb) { - kfree(data); - kfree(sk); - return -ENOMEM; - } - skb->sk = sk; - - skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); - __skb_put(skb, size); - skb->protocol = eth_type_trans(skb, - current->nsproxy->net_ns->loopback_dev); - skb_reset_network_header(skb); + eth = (struct ethhdr *)data; if (!repeat) repeat = 1; + ctx.flow_keys = &flow_keys; + ctx.data = data; + ctx.data_end = (__u8 *)data + size; + rcu_read_lock(); preempt_disable(); time_start = ktime_get_ns(); for (i = 0; i < repeat; i++) { - retval = __skb_flow_bpf_dissect(prog, skb, - &flow_keys_dissector, - &flow_keys); + retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN, + size); + + flow_keys.nhoff -= ETH_HLEN; + flow_keys.thoff -= ETH_HLEN; if (signal_pending(current)) { preempt_enable(); @@ -464,7 +452,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, retval, duration); out: - kfree_skb(skb); - kfree(sk); + kfree(data); return ret; } From 3cbf4ffba5eeec60f82868a5facc1962d8a44d00 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 22 Apr 2019 08:55:46 -0700 Subject: [PATCH 3/9] net: plumb network namespace into __skb_flow_dissect This new argument will be used in the next patches for the eth_get_headlen use case. eth_get_headlen calls flow dissector with only data (without skb) so there is currently no way to pull attached BPF flow dissector program. With this new argument, we can amend the callers to explicitly pass network namespace so we can use attached BPF program. Signed-off-by: Stanislav Fomichev Reviewed-by: Saeed Mahameed Signed-off-by: Daniel Borkmann --- include/linux/skbuff.h | 19 +++++++++++-------- net/core/flow_dissector.c | 27 +++++++++++++++++---------- net/ethernet/eth.c | 5 +++-- 3 files changed, 31 insertions(+), 20 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2b7b8228c5c3..b466fbface2e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1284,7 +1284,8 @@ bool __skb_flow_bpf_dissect(struct bpf_prog *prog, const struct sk_buff *skb, struct flow_dissector *flow_dissector, struct bpf_flow_keys *flow_keys); -bool __skb_flow_dissect(const struct sk_buff *skb, +bool __skb_flow_dissect(const struct net *net, + const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, void *data, __be16 proto, int nhoff, int hlen, @@ -1294,8 +1295,8 @@ static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, unsigned int flags) { - return __skb_flow_dissect(skb, flow_dissector, target_container, - NULL, 0, 0, 0, flags); + return __skb_flow_dissect(NULL, skb, flow_dissector, + target_container, NULL, 0, 0, 0, flags); } static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, @@ -1303,18 +1304,19 @@ static inline bool skb_flow_dissect_flow_keys(const struct sk_buff *skb, unsigned int flags) { memset(flow, 0, sizeof(*flow)); - return __skb_flow_dissect(skb, &flow_keys_dissector, flow, - NULL, 0, 0, 0, flags); + return __skb_flow_dissect(NULL, skb, &flow_keys_dissector, + flow, NULL, 0, 0, 0, flags); } static inline bool -skb_flow_dissect_flow_keys_basic(const struct sk_buff *skb, +skb_flow_dissect_flow_keys_basic(const struct net *net, + const struct sk_buff *skb, struct flow_keys_basic *flow, void *data, __be16 proto, int nhoff, int hlen, unsigned int flags) { memset(flow, 0, sizeof(*flow)); - return __skb_flow_dissect(skb, &flow_keys_basic_dissector, flow, + return __skb_flow_dissect(net, skb, &flow_keys_basic_dissector, flow, data, proto, nhoff, hlen, flags); } @@ -2492,7 +2494,8 @@ static inline void skb_probe_transport_header(struct sk_buff *skb) if (skb_transport_header_was_set(skb)) return; - if (skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0)) + if (skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, + NULL, 0, 0, 0, 0)) skb_set_transport_header(skb, keys.control.thoff); } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index ef6d9443cc75..f32c7e737fc6 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -722,6 +722,7 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, /** * __skb_flow_dissect - extract the flow_keys struct and return it + * @net: associated network namespace, derived from @skb if NULL * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified * @flow_dissector: list of keys to dissect * @target_container: target structure to put dissected values into @@ -738,7 +739,8 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, * * Caller must take care of zeroing target container memory. */ -bool __skb_flow_dissect(const struct sk_buff *skb, +bool __skb_flow_dissect(const struct net *net, + const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container, void *data, __be16 proto, int nhoff, int hlen, @@ -797,13 +799,17 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct bpf_prog *attached = NULL; rcu_read_lock(); + if (!net) { + if (skb->dev) + net = dev_net(skb->dev); + else if (skb->sk) + net = sock_net(skb->sk); + else + WARN_ON_ONCE(1); + } - if (skb->dev) - attached = rcu_dereference(dev_net(skb->dev)->flow_dissector_prog); - else if (skb->sk) - attached = rcu_dereference(sock_net(skb->sk)->flow_dissector_prog); - else - WARN_ON_ONCE(1); + if (net) + attached = rcu_dereference(net->flow_dissector_prog); if (attached) { ret = __skb_flow_bpf_dissect(attached, skb, @@ -1405,8 +1411,8 @@ u32 __skb_get_hash_symmetric(const struct sk_buff *skb) __flow_hash_secret_init(); memset(&keys, 0, sizeof(keys)); - __skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys, - NULL, 0, 0, 0, + __skb_flow_dissect(NULL, skb, &flow_keys_dissector_symmetric, + &keys, NULL, 0, 0, 0, FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); return __flow_hash_from_keys(&keys, hashrnd); @@ -1507,7 +1513,8 @@ u32 skb_get_poff(const struct sk_buff *skb) { struct flow_keys_basic keys; - if (!skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0)) + if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, + NULL, 0, 0, 0, 0)) return 0; return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb)); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index f7a3d7a171c7..1e439549c419 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -136,8 +136,9 @@ u32 eth_get_headlen(void *data, unsigned int len) return len; /* parse any remaining L2/L3 headers, check for L4 */ - if (!skb_flow_dissect_flow_keys_basic(NULL, &keys, data, eth->h_proto, - sizeof(*eth), len, flags)) + if (!skb_flow_dissect_flow_keys_basic(NULL, NULL, &keys, data, + eth->h_proto, sizeof(*eth), + len, flags)) return max_t(u32, keys.control.thoff, sizeof(*eth)); /* parse for any L4 headers */ From 9b52e3f267a6835efd50ed9002d530666d16a411 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 22 Apr 2019 08:55:47 -0700 Subject: [PATCH 4/9] flow_dissector: handle no-skb use case When called without skb, gather all required data from the __skb_flow_dissect's arguments and use recently introduces no-skb mode of bpf flow dissector. Note: WARN_ON_ONCE(!net) will now trigger for eth_get_headlen users. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- include/linux/skbuff.h | 5 ---- net/core/flow_dissector.c | 52 +++++++++++++++++++-------------------- 2 files changed, 25 insertions(+), 32 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b466fbface2e..998256c2820b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1279,11 +1279,6 @@ struct bpf_flow_dissector; bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, __be16 proto, int nhoff, int hlen); -struct bpf_flow_keys; -bool __skb_flow_bpf_dissect(struct bpf_prog *prog, - const struct sk_buff *skb, - struct flow_dissector *flow_dissector, - struct bpf_flow_keys *flow_keys); bool __skb_flow_dissect(const struct net *net, const struct sk_buff *skb, struct flow_dissector *flow_dissector, diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index f32c7e737fc6..fac712cee9d5 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -683,22 +683,6 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, } } -bool __skb_flow_bpf_dissect(struct bpf_prog *prog, - const struct sk_buff *skb, - struct flow_dissector *flow_dissector, - struct bpf_flow_keys *flow_keys) -{ - struct bpf_flow_dissector ctx = { - .flow_keys = flow_keys, - .skb = skb, - .data = skb->data, - .data_end = skb->data + skb_headlen(skb), - }; - - return bpf_flow_dissect(prog, &ctx, skb->protocol, - skb_network_offset(skb), skb_headlen(skb)); -} - bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, __be16 proto, int nhoff, int hlen) { @@ -753,6 +737,7 @@ bool __skb_flow_dissect(const struct net *net, struct flow_dissector_key_icmp *key_icmp; struct flow_dissector_key_tags *key_tags; struct flow_dissector_key_vlan *key_vlan; + struct bpf_prog *attached = NULL; enum flow_dissect_ret fdret; enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX; int num_hdrs = 0; @@ -795,26 +780,39 @@ bool __skb_flow_dissect(const struct net *net, target_container); if (skb) { - struct bpf_flow_keys flow_keys; - struct bpf_prog *attached = NULL; - - rcu_read_lock(); if (!net) { if (skb->dev) net = dev_net(skb->dev); else if (skb->sk) net = sock_net(skb->sk); - else - WARN_ON_ONCE(1); } + } - if (net) - attached = rcu_dereference(net->flow_dissector_prog); + WARN_ON_ONCE(!net); + if (net) { + rcu_read_lock(); + attached = rcu_dereference(net->flow_dissector_prog); if (attached) { - ret = __skb_flow_bpf_dissect(attached, skb, - flow_dissector, - &flow_keys); + struct bpf_flow_keys flow_keys; + struct bpf_flow_dissector ctx = { + .flow_keys = &flow_keys, + .data = data, + .data_end = data + hlen, + }; + __be16 n_proto = proto; + + if (skb) { + ctx.skb = skb; + /* we can't use 'proto' in the skb case + * because it might be set to skb->vlan_proto + * which has been pulled from the data + */ + n_proto = skb->protocol; + } + + ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff, + hlen); __skb_flow_bpf_to_target(&flow_keys, flow_dissector, target_container); rcu_read_unlock(); From c43f1255b866b423d2381f77eaa2cbc64a9c49aa Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 22 Apr 2019 08:55:48 -0700 Subject: [PATCH 5/9] net: pass net_device argument to the eth_get_headlen Update all users of eth_get_headlen to pass network device, fetch network namespace from it and pass it down to the flow dissector. This commit is a noop until administrator inserts BPF flow dissector program. Cc: Maxim Krasnyansky Cc: Saeed Mahameed Cc: Jeff Kirsher Cc: intel-wired-lan@lists.osuosl.org Cc: Yisen Zhuang Cc: Salil Mehta Cc: Michael Chan Cc: Igor Russkikh Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 3 ++- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- drivers/net/ethernet/hisilicon/hns/hns_enet.c | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 2 +- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 3 ++- drivers/net/ethernet/intel/iavf/iavf_txrx.c | 2 +- drivers/net/ethernet/intel/ice/ice_txrx.c | 2 +- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- drivers/net/ethernet/intel/igc/igc_main.c | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 +- drivers/net/tun.c | 3 ++- include/linux/etherdevice.h | 2 +- net/ethernet/eth.c | 5 +++-- 16 files changed, 22 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index c64e2fb5a4f1..350e385528fd 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -354,7 +354,8 @@ int aq_ring_rx_clean(struct aq_ring_s *self, hdr_len = buff->len; if (hdr_len > AQ_CFG_RX_HDR_SIZE) - hdr_len = eth_get_headlen(aq_buf_vaddr(&buff->rxdata), + hdr_len = eth_get_headlen(skb->dev, + aq_buf_vaddr(&buff->rxdata), AQ_CFG_RX_HDR_SIZE); memcpy(__skb_put(skb, hdr_len), aq_buf_vaddr(&buff->rxdata), diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 6528a597367b..526f36dcb204 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -899,7 +899,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp, DMA_ATTR_WEAK_ORDERING); if (unlikely(!payload)) - payload = eth_get_headlen(data_ptr, len); + payload = eth_get_headlen(bp->dev, data_ptr, len); skb = napi_alloc_skb(&rxr->bnapi->napi, payload); if (!skb) { diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 297b95c1b3c1..65b985acae38 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -598,7 +598,7 @@ static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data, } else { ring->stats.seg_pkt_cnt++; - pull_len = eth_get_headlen(va, HNS_RX_HEAD_SIZE); + pull_len = eth_get_headlen(ndev, va, HNS_RX_HEAD_SIZE); memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 176d4b965709..5f7b51c6ee91 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -2580,7 +2580,7 @@ static int hns3_alloc_skb(struct hns3_enet_ring *ring, int length, ring->stats.seg_pkt_cnt++; u64_stats_update_end(&ring->syncp); - ring->pull_len = eth_get_headlen(va, HNS3_RX_HEAD_SIZE); + ring->pull_len = eth_get_headlen(netdev, va, HNS3_RX_HEAD_SIZE); __skb_put(skb, ring->pull_len); hns3_nic_reuse_page(skb, ring->frag_num++, ring, ring->pull_len, desc_cb); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 2325cee76211..b4d970e44163 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -280,7 +280,7 @@ static bool fm10k_add_rx_frag(struct fm10k_rx_buffer *rx_buffer, /* we need the header to contain the greater of either ETH_HLEN or * 60 bytes if the skb->len is less than 60 for skb_pad. */ - pull_len = eth_get_headlen(va, FM10K_RX_HDR_LEN); + pull_len = eth_get_headlen(skb->dev, va, FM10K_RX_HDR_LEN); /* align pull length to size of long to optimize memcpy performance */ memcpy(__skb_put(skb, pull_len), va, ALIGN(pull_len, sizeof(long))); diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 1a95223c9f99..e1931701cd7e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -2035,7 +2035,8 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, /* Determine available headroom for copy */ headlen = size; if (headlen > I40E_RX_HDR_SIZE) - headlen = eth_get_headlen(xdp->data, I40E_RX_HDR_SIZE); + headlen = eth_get_headlen(skb->dev, xdp->data, + I40E_RX_HDR_SIZE); /* align pull length to size of long to optimize memcpy performance */ memcpy(__skb_put(skb, headlen), xdp->data, diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c index b64187753ad6..cf8be63a8a4f 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c +++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c @@ -1315,7 +1315,7 @@ static struct sk_buff *iavf_construct_skb(struct iavf_ring *rx_ring, /* Determine available headroom for copy */ headlen = size; if (headlen > IAVF_RX_HDR_SIZE) - headlen = eth_get_headlen(va, IAVF_RX_HDR_SIZE); + headlen = eth_get_headlen(skb->dev, va, IAVF_RX_HDR_SIZE); /* align pull length to size of long to optimize memcpy performance */ memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 79043fec0187..259f118c7d8b 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -699,7 +699,7 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, /* Determine available headroom for copy */ headlen = size; if (headlen > ICE_RX_HDR_SIZE) - headlen = eth_get_headlen(va, ICE_RX_HDR_SIZE); + headlen = eth_get_headlen(skb->dev, va, ICE_RX_HDR_SIZE); /* align pull length to size of long to optimize memcpy performance */ memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index acbb5b4f333d..9b8a4bb25327 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -8051,7 +8051,7 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring, /* Determine available headroom for copy */ headlen = size; if (headlen > IGB_RX_HDR_LEN) - headlen = eth_get_headlen(va, IGB_RX_HDR_LEN); + headlen = eth_get_headlen(skb->dev, va, IGB_RX_HDR_LEN); /* align pull length to size of long to optimize memcpy performance */ memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index f79728381e8a..e58a6e0dc4d9 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -1199,7 +1199,7 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring, /* Determine available headroom for copy */ headlen = size; if (headlen > IGC_RX_HDR_LEN) - headlen = eth_get_headlen(va, IGC_RX_HDR_LEN); + headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN); /* align pull length to size of long to optimize memcpy performance */ memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long))); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 60cec3540dd7..7b903206b534 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1800,7 +1800,7 @@ static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring, * we need the header to contain the greater of either ETH_HLEN or * 60 bytes if the skb->len is less than 60 for skb_pad. */ - pull_len = eth_get_headlen(va, IXGBE_RX_HDR_SIZE); + pull_len = eth_get_headlen(skb->dev, va, IXGBE_RX_HDR_SIZE); /* align pull length to size of long to optimize memcpy performance */ skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 49e23afa05a2..d189ed247665 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -895,7 +895,8 @@ struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring, /* Determine available headroom for copy */ headlen = size; if (headlen > IXGBEVF_RX_HDR_SIZE) - headlen = eth_get_headlen(xdp->data, IXGBEVF_RX_HDR_SIZE); + headlen = eth_get_headlen(skb->dev, xdp->data, + IXGBEVF_RX_HDR_SIZE); /* align pull length to size of long to optimize memcpy performance */ memcpy(__skb_put(skb, headlen), xdp->data, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 40f3f98aa279..7b61126fcec9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -163,7 +163,7 @@ static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode, case MLX5_INLINE_MODE_NONE: return 0; case MLX5_INLINE_MODE_TCP_UDP: - hlen = eth_get_headlen(skb->data, skb_headlen(skb)); + hlen = eth_get_headlen(skb->dev, skb->data, skb_headlen(skb)); if (hlen == ETH_HLEN && !skb_vlan_tag_present(skb)) hlen += VLAN_HLEN; break; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 24d0220b9ba0..9d72f8c76c15 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1965,7 +1965,8 @@ drop: if (frags) { /* Exercise flow dissector code path. */ - u32 headlen = eth_get_headlen(skb->data, skb_headlen(skb)); + u32 headlen = eth_get_headlen(tun->dev, skb->data, + skb_headlen(skb)); if (unlikely(headlen > skb_headlen(skb))) { this_cpu_inc(tun->pcpu_stats->rx_dropped); diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index e2f3b21cd72a..c6c1930e28a0 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -33,7 +33,7 @@ struct device; int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr); unsigned char *arch_get_platform_mac_address(void); int nvmem_get_mac_address(struct device *dev, void *addrbuf); -u32 eth_get_headlen(void *data, unsigned int max_len); +u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len); __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); extern const struct header_ops eth_header_ops; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 1e439549c419..0f9863dc4d44 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -119,13 +119,14 @@ EXPORT_SYMBOL(eth_header); /** * eth_get_headlen - determine the length of header for an ethernet frame + * @dev: pointer to network device * @data: pointer to start of frame * @len: total length of frame * * Make a best effort attempt to pull the length for all of the headers for * a given frame in a linear buffer. */ -u32 eth_get_headlen(void *data, unsigned int len) +u32 eth_get_headlen(const struct net_device *dev, void *data, unsigned int len) { const unsigned int flags = FLOW_DISSECTOR_F_PARSE_1ST_FRAG; const struct ethhdr *eth = (const struct ethhdr *)data; @@ -136,7 +137,7 @@ u32 eth_get_headlen(void *data, unsigned int len) return len; /* parse any remaining L2/L3 headers, check for L4 */ - if (!skb_flow_dissect_flow_keys_basic(NULL, NULL, &keys, data, + if (!skb_flow_dissect_flow_keys_basic(dev_net(dev), NULL, &keys, data, eth->h_proto, sizeof(*eth), len, flags)) return max_t(u32, keys.control.thoff, sizeof(*eth)); From c9cb2c1e11cee75b3af5699add3302a3997f78e4 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 22 Apr 2019 08:55:49 -0700 Subject: [PATCH 6/9] selftests/bpf: add flow dissector bpf_skb_load_bytes helper test When flow dissector is called without skb, we want to make sure bpf_skb_load_bytes invocations return error. Add small test which tries to read single byte from a packet. bpf_skb_load_bytes should always fail under BPF_PROG_TEST_RUN because it was converted to the skb-less mode. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- .../prog_tests/flow_dissector_load_bytes.c | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c new file mode 100644 index 000000000000..dc5ef155ec28 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +void test_flow_dissector_load_bytes(void) +{ + struct bpf_flow_keys flow_keys; + __u32 duration = 0, retval, size; + struct bpf_insn prog[] = { + // BPF_REG_1 - 1st argument: context + // BPF_REG_2 - 2nd argument: offset, start at first byte + BPF_MOV64_IMM(BPF_REG_2, 0), + // BPF_REG_3 - 3rd argument: destination, reserve byte on stack + BPF_ALU64_REG(BPF_MOV, BPF_REG_3, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -1), + // BPF_REG_4 - 4th argument: copy one byte + BPF_MOV64_IMM(BPF_REG_4, 1), + // bpf_skb_load_bytes(ctx, sizeof(pkt_v4), ptr, 1) + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_load_bytes), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2), + // if (ret == 0) return BPF_DROP (2) + BPF_MOV64_IMM(BPF_REG_0, BPF_DROP), + BPF_EXIT_INSN(), + // if (ret != 0) return BPF_OK (0) + BPF_MOV64_IMM(BPF_REG_0, BPF_OK), + BPF_EXIT_INSN(), + }; + int fd, err; + + /* make sure bpf_skb_load_bytes is not allowed from skb-less context + */ + fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog, + ARRAY_SIZE(prog), "GPL", 0, NULL, 0); + CHECK(fd < 0, + "flow_dissector-bpf_skb_load_bytes-load", + "fd %d errno %d\n", + fd, errno); + + err = bpf_prog_test_run(fd, 1, &pkt_v4, sizeof(pkt_v4), + &flow_keys, &size, &retval, &duration); + CHECK(size != sizeof(flow_keys) || err || retval != 1, + "flow_dissector-bpf_skb_load_bytes", + "err %d errno %d retval %d duration %d size %u/%zu\n", + err, errno, retval, duration, size, sizeof(flow_keys)); + + if (fd >= -1) + close(fd); +} From 0905beec9f52caf2c7065a8a88c08bc370850710 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 22 Apr 2019 08:55:50 -0700 Subject: [PATCH 7/9] selftests/bpf: run flow dissector tests in skb-less mode Export last_dissection map from flow dissector and use a known place in tun driver to trigger BPF flow dissection. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- .../selftests/bpf/flow_dissector_load.c | 2 +- .../selftests/bpf/flow_dissector_load.h | 16 ++- .../selftests/bpf/prog_tests/flow_dissector.c | 102 +++++++++++++++++- tools/testing/selftests/bpf/progs/bpf_flow.c | 79 +++++++++----- 4 files changed, 165 insertions(+), 34 deletions(-) diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c index 7136ab9ffa73..3fd83b9dc1bf 100644 --- a/tools/testing/selftests/bpf/flow_dissector_load.c +++ b/tools/testing/selftests/bpf/flow_dissector_load.c @@ -26,7 +26,7 @@ static void load_and_attach_program(void) struct bpf_object *obj; ret = bpf_flow_load(&obj, cfg_path_name, cfg_section_name, - cfg_map_name, &prog_fd); + cfg_map_name, NULL, &prog_fd, NULL); if (ret) error(1, 0, "bpf_flow_load %s", cfg_path_name); diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h index 41dd6959feb0..eeb48b6fc827 100644 --- a/tools/testing/selftests/bpf/flow_dissector_load.h +++ b/tools/testing/selftests/bpf/flow_dissector_load.h @@ -9,10 +9,12 @@ static inline int bpf_flow_load(struct bpf_object **obj, const char *path, const char *section_name, const char *map_name, - int *prog_fd) + const char *keys_map_name, + int *prog_fd, + int *keys_fd) { struct bpf_program *prog, *main_prog; - struct bpf_map *prog_array; + struct bpf_map *prog_array, *keys; int prog_array_fd; int ret, fd, i; @@ -37,6 +39,16 @@ static inline int bpf_flow_load(struct bpf_object **obj, if (prog_array_fd < 0) return ret; + if (keys_map_name && keys_fd) { + keys = bpf_object__find_map_by_name(*obj, keys_map_name); + if (!keys) + return -1; + + *keys_fd = bpf_map__fd(keys); + if (*keys_fd < 0) + return -1; + } + i = 0; bpf_object__for_each_program(prog, *obj) { fd = bpf_program__fd(prog); diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 126319f9a97c..51758a0ca55e 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -1,5 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include +#include +#include #define CHECK_FLOW_KEYS(desc, got, expected) \ CHECK_ATTR(memcmp(&got, &expected, sizeof(got)) != 0, \ @@ -140,13 +143,73 @@ struct test tests[] = { }, }; +static int create_tap(const char *ifname) +{ + struct ifreq ifr = { + .ifr_flags = IFF_TAP | IFF_NO_PI | IFF_NAPI | IFF_NAPI_FRAGS, + }; + int fd, ret; + + strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); + + fd = open("/dev/net/tun", O_RDWR); + if (fd < 0) + return -1; + + ret = ioctl(fd, TUNSETIFF, &ifr); + if (ret) + return -1; + + return fd; +} + +static int tx_tap(int fd, void *pkt, size_t len) +{ + struct iovec iov[] = { + { + .iov_len = len, + .iov_base = pkt, + }, + }; + return writev(fd, iov, ARRAY_SIZE(iov)); +} + +static int ifup(const char *ifname) +{ + struct ifreq ifr = {}; + int sk, ret; + + strncpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name)); + + sk = socket(PF_INET, SOCK_DGRAM, 0); + if (sk < 0) + return -1; + + ret = ioctl(sk, SIOCGIFFLAGS, &ifr); + if (ret) { + close(sk); + return -1; + } + + ifr.ifr_flags |= IFF_UP; + ret = ioctl(sk, SIOCSIFFLAGS, &ifr); + if (ret) { + close(sk); + return -1; + } + + close(sk); + return 0; +} + void test_flow_dissector(void) { + int i, err, prog_fd, keys_fd = -1, tap_fd; struct bpf_object *obj; - int i, err, prog_fd; + __u32 duration = 0; err = bpf_flow_load(&obj, "./bpf_flow.o", "flow_dissector", - "jmp_table", &prog_fd); + "jmp_table", "last_dissection", &prog_fd, &keys_fd); if (err) { error_cnt++; return; @@ -171,5 +234,40 @@ void test_flow_dissector(void) CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); } + /* Do the same tests but for skb-less flow dissector. + * We use a known path in the net/tun driver that calls + * eth_get_headlen and we manually export bpf_flow_keys + * via BPF map in this case. + * + * Note, that since eth_get_headlen operates on a L2 level, + * we adjust exported nhoff/thoff by ETH_HLEN. + */ + + err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0); + CHECK(err, "bpf_prog_attach", "err %d errno %d", err, errno); + + tap_fd = create_tap("tap0"); + CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d", tap_fd, errno); + err = ifup("tap0"); + CHECK(err, "ifup", "err %d errno %d", err, errno); + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + struct bpf_flow_keys flow_keys = {}; + struct bpf_prog_test_run_attr tattr = {}; + __u32 key = 0; + + err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt)); + CHECK(err < 0, "tx_tap", "err %d errno %d", err, errno); + + err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys); + CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err); + + flow_keys.nhoff -= ETH_HLEN; + flow_keys.thoff -= ETH_HLEN; + + CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err); + CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); + } + bpf_object__close(obj); } diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index 75b17cada539..81ad9a0b29d0 100644 --- a/tools/testing/selftests/bpf/progs/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -64,6 +64,25 @@ struct bpf_map_def SEC("maps") jmp_table = { .max_entries = 8 }; +struct bpf_map_def SEC("maps") last_dissection = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(__u32), + .value_size = sizeof(struct bpf_flow_keys), + .max_entries = 1, +}; + +static __always_inline int export_flow_keys(struct bpf_flow_keys *keys, + int ret) +{ + struct bpf_flow_keys *val; + __u32 key = 0; + + val = bpf_map_lookup_elem(&last_dissection, &key); + if (val) + memcpy(val, keys, sizeof(*val)); + return ret; +} + static __always_inline void *bpf_flow_dissect_get_header(struct __sk_buff *skb, __u16 hdr_size, void *buffer) @@ -109,10 +128,10 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) break; default: /* Protocol not supported */ - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); } - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); } SEC("flow_dissector") @@ -139,8 +158,8 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) case IPPROTO_ICMP: icmp = bpf_flow_dissect_get_header(skb, sizeof(*icmp), &_icmp); if (!icmp) - return BPF_DROP; - return BPF_OK; + return export_flow_keys(keys, BPF_DROP); + return export_flow_keys(keys, BPF_OK); case IPPROTO_IPIP: keys->is_encap = true; return parse_eth_proto(skb, bpf_htons(ETH_P_IP)); @@ -150,11 +169,11 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) case IPPROTO_GRE: gre = bpf_flow_dissect_get_header(skb, sizeof(*gre), &_gre); if (!gre) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); if (bpf_htons(gre->flags & GRE_VERSION)) /* Only inspect standard GRE packets with version 0 */ - return BPF_OK; + return export_flow_keys(keys, BPF_OK); keys->thoff += sizeof(*gre); /* Step over GRE Flags and Proto */ if (GRE_IS_CSUM(gre->flags)) @@ -170,7 +189,7 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) eth = bpf_flow_dissect_get_header(skb, sizeof(*eth), &_eth); if (!eth) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->thoff += sizeof(*eth); @@ -181,31 +200,31 @@ static __always_inline int parse_ip_proto(struct __sk_buff *skb, __u8 proto) case IPPROTO_TCP: tcp = bpf_flow_dissect_get_header(skb, sizeof(*tcp), &_tcp); if (!tcp) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); if (tcp->doff < 5) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); if ((__u8 *)tcp + (tcp->doff << 2) > data_end) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->sport = tcp->source; keys->dport = tcp->dest; - return BPF_OK; + return export_flow_keys(keys, BPF_OK); case IPPROTO_UDP: case IPPROTO_UDPLITE: udp = bpf_flow_dissect_get_header(skb, sizeof(*udp), &_udp); if (!udp) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->sport = udp->source; keys->dport = udp->dest; - return BPF_OK; + return export_flow_keys(keys, BPF_OK); default: - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); } - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); } static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr) @@ -225,7 +244,7 @@ static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr) return parse_ip_proto(skb, nexthdr); } - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); } PROG(IP)(struct __sk_buff *skb) @@ -238,11 +257,11 @@ PROG(IP)(struct __sk_buff *skb) iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph); if (!iph) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); /* IP header cannot be smaller than 20 bytes */ if (iph->ihl < 5) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->addr_proto = ETH_P_IP; keys->ipv4_src = iph->saddr; @@ -250,7 +269,7 @@ PROG(IP)(struct __sk_buff *skb) keys->thoff += iph->ihl << 2; if (data + keys->thoff > data_end) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET)) { keys->is_frag = true; @@ -264,7 +283,7 @@ PROG(IP)(struct __sk_buff *skb) } if (done) - return BPF_OK; + return export_flow_keys(keys, BPF_OK); return parse_ip_proto(skb, iph->protocol); } @@ -276,7 +295,7 @@ PROG(IPV6)(struct __sk_buff *skb) ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); if (!ip6h) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->addr_proto = ETH_P_IPV6; memcpy(&keys->ipv6_src, &ip6h->saddr, 2*sizeof(ip6h->saddr)); @@ -288,11 +307,12 @@ PROG(IPV6)(struct __sk_buff *skb) PROG(IPV6OP)(struct __sk_buff *skb) { + struct bpf_flow_keys *keys = skb->flow_keys; struct ipv6_opt_hdr *ip6h, _ip6h; ip6h = bpf_flow_dissect_get_header(skb, sizeof(*ip6h), &_ip6h); if (!ip6h) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); /* hlen is in 8-octets and does not include the first 8 bytes * of the header @@ -309,7 +329,7 @@ PROG(IPV6FR)(struct __sk_buff *skb) fragh = bpf_flow_dissect_get_header(skb, sizeof(*fragh), &_fragh); if (!fragh) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->thoff += sizeof(*fragh); keys->is_frag = true; @@ -321,13 +341,14 @@ PROG(IPV6FR)(struct __sk_buff *skb) PROG(MPLS)(struct __sk_buff *skb) { + struct bpf_flow_keys *keys = skb->flow_keys; struct mpls_label *mpls, _mpls; mpls = bpf_flow_dissect_get_header(skb, sizeof(*mpls), &_mpls); if (!mpls) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); - return BPF_OK; + return export_flow_keys(keys, BPF_OK); } PROG(VLAN)(struct __sk_buff *skb) @@ -339,10 +360,10 @@ PROG(VLAN)(struct __sk_buff *skb) if (keys->n_proto == bpf_htons(ETH_P_8021AD)) { vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); if (!vlan) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); if (vlan->h_vlan_encapsulated_proto != bpf_htons(ETH_P_8021Q)) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->nhoff += sizeof(*vlan); keys->thoff += sizeof(*vlan); @@ -350,14 +371,14 @@ PROG(VLAN)(struct __sk_buff *skb) vlan = bpf_flow_dissect_get_header(skb, sizeof(*vlan), &_vlan); if (!vlan) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->nhoff += sizeof(*vlan); keys->thoff += sizeof(*vlan); /* Only allow 8021AD + 8021Q double tagging and no triple tagging.*/ if (vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021AD) || vlan->h_vlan_encapsulated_proto == bpf_htons(ETH_P_8021Q)) - return BPF_DROP; + return export_flow_keys(keys, BPF_DROP); keys->n_proto = vlan->h_vlan_encapsulated_proto; return parse_eth_proto(skb, vlan->h_vlan_encapsulated_proto); From fe993c646831105f579976fec28d1842608bd551 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 22 Apr 2019 08:55:51 -0700 Subject: [PATCH 8/9] selftests/bpf: properly return error from bpf_flow_load Right now we incorrectly return 'ret' which is always zero at that point. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/flow_dissector_load.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h index eeb48b6fc827..daeaeb518894 100644 --- a/tools/testing/selftests/bpf/flow_dissector_load.h +++ b/tools/testing/selftests/bpf/flow_dissector_load.h @@ -25,19 +25,19 @@ static inline int bpf_flow_load(struct bpf_object **obj, main_prog = bpf_object__find_program_by_title(*obj, section_name); if (!main_prog) - return ret; + return -1; *prog_fd = bpf_program__fd(main_prog); if (*prog_fd < 0) - return ret; + return -1; prog_array = bpf_object__find_map_by_name(*obj, map_name); if (!prog_array) - return ret; + return -1; prog_array_fd = bpf_map__fd(prog_array); if (prog_array_fd < 0) - return ret; + return -1; if (keys_map_name && keys_fd) { keys = bpf_object__find_map_by_name(*obj, keys_map_name); From 02ee0658362d3713421851bb7487af77a4098bb5 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 22 Apr 2019 08:55:52 -0700 Subject: [PATCH 9/9] bpf/flow_dissector: don't adjust nhoff by ETH_HLEN in BPF_PROG_TEST_RUN Now that we use skb-less flow dissector let's return true nhoff and thoff. We used to adjust them by ETH_HLEN because that's how it was done in the skb case. For VLAN tests that looks confusing: nhoff is pointing to vlan parts :-\ Warning, this is an API change for BPF_PROG_TEST_RUN! Feel free to drop if you think that it's too late at this point to fix it. Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann --- net/bpf/test_run.c | 3 --- .../selftests/bpf/prog_tests/flow_dissector.c | 23 ++++++++----------- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index db2ec88ab129..8606e5aef0b6 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -418,9 +418,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN, size); - flow_keys.nhoff -= ETH_HLEN; - flow_keys.thoff -= ETH_HLEN; - if (signal_pending(current)) { preempt_enable(); rcu_read_unlock(); diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 51758a0ca55e..8b54adfd6264 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -82,8 +82,8 @@ struct test tests[] = { .tcp.doff = 5, }, .keys = { - .nhoff = 0, - .thoff = sizeof(struct iphdr), + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct iphdr), .addr_proto = ETH_P_IP, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IP), @@ -98,8 +98,8 @@ struct test tests[] = { .tcp.doff = 5, }, .keys = { - .nhoff = 0, - .thoff = sizeof(struct ipv6hdr), + .nhoff = ETH_HLEN, + .thoff = ETH_HLEN + sizeof(struct ipv6hdr), .addr_proto = ETH_P_IPV6, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IPV6), @@ -116,8 +116,8 @@ struct test tests[] = { .tcp.doff = 5, }, .keys = { - .nhoff = VLAN_HLEN, - .thoff = VLAN_HLEN + sizeof(struct iphdr), + .nhoff = ETH_HLEN + VLAN_HLEN, + .thoff = ETH_HLEN + VLAN_HLEN + sizeof(struct iphdr), .addr_proto = ETH_P_IP, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IP), @@ -134,8 +134,9 @@ struct test tests[] = { .tcp.doff = 5, }, .keys = { - .nhoff = VLAN_HLEN * 2, - .thoff = VLAN_HLEN * 2 + sizeof(struct ipv6hdr), + .nhoff = ETH_HLEN + VLAN_HLEN * 2, + .thoff = ETH_HLEN + VLAN_HLEN * 2 + + sizeof(struct ipv6hdr), .addr_proto = ETH_P_IPV6, .ip_proto = IPPROTO_TCP, .n_proto = __bpf_constant_htons(ETH_P_IPV6), @@ -238,9 +239,6 @@ void test_flow_dissector(void) * We use a known path in the net/tun driver that calls * eth_get_headlen and we manually export bpf_flow_keys * via BPF map in this case. - * - * Note, that since eth_get_headlen operates on a L2 level, - * we adjust exported nhoff/thoff by ETH_HLEN. */ err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0); @@ -262,9 +260,6 @@ void test_flow_dissector(void) err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys); CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err); - flow_keys.nhoff -= ETH_HLEN; - flow_keys.thoff -= ETH_HLEN; - CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err); CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys); }