diff --git a/Documentation/bpf/prog_flow_dissector.rst b/Documentation/bpf/prog_flow_dissector.rst index a78bf036cadd..4d86780ab0f1 100644 --- a/Documentation/bpf/prog_flow_dissector.rst +++ b/Documentation/bpf/prog_flow_dissector.rst @@ -142,3 +142,6 @@ BPF flow dissector doesn't support exporting all the metadata that in-kernel C-based implementation can export. Notable example is single VLAN (802.1Q) and double VLAN (802.1AD) tags. Please refer to the ``struct bpf_flow_keys`` for a set of information that's currently can be exported from the BPF context. + +When BPF flow dissector is attached to the root network namespace (machine-wide +policy), users can't override it in their child network namespaces. diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 7c09d87d3269..6b4b88d1599d 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -114,19 +114,46 @@ int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr, { struct bpf_prog *attached; struct net *net; + int ret = 0; net = current->nsproxy->net_ns; mutex_lock(&flow_dissector_mutex); + + if (net == &init_net) { + /* BPF flow dissector in the root namespace overrides + * any per-net-namespace one. When attaching to root, + * make sure we don't have any BPF program attached + * to the non-root namespaces. + */ + struct net *ns; + + for_each_net(ns) { + if (rcu_access_pointer(ns->flow_dissector_prog)) { + ret = -EEXIST; + goto out; + } + } + } else { + /* Make sure root flow dissector is not attached + * when attaching to the non-root namespace. + */ + if (rcu_access_pointer(init_net.flow_dissector_prog)) { + ret = -EEXIST; + goto out; + } + } + attached = rcu_dereference_protected(net->flow_dissector_prog, lockdep_is_held(&flow_dissector_mutex)); if (attached) { /* Only one BPF program can be attached at a time */ - mutex_unlock(&flow_dissector_mutex); - return -EEXIST; + ret = -EEXIST; + goto out; } rcu_assign_pointer(net->flow_dissector_prog, prog); +out: mutex_unlock(&flow_dissector_mutex); - return 0; + return ret; } int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) @@ -910,7 +937,10 @@ bool __skb_flow_dissect(const struct net *net, WARN_ON_ONCE(!net); if (net) { rcu_read_lock(); - attached = rcu_dereference(net->flow_dissector_prog); + attached = rcu_dereference(init_net.flow_dissector_prog); + + if (!attached) + attached = rcu_dereference(net->flow_dissector_prog); if (attached) { struct bpf_flow_keys flow_keys; diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh index d23d4da66b83..2c3a25d64faf 100755 --- a/tools/testing/selftests/bpf/test_flow_dissector.sh +++ b/tools/testing/selftests/bpf/test_flow_dissector.sh @@ -18,19 +18,55 @@ fi # this is the case and run it with in_netns.sh if it is being run in the root # namespace. if [[ -z $(ip netns identify $$) ]]; then - ../net/in_netns.sh "$0" "$@" - exit $? -fi + err=0 + if bpftool="$(which bpftool)"; then + echo "Testing global flow dissector..." -# Determine selftest success via shell exit code -exit_handler() -{ - if (( $? == 0 )); then + $bpftool prog loadall ./bpf_flow.o /sys/fs/bpf/flow \ + type flow_dissector + + if ! unshare --net $bpftool prog attach pinned \ + /sys/fs/bpf/flow/flow_dissector flow_dissector; then + echo "Unexpected unsuccessful attach in namespace" >&2 + err=1 + fi + + $bpftool prog attach pinned /sys/fs/bpf/flow/flow_dissector \ + flow_dissector + + if unshare --net $bpftool prog attach pinned \ + /sys/fs/bpf/flow/flow_dissector flow_dissector; then + echo "Unexpected successful attach in namespace" >&2 + err=1 + fi + + if ! $bpftool prog detach pinned \ + /sys/fs/bpf/flow/flow_dissector flow_dissector; then + echo "Failed to detach flow dissector" >&2 + err=1 + fi + + rm -rf /sys/fs/bpf/flow + else + echo "Skipping root flow dissector test, bpftool not found" >&2 + fi + + # Run the rest of the tests in a net namespace. + ../net/in_netns.sh "$0" "$@" + err=$(( $err + $? )) + + if (( $err == 0 )); then echo "selftests: $TESTNAME [PASS]"; else echo "selftests: $TESTNAME [FAILED]"; fi + exit $err +fi + +# Determine selftest success via shell exit code +exit_handler() +{ set +e # Cleanup