Merge tag 'net-6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net

Pull networking updates from Jakub Kicinski:
 "Including fixes from netfilter and bpf.

  Current release - regressions:

   - eth: stmmac: fix failure to probe without MAC interface specified

  Current release - new code bugs:

   - docs: netlink: fix missing classic_netlink doc reference

  Previous releases - regressions:

   - deal with integer overflows in kmalloc_reserve()

   - use sk_forward_alloc_get() in sk_get_meminfo()

   - bpf_sk_storage: fix the missing uncharge in sk_omem_alloc

   - fib: avoid warn splat in flow dissector after packet mangling

   - skb_segment: call zero copy functions before using skbuff frags

   - eth: sfc: check for zero length in EF10 RX prefix

  Previous releases - always broken:

   - af_unix: fix msg_controllen test in scm_pidfd_recv() for
     MSG_CMSG_COMPAT

   - xsk: fix xsk_build_skb() dereferencing possible ERR_PTR()

   - netfilter:
      - nft_exthdr: fix non-linear header modification
      - xt_u32, xt_sctp: validate user space input
      - nftables: exthdr: fix 4-byte stack OOB write
      - nfnetlink_osf: avoid OOB read
      - one more fix for the garbage collection work from last release

   - igmp: limit igmpv3_newpack() packet size to IP_MAX_MTU

   - bpf, sockmap: fix preempt_rt splat when using raw_spin_lock_t

   - handshake: fix null-deref in handshake_nl_done_doit()

   - ip: ignore dst hint for multipath routes to ensure packets are
     hashed across the nexthops

   - phy: micrel:
      - correct bit assignments for cable test errata
      - disable EEE according to the KSZ9477 errata

  Misc:

   - docs/bpf: document compile-once-run-everywhere (CO-RE) relocations

   - Revert "net: macsec: preserve ingress frame ordering", it appears
     to have been developed against an older kernel, problem doesn't
     exist upstream"

* tag 'net-6.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (95 commits)
  net: enetc: distinguish error from valid pointers in enetc_fixup_clear_rss_rfs()
  Revert "net: team: do not use dynamic lockdep key"
  net: hns3: remove GSO partial feature bit
  net: hns3: fix the port information display when sfp is absent
  net: hns3: fix invalid mutex between tc qdisc and dcb ets command issue
  net: hns3: fix debugfs concurrency issue between kfree buffer and read
  net: hns3: fix byte order conversion issue in hclge_dbg_fd_tcam_read()
  net: hns3: Support query tx timeout threshold by debugfs
  net: hns3: fix tx timeout issue
  net: phy: Provide Module 4 KSZ9477 errata (DS80000754C)
  netfilter: nf_tables: Unbreak audit log reset
  netfilter: ipset: add the missing IP_SET_HASH_WITH_NET0 macro for ip_set_hash_netportnet.c
  netfilter: nft_set_rbtree: skip sync GC for new elements in this transaction
  netfilter: nf_tables: uapi: Describe NFTA_RULE_CHAIN_ID
  netfilter: nfnetlink_osf: avoid OOB read
  netfilter: nftables: exthdr: fix 4-byte stack OOB write
  selftests/bpf: Check bpf_sk_storage has uncharged sk_omem_alloc
  bpf: bpf_sk_storage: Fix the missing uncharge in sk_omem_alloc
  bpf: bpf_sk_storage: Fix invalid wait context lockdep report
  s390/bpf: Pass through tail call counter in trampolines
  ...
This commit is contained in:
Linus Torvalds
2023-09-07 18:33:07 -07:00
118 changed files with 1405 additions and 410 deletions

View File

@@ -50,14 +50,17 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
test_cgroup_storage \
test_tcpnotify_user test_sysctl \
test_progs-no_alu32
TEST_INST_SUBDIRS := no_alu32
# Also test bpf-gcc, if present
ifneq ($(BPF_GCC),)
TEST_GEN_PROGS += test_progs-bpf_gcc
TEST_INST_SUBDIRS += bpf_gcc
endif
ifneq ($(CLANG_CPUV4),)
TEST_GEN_PROGS += test_progs-cpuv4
TEST_INST_SUBDIRS += cpuv4
endif
TEST_GEN_FILES = test_lwt_ip_encap.bpf.o test_tc_edt.bpf.o
@@ -714,3 +717,12 @@ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
# Delete partially updated (corrupted) files on error
.DELETE_ON_ERROR:
# Keep the inherited install rule and extend it: after the default rule has
# run, create one directory under $(INSTALL_PATH) for every entry of
# TEST_INST_SUBDIRS and copy that flavor's *.bpf.o objects from $(OUTPUT)
# into it with rsync.
DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
override define INSTALL_RULE
$(DEFAULT_INSTALL_RULE)
@for DIR in $(TEST_INST_SUBDIRS); do \
mkdir -p $(INSTALL_PATH)/$$DIR; \
rsync -a $(OUTPUT)/$$DIR/*.bpf.o $(INSTALL_PATH)/$$DIR;\
done
endef

View File

@@ -8,6 +8,7 @@
#include <linux/unistd.h>
#include <linux/mount.h>
#include <sys/syscall.h>
#include "bpf/libbpf_internal.h"
static inline int sys_fsopen(const char *fsname, unsigned flags)
{
@@ -155,7 +156,7 @@ static void validate_pin(int map_fd, const char *map_name, int src_value,
ASSERT_OK(err, "obj_pin");
/* cleanup */
if (pin_opts.path_fd >= 0)
if (path_kind == PATH_FD_REL && pin_opts.path_fd >= 0)
close(pin_opts.path_fd);
if (old_cwd[0])
ASSERT_OK(chdir(old_cwd), "restore_cwd");
@@ -220,7 +221,7 @@ static void validate_get(int map_fd, const char *map_name, int src_value,
goto cleanup;
/* cleanup */
if (get_opts.path_fd >= 0)
if (path_kind == PATH_FD_REL && get_opts.path_fd >= 0)
close(get_opts.path_fd);
if (old_cwd[0])
ASSERT_OK(chdir(old_cwd), "restore_cwd");

View File

@@ -12,6 +12,17 @@
#include "test_d_path_check_rdonly_mem.skel.h"
#include "test_d_path_check_types.skel.h"
/* The close_range() syscall has not been around for long, so provide its
 * number ourselves when the system headers do not define it. This lets the
 * test invoke it via syscall() on systems with an older glibc. The fallback
 * number is 546 on alpha and 436 everywhere else.
 */
#ifndef __NR_close_range
#ifdef __alpha__
#define __NR_close_range 546
#else
#define __NR_close_range 436
#endif
#endif
static int duration;
static struct {
@@ -90,7 +101,11 @@ static int trigger_fstat_events(pid_t pid)
fstat(indicatorfd, &fileStat);
out_close:
/* triggers filp_close */
/* sys_close no longer triggers filp_close, but we can
* call sys_close_range instead which still does
*/
#define close(fd) syscall(__NR_close_range, fd, fd, 0)
close(pipefd[0]);
close(pipefd[1]);
close(sockfd);
@@ -98,6 +113,8 @@ out_close:
close(devfd);
close(localfd);
close(indicatorfd);
#undef close
return ret;
}

View File

@@ -0,0 +1,56 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Facebook */
#include <test_progs.h>
#include <bpf/libbpf.h>
#include <sys/types.h>
#include <sys/socket.h>
#include "sk_storage_omem_uncharge.skel.h"
/*
 * Drive the sk_storage_omem_uncharge BPF programs from user space.
 *
 * An IPv6 TCP socket is created and its cookie is published to the BPF
 * side. A value is then stored twice in the socket's sk_storage map entry,
 * the programs are attached and the socket is closed. Afterwards both
 * programs must have matched the socket (cookie_found == 2) and the omem
 * value they captured must be 0.
 */
void test_sk_storage_omem_uncharge(void)
{
	struct sk_storage_omem_uncharge *skel;
	socklen_t cookie_len;
	int storage_fd, ret, val;
	int sock_fd = -1;

	skel = sk_storage_omem_uncharge__open_and_load();
	if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
		return;

	/* The socket is never bound to an addr:port, so a dedicated netns
	 * is not needed.
	 */
	sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
	if (!ASSERT_GE(sock_fd, 0, "socket"))
		goto out;

	/* Let the BPF programs recognize this socket by its cookie. */
	cookie_len = sizeof(skel->bss->cookie);
	ret = getsockopt(sock_fd, SOL_SOCKET, SO_COOKIE,
			 &skel->bss->cookie, &cookie_len);
	if (!ASSERT_OK(ret, "getsockopt(SO_COOKIE)"))
		goto out;

	storage_fd = bpf_map__fd(skel->maps.sk_storage);

	/* Two updates under the same socket key; the BPF side looks for
	 * the value written by the second one.
	 */
	val = 0;
	ret = bpf_map_update_elem(storage_fd, &sock_fd, &val, 0);
	if (!ASSERT_OK(ret, "bpf_map_update_elem(value=0)"))
		goto out;

	val = 0xdeadbeef;
	ret = bpf_map_update_elem(storage_fd, &sock_fd, &val, 0);
	if (!ASSERT_OK(ret, "bpf_map_update_elem(value=0xdeadbeef)"))
		goto out;

	ret = sk_storage_omem_uncharge__attach(skel);
	if (!ASSERT_OK(ret, "attach"))
		goto out;

	/* Closing the socket is what triggers the attached programs. */
	close(sock_fd);
	sock_fd = -1;

	ASSERT_EQ(skel->bss->cookie_found, 2, "cookie_found");
	ASSERT_EQ(skel->bss->omem, 0, "omem");

out:
	sk_storage_omem_uncharge__destroy(skel);
	if (sock_fd != -1)
		close(sock_fd);
}

View File

@@ -179,6 +179,32 @@
__ret; \
})
/*
 * Wait up to @timeout_sec seconds for @fd to become writable and then
 * report the socket's pending error, i.e. the outcome of a connection
 * attempt on it.
 *
 * Returns 0 when the socket is writable and SO_ERROR is clear. Returns -1
 * with errno set otherwise: EBADF when @fd cannot be represented in an
 * fd_set, ETIME when the timeout expired, the pending socket error when
 * SO_ERROR is non-zero, or whatever select()/getsockopt() reported.
 */
static inline int poll_connect(int fd, unsigned int timeout_sec)
{
	struct timeval timeout = { .tv_sec = timeout_sec };
	fd_set wfds;
	int r, eval;
	socklen_t esize = sizeof(eval);

	/* FD_SET() on a descriptor outside [0, FD_SETSIZE) is undefined. */
	if (fd < 0 || fd >= FD_SETSIZE) {
		errno = EBADF;
		return -1;
	}

	FD_ZERO(&wfds);
	FD_SET(fd, &wfds);

	r = select(fd + 1, NULL, &wfds, NULL, &timeout);
	if (r == 0)
		errno = ETIME;
	if (r != 1)
		return -1;

	/* Writable does not mean connected: fetch the pending error. */
	if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0)
		return -1;
	if (eval != 0) {
		errno = eval;
		return -1;
	}

	return 0;
}
static inline int poll_read(int fd, unsigned int timeout_sec)
{
struct timeval timeout = { .tv_sec = timeout_sec };

View File

@@ -1452,11 +1452,18 @@ static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
if (p < 0)
goto close_cli;
if (poll_connect(c, IO_TIMEOUT_SEC) < 0) {
FAIL_ERRNO("poll_connect");
goto close_acc;
}
*v0 = p;
*v1 = c;
return 0;
close_acc:
close(p);
close_cli:
close(c);
close_srv:

View File

@@ -88,6 +88,7 @@
#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
#define sk_flags __sk_common.skc_flags
#define sk_reuse __sk_common.skc_reuse
#define sk_cookie __sk_common.skc_cookie
#define s6_addr32 in6_u.u6_addr32

View File

@@ -0,0 +1,61 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023 Facebook */
#include "vmlinux.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
/* State shared between the two programs below and the user space test. */
/* bpf_local_storage of the matched socket, recorded by inet6_sock_destruct */
void *local_storage_ptr = NULL;
/* the matched socket itself, recorded by inet6_sock_destruct */
void *sk_ptr = NULL;
/* incremented once by each program when it matches the socket */
int cookie_found = 0;
/* cookie of the socket under test; 0 means "not set yet" */
__u64 cookie = 0;
/* sk_omem_alloc of the socket as read by the fexit program */
__u32 omem = 0;
/* kernel function declaration, resolved as a ksym */
void *bpf_rdonly_cast(void *, __u32) __ksym;
/* Socket local storage map holding one int per socket. */
struct {
__uint(type, BPF_MAP_TYPE_SK_STORAGE);
__uint(map_flags, BPF_F_NO_PREALLOC);
__type(key, int);
__type(value, int);
} sk_storage SEC(".maps");
/* fexit program on bpf_local_storage_destroy().
 *
 * Acts only on the storage that inet6_sock_destruct recorded in
 * local_storage_ptr and only when the recorded socket still carries the
 * expected cookie. In that case it bumps cookie_found, copies the socket's
 * sk_omem_alloc counter into omem and clears local_storage_ptr so the same
 * storage is not matched again.
 */
SEC("fexit/bpf_local_storage_destroy")
int BPF_PROG(bpf_local_storage_destroy, struct bpf_local_storage *local_storage)
{
struct sock *sk;
/* not the storage of the socket under test */
if (local_storage_ptr != local_storage)
return 0;
/* sk_ptr is a plain void *; cast it to a read-only struct sock to read it */
sk = bpf_rdonly_cast(sk_ptr, bpf_core_type_id_kernel(struct sock));
if (sk->sk_cookie.counter != cookie)
return 0;
cookie_found++;
omem = sk->sk_omem_alloc.counter;
local_storage_ptr = NULL;
return 0;
}
/* fentry program on inet6_sock_destruct().
 *
 * Looks for the socket whose cookie equals the global cookie and whose
 * sk_storage value is 0xdeadbeef. On a match it bumps cookie_found and
 * records the socket and its sk_bpf_storage pointer for the
 * bpf_local_storage_destroy program.
 */
SEC("fentry/inet6_sock_destruct")
int BPF_PROG(inet6_sock_destruct, struct sock *sk)
{
int *value;
/* ignore everything until a cookie has been set, and any other socket */
if (!cookie || sk->sk_cookie.counter != cookie)
return 0;
/* flags == 0: look up only, do not create a storage element */
value = bpf_sk_storage_get(&sk_storage, sk, 0, 0);
if (value && *value == 0xdeadbeef) {
cookie_found++;
sk_ptr = sk;
local_storage_ptr = sk->sk_bpf_storage;
}
return 0;
}
char _license[] SEC("license") = "GPL";

View File

@@ -12,7 +12,8 @@ ksft_skip=4
TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \
ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \
ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \
ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test"
ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \
ipv4_mpath_list ipv6_mpath_list"
VERBOSE=0
PAUSE_ON_FAIL=no
@@ -2352,6 +2353,156 @@ ipv4_bcast_neigh_test()
cleanup
}
# Check the dependencies of the multipath list receive tests: the mausezahn,
# jq, bc and perf commands, plus the IPv4 and IPv6 FIB lookup tracepoints as
# listed by "perf list". Prints a message and returns 1 for the first missing
# dependency; returns 0 when everything is available.
mpath_dep_check()
{
	local tool

	for tool in mausezahn jq bc perf; do
		if [ ! -x "$(command -v "$tool")" ]; then
			echo "$tool command not found. Skipping test"
			return 1
		fi
	done

	# Quote the event globs so the shell passes them to perf verbatim
	# instead of expanding them against files in the current directory.
	if ! perf list 'fib:*' | grep -q fib_table_lookup; then
		echo "IPv4 FIB tracepoint not found. Skipping test"
		return 1
	fi

	if ! perf list 'fib6:*' | grep -q fib6_table_lookup; then
		echo "IPv6 FIB tracepoint not found. Skipping test"
		return 1
	fi

	return 0
}
# Print one counter from the "stats64" section of "ip -j -s link show".
# $1: network namespace
# $2: device name
# $3: key under "stats64" (callers in this file pass "rx")
# $4: counter name under that key (callers in this file pass "packets")
link_stats_get()
{
	local ns=$1; shift
	local dev=$1; shift
	local dir=$1; shift
	local stat=$1; shift

	# Hand the keys to jq as variables rather than splicing them into the
	# filter text, so their content can never alter the filter itself.
	ip -n "$ns" -j -s link show dev "$dev" \
		| jq --arg dir "$dir" --arg stat "$stat" \
			'.[]["stats64"][$dir][$stat]'
}
# Compare the tracepoint hit count recorded by "perf stat -j" with the number
# of packets that were received, and log the result through log_test.
# $1: perf stat output file; the counter is read from its last line
# $2: expected number of hits (received packets)
# The test passes when count / expected is at least 0.95.
list_rcv_eval()
{
	local file=$1; shift
	local expected=$1; shift
	local count ratio res

	# Without received packets there is nothing to compare against. Fail
	# explicitly instead of letting bc divide by zero and logging an
	# empty ratio.
	if ! [[ $expected =~ ^[0-9]+$ ]] || [ "$expected" -eq 0 ]; then
		log_test 1 0 "Multipath route hit ratio (no packets received)"
		return
	fi

	count=$(tail -n 1 "$file" | jq '.["counter-value"] | tonumber | floor')
	ratio=$(echo "scale=2; $count / $expected" | bc -l)
	res=$(echo "$ratio >= 0.95" | bc)

	[[ $res -eq 1 ]]
	log_test $? 0 "Multipath route hit ratio ($ratio)"
}
# IPv4 multipath list receive test.
#
# Sends UDP packets with varying destination ports from ns1 through veth2 in
# ns2, where GRO and deferred IRQs make packets arrive in lists and a
# two-nexthop multipath route forwards them. The number of successful
# fib:fib_table_lookup tracepoint hits is then compared with the number of
# packets received on veth2 (see list_rcv_eval).
# Returns 1 without running when a dependency is missing.
ipv4_mpath_list_test()
{
	echo
	echo "IPv4 multipath list receive tests"

	mpath_dep_check || return 1

	route_setup

	set -e
	run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off"

	run_cmd "ip netns exec ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\""
	run_cmd "ip netns exec ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\""
	run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on"
	run_cmd "ip -n ns2 link add name nh1 up type dummy"
	run_cmd "ip -n ns2 link add name nh2 up type dummy"
	run_cmd "ip -n ns2 address add 172.16.201.1/24 dev nh1"
	run_cmd "ip -n ns2 address add 172.16.202.1/24 dev nh2"
	run_cmd "ip -n ns2 neigh add 172.16.201.2 lladdr 00:11:22:33:44:55 nud perm dev nh1"
	run_cmd "ip -n ns2 neigh add 172.16.202.2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2"
	run_cmd "ip -n ns2 route add 203.0.113.0/24
		nexthop via 172.16.201.2 nexthop via 172.16.202.2"
	run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1"
	# With RP filter enabled the FIB lookup tracepoint can also fire for
	# source validation, i.e. more than once per packet, which would hide
	# missing forwarding lookups. The effective setting is the maximum of
	# "all" and the per-device value, so clear both.
	run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0"
	run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0"
	set +e

	local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]')
	local tmp_file=$(mktemp)
	local cmd="ip netns exec ns1 mausezahn veth1 -a own -b $dmac
		-A 172.16.101.1 -B 203.0.113.1 -t udp 'sp=12345,dp=0-65535' -q"

	# Packets forwarded in a list using a multipath route must not reuse a
	# cached result so that a flow always hits the same nexthop. In other
	# words, the FIB lookup tracepoint needs to be triggered for every
	# packet.
	#
	# Count system-wide (-a): receive processing is not guaranteed to run
	# in the context of the mausezahn task, and lookups done elsewhere
	# would otherwise not be counted.
	local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
	run_cmd "perf stat -a -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
	local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
	local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l)
	list_rcv_eval $tmp_file $diff

	rm -f "$tmp_file"
	route_cleanup
}
# IPv6 multipath list receive test.
#
# Sends UDP packets with varying destination ports from ns1 through veth2 in
# ns2, where GRO and deferred IRQs make packets arrive in lists and a
# two-nexthop multipath route forwards them. The number of successful
# fib6:fib6_table_lookup tracepoint hits is then compared with the number of
# packets received on veth2 (see list_rcv_eval).
# Returns 1 without running when a dependency is missing.
ipv6_mpath_list_test()
{
	echo
	echo "IPv6 multipath list receive tests"

	mpath_dep_check || return 1

	route_setup

	set -e
	run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off"

	run_cmd "ip netns exec ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\""
	run_cmd "ip netns exec ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\""
	run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on"
	run_cmd "ip -n ns2 link add name nh1 up type dummy"
	run_cmd "ip -n ns2 link add name nh2 up type dummy"
	run_cmd "ip -n ns2 -6 address add 2001:db8:201::1/64 dev nh1"
	run_cmd "ip -n ns2 -6 address add 2001:db8:202::1/64 dev nh2"
	run_cmd "ip -n ns2 -6 neigh add 2001:db8:201::2 lladdr 00:11:22:33:44:55 nud perm dev nh1"
	run_cmd "ip -n ns2 -6 neigh add 2001:db8:202::2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2"
	run_cmd "ip -n ns2 -6 route add 2001:db8:301::/64
		nexthop via 2001:db8:201::2 nexthop via 2001:db8:202::2"
	run_cmd "ip netns exec ns2 sysctl -qw net.ipv6.fib_multipath_hash_policy=1"
	set +e

	local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]')
	local tmp_file=$(mktemp)
	local cmd="ip netns exec ns1 mausezahn -6 veth1 -a own -b $dmac
		-A 2001:db8:101::1 -B 2001:db8:301::1 -t udp 'sp=12345,dp=0-65535' -q"

	# Packets forwarded in a list using a multipath route must not reuse a
	# cached result so that a flow always hits the same nexthop. In other
	# words, the FIB lookup tracepoint needs to be triggered for every
	# packet.
	#
	# Count system-wide (-a): receive processing is not guaranteed to run
	# in the context of the mausezahn task, and lookups done elsewhere
	# would otherwise not be counted.
	local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
	run_cmd "perf stat -a -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
	local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
	local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l)
	list_rcv_eval $tmp_file $diff

	rm -f "$tmp_file"
	route_cleanup
}
################################################################################
# usage
@@ -2433,6 +2584,8 @@ do
ipv6_mangle) ipv6_mangle_test;;
ipv4_bcast_neigh) ipv4_bcast_neigh_test;;
fib6_gc_test|ipv6_gc) fib6_gc_test;;
ipv4_mpath_list) ipv4_mpath_list_test;;
ipv6_mpath_list) ipv6_mpath_list_test;;
help) echo "Test names: $TESTS"; exit 0;;
esac