From f532957d76de89b8ee2545cb6ad5265e1701d0ba Mon Sep 17 00:00:00 2001 From: David Wei Date: Wed, 28 Feb 2024 15:22:49 -0800 Subject: [PATCH 1/5] netdevsim: allow two netdevsim ports to be connected Add two netdevsim bus attribute to sysfs: /sys/bus/netdevsim/link_device /sys/bus/netdevsim/unlink_device Writing "A M B N" to link_device will link netdevsim M in netnsid A with netdevsim N in netnsid B. Writing "A M" to unlink_device will unlink netdevsim M in netnsid A from its peer, if any. rtnl_lock is taken to ensure nothing changes during the linking. Signed-off-by: David Wei Reviewed-by: Maciek Machnikowski Signed-off-by: David S. Miller --- drivers/net/netdevsim/bus.c | 145 ++++++++++++++++++++++++++++++ drivers/net/netdevsim/netdev.c | 10 +++ drivers/net/netdevsim/netdevsim.h | 2 + 3 files changed, 157 insertions(+) diff --git a/drivers/net/netdevsim/bus.c b/drivers/net/netdevsim/bus.c index 0c5aff63d242..64c0cdd31bf8 100644 --- a/drivers/net/netdevsim/bus.c +++ b/drivers/net/netdevsim/bus.c @@ -232,9 +232,154 @@ del_device_store(const struct bus_type *bus, const char *buf, size_t count) } static BUS_ATTR_WO(del_device); +static ssize_t link_device_store(const struct bus_type *bus, const char *buf, size_t count) +{ + struct netdevsim *nsim_a, *nsim_b, *peer; + struct net_device *dev_a, *dev_b; + unsigned int ifidx_a, ifidx_b; + int netnsfd_a, netnsfd_b, err; + struct net *ns_a, *ns_b; + + err = sscanf(buf, "%d:%u %d:%u", &netnsfd_a, &ifidx_a, &netnsfd_b, + &ifidx_b); + if (err != 4) { + pr_err("Format for linking two devices is \"netnsfd_a:ifidx_a netnsfd_b:ifidx_b\" (int uint int uint).\n"); + return -EINVAL; + } + + ns_a = get_net_ns_by_fd(netnsfd_a); + if (IS_ERR(ns_a)) { + pr_err("Could not find netns with fd: %d\n", netnsfd_a); + return -EINVAL; + } + + ns_b = get_net_ns_by_fd(netnsfd_b); + if (IS_ERR(ns_b)) { + pr_err("Could not find netns with fd: %d\n", netnsfd_b); + put_net(ns_a); + return -EINVAL; + } + + err = -EINVAL; + rtnl_lock(); + dev_a = __dev_get_by_index(ns_a, ifidx_a); + if (!dev_a) { + pr_err("Could not find device with ifindex %u in netnsfd %d\n", + ifidx_a, netnsfd_a); + goto out_err; + } + + if (!netdev_is_nsim(dev_a)) { + pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n", + ifidx_a, netnsfd_a); + goto out_err; + } + + dev_b = __dev_get_by_index(ns_b, ifidx_b); + if (!dev_b) { + pr_err("Could not find device with ifindex %u in netnsfd %d\n", + ifidx_b, netnsfd_b); + goto out_err; + } + + if (!netdev_is_nsim(dev_b)) { + pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n", + ifidx_b, netnsfd_b); + goto out_err; + } + + if (dev_a == dev_b) { + pr_err("Cannot link a netdevsim to itself\n"); + goto out_err; + } + + err = -EBUSY; + nsim_a = netdev_priv(dev_a); + peer = rtnl_dereference(nsim_a->peer); + if (peer) { + pr_err("Netdevsim %d:%u is already linked\n", netnsfd_a, + ifidx_a); + goto out_err; + } + + nsim_b = netdev_priv(dev_b); + peer = rtnl_dereference(nsim_b->peer); + if (peer) { + pr_err("Netdevsim %d:%u is already linked\n", netnsfd_b, + ifidx_b); + goto out_err; + } + + err = 0; + rcu_assign_pointer(nsim_a->peer, nsim_b); + rcu_assign_pointer(nsim_b->peer, nsim_a); + +out_err: + put_net(ns_b); + put_net(ns_a); + rtnl_unlock(); + + return !err ? count : err; +} +static BUS_ATTR_WO(link_device); + +static ssize_t unlink_device_store(const struct bus_type *bus, const char *buf, size_t count) +{ + struct netdevsim *nsim, *peer; + struct net_device *dev; + unsigned int ifidx; + int netnsfd, err; + struct net *ns; + + err = sscanf(buf, "%u:%u", &netnsfd, &ifidx); + if (err != 2) { + pr_err("Format for unlinking a device is \"netnsfd:ifidx\" (int uint).\n"); + return -EINVAL; + } + + ns = get_net_ns_by_fd(netnsfd); + if (IS_ERR(ns)) { + pr_err("Could not find netns with fd: %d\n", netnsfd); + return -EINVAL; + } + + err = -EINVAL; + rtnl_lock(); + dev = __dev_get_by_index(ns, ifidx); + if (!dev) { + pr_err("Could not find device with ifindex %u in netnsfd %d\n", + ifidx, netnsfd); + goto out_put_netns; + } + + if (!netdev_is_nsim(dev)) { + pr_err("Device with ifindex %u in netnsfd %d is not a netdevsim\n", + ifidx, netnsfd); + goto out_put_netns; + } + + nsim = netdev_priv(dev); + peer = rtnl_dereference(nsim->peer); + if (!peer) + goto out_put_netns; + + err = 0; + RCU_INIT_POINTER(nsim->peer, NULL); + RCU_INIT_POINTER(peer->peer, NULL); + +out_put_netns: + put_net(ns); + rtnl_unlock(); + + return !err ? count : err; +} +static BUS_ATTR_WO(unlink_device); + static struct attribute *nsim_bus_attrs[] = { &bus_attr_new_device.attr, &bus_attr_del_device.attr, + &bus_attr_link_device.attr, + &bus_attr_unlink_device.attr, NULL }; ATTRIBUTE_GROUPS(nsim_bus); diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 77e8250282a5..9063f4f2971b 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -413,8 +413,13 @@ err_free_netdev: void nsim_destroy(struct netdevsim *ns) { struct net_device *dev = ns->netdev; + struct netdevsim *peer; rtnl_lock(); + peer = rtnl_dereference(ns->peer); + if (peer) + RCU_INIT_POINTER(peer->peer, NULL); + RCU_INIT_POINTER(ns->peer, NULL); unregister_netdevice(dev); if (nsim_dev_port_is_pf(ns->nsim_dev_port)) { nsim_macsec_teardown(ns); @@ -427,6 +432,11 @@ void nsim_destroy(struct netdevsim *ns) free_netdev(dev); } +bool netdev_is_nsim(struct net_device *dev) +{ + return dev->netdev_ops == &nsim_netdev_ops; +} + static int nsim_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 028c825b86db..c8b45b0d955e 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -125,11 +125,13 @@ struct netdevsim { } udp_ports; struct nsim_ethtool ethtool; + struct netdevsim __rcu *peer; }; struct netdevsim * nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port); void nsim_destroy(struct netdevsim *ns); +bool netdev_is_nsim(struct net_device *dev); void nsim_ethtool_init(struct netdevsim *ns); From 9eb95228a74163e26d338913a5d61e571ea45307 Mon Sep 17 00:00:00 2001 From: David Wei Date: Wed, 28 Feb 2024 15:22:50 -0800 Subject: [PATCH 2/5] netdevsim: forward skbs from one connected port to another Forward skbs sent from one netdevsim port to its connected netdevsim port using dev_forward_skb, in a spirit similar to veth. Add a tx_dropped variable to struct netdevsim, tracking the number of skbs that could not be forwarded using dev_forward_skb(). The xmit() function accessing the peer ptr is protected by an RCU read critical section. The rcu_read_lock() is functionally redundant as since v5.0 all softirqs are implicitly RCU read critical sections; but it is useful for human readers. If another CPU is concurrently in nsim_destroy(), then it will first set the peer ptr to NULL. This does not affect any existing readers that dereferenced a non-NULL peer. Then, in unregister_netdevice(), there is a synchronize_rcu() before the netdev is actually unregistered and freed. This ensures that any readers i.e. xmit() that got a non-NULL peer will complete before the netdev is freed. Any readers after the RCU_INIT_POINTER() but before synchronize_rcu() will dereference NULL, making it safe. The codepath to nsim_destroy() and nsim_create() takes both the newly added nsim_dev_list_lock and rtnl_lock. This makes it safe with concurrent calls to linking two netdevsims together. Signed-off-by: David Wei Reviewed-by: Maciek Machnikowski Signed-off-by: David S. Miller --- drivers/net/netdevsim/netdev.c | 27 ++++++++++++++++++++++----- drivers/net/netdevsim/netdevsim.h | 1 + 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 9063f4f2971b..c3f3fda5fdc0 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -29,18 +29,35 @@ static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); + unsigned int len = skb->len; + struct netdevsim *peer_ns; + rcu_read_lock(); if (!nsim_ipsec_tx(ns, skb)) - goto out; + goto out_drop_free; + peer_ns = rcu_dereference(ns->peer); + if (!peer_ns) + goto out_drop_free; + + skb_tx_timestamp(skb); + if (unlikely(dev_forward_skb(peer_ns->netdev, skb) == NET_RX_DROP)) + goto out_drop_cnt; + + rcu_read_unlock(); u64_stats_update_begin(&ns->syncp); ns->tx_packets++; - ns->tx_bytes += skb->len; + ns->tx_bytes += len; u64_stats_update_end(&ns->syncp); + return NETDEV_TX_OK; -out: +out_drop_free: dev_kfree_skb(skb); - +out_drop_cnt: + rcu_read_unlock(); + u64_stats_update_begin(&ns->syncp); + ns->tx_dropped++; + u64_stats_update_end(&ns->syncp); return NETDEV_TX_OK; } @@ -70,6 +87,7 @@ nsim_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) start = u64_stats_fetch_begin(&ns->syncp); stats->tx_bytes = ns->tx_bytes; stats->tx_packets = ns->tx_packets; + stats->tx_dropped = ns->tx_dropped; } while (u64_stats_fetch_retry(&ns->syncp, start)); } @@ -302,7 +320,6 @@ static void nsim_setup(struct net_device *dev) eth_hw_addr_random(dev); dev->tx_queue_len = 0; - dev->flags |= IFF_NOARP; dev->flags &= ~IFF_MULTICAST; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE; diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index c8b45b0d955e..553c4b9b4f63 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -98,6 +98,7 @@ struct netdevsim { u64 tx_packets; u64 tx_bytes; + u64 tx_dropped; struct u64_stats_sync syncp; struct nsim_bus_dev *nsim_bus_dev; From 8debcf5832c3e8a6baaea27c75ad8a6ba5077beb Mon Sep 17 00:00:00 2001 From: David Wei Date: Wed, 28 Feb 2024 15:22:51 -0800 Subject: [PATCH 3/5] netdevsim: add ndo_get_iflink() implementation Add an implementation for ndo_get_iflink() in netdevsim that shows the ifindex of the linked peer, if any. Signed-off-by: David Wei Reviewed-by: Maciek Machnikowski Signed-off-by: David S. Miller --- drivers/net/netdevsim/netdev.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index c3f3fda5fdc0..8330bc0bcb7e 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -283,6 +283,21 @@ nsim_set_features(struct net_device *dev, netdev_features_t features) return 0; } +static int nsim_get_iflink(const struct net_device *dev) +{ + struct netdevsim *nsim, *peer; + int iflink; + + nsim = netdev_priv(dev); + + rcu_read_lock(); + peer = rcu_dereference(nsim->peer); + iflink = peer ? READ_ONCE(peer->netdev->ifindex) : 0; + rcu_read_unlock(); + + return iflink; +} + static const struct net_device_ops nsim_netdev_ops = { .ndo_start_xmit = nsim_start_xmit, .ndo_set_rx_mode = nsim_set_rx_mode, @@ -300,6 +315,7 @@ static const struct net_device_ops nsim_netdev_ops = { .ndo_set_vf_rss_query_en = nsim_set_vf_rss_query_en, .ndo_setup_tc = nsim_setup_tc, .ndo_set_features = nsim_set_features, + .ndo_get_iflink = nsim_get_iflink, .ndo_bpf = nsim_bpf, }; From dfb429ea4f2db6a946ada94cce8955e87906e6b5 Mon Sep 17 00:00:00 2001 From: David Wei Date: Wed, 28 Feb 2024 15:22:52 -0800 Subject: [PATCH 4/5] netdevsim: add selftest for forwarding skb between connected ports Connect two netdevsim ports in different namespaces together, then send packets between them using socat. Signed-off-by: David Wei Reviewed-by: Maciek Machnikowski Signed-off-by: David S. Miller --- .../selftests/drivers/net/netdevsim/Makefile | 1 + .../selftests/drivers/net/netdevsim/peer.sh | 143 ++++++++++++++++++ 2 files changed, 144 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/netdevsim/peer.sh diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile index 7a29a05bea8b..5bace0b7fb57 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/Makefile +++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile @@ -10,6 +10,7 @@ TEST_PROGS = devlink.sh \ fib.sh \ hw_stats_l3.sh \ nexthop.sh \ + peer.sh \ psample.sh \ tc-mq-visibility.sh \ udp_tunnel_nic.sh \ diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh new file mode 100755 index 000000000000..aed62d9e6c0a --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh @@ -0,0 +1,143 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +source ../../../net/net_helper.sh + +NSIM_DEV_1_ID=$((256 + RANDOM % 256)) +NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID +NSIM_DEV_2_ID=$((512 + RANDOM % 256)) +NSIM_DEV_2_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_2_ID + +NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device +NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device +NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device +NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device + +socat_check() +{ + if [ ! -x "$(command -v socat)" ]; then + echo "socat command not found. Skipping test" + return 1 + fi + + return 0 +} + +setup_ns() +{ + set -e + ip netns add nssv + ip netns add nscl + + NSIM_DEV_1_NAME=$(find $NSIM_DEV_1_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_DEV_1_SYS/net -exec basename {} \;) + NSIM_DEV_2_NAME=$(find $NSIM_DEV_2_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_DEV_2_SYS/net -exec basename {} \;) + + ip link set $NSIM_DEV_1_NAME netns nssv + ip link set $NSIM_DEV_2_NAME netns nscl + + ip netns exec nssv ip addr add '192.168.1.1/24' dev $NSIM_DEV_1_NAME + ip netns exec nscl ip addr add '192.168.1.2/24' dev $NSIM_DEV_2_NAME + + ip netns exec nssv ip link set dev $NSIM_DEV_1_NAME up + ip netns exec nscl ip link set dev $NSIM_DEV_2_NAME up + set +e +} + +cleanup_ns() +{ + ip netns del nscl + ip netns del nssv +} + +### +### Code start +### + +socat_check || exit 4 + +modprobe netdevsim + +# linking + +echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_NEW +echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_NEW +udevadm settle + +setup_ns + +NSIM_DEV_1_FD=$((256 + RANDOM % 256)) +exec {NSIM_DEV_1_FD} $NSIM_DEV_SYS_LINK 2>/dev/null +if [ $? -eq 0 ]; then + echo "linking with non-existent netdevsim should fail" + cleanup_ns + exit 1 +fi + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX 2000:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null +if [ $? -eq 0 ]; then + echo "linking with non-existent netnsid should fail" + cleanup_ns + exit 1 +fi + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null +if [ $? -eq 0 ]; then + echo "linking with self should fail" + cleanup_ns + exit 1 +fi + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK +if [ $? -ne 0 ]; then + echo "linking netdevsim1 with netdevsim2 should succeed" + cleanup_ns + exit 1 +fi + +# argument error checking + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:a" > $NSIM_DEV_SYS_LINK 2>/dev/null +if [ $? -eq 0 ]; then + echo "invalid arg should fail" + cleanup_ns + exit 1 +fi + +# send/recv packets + +tmp_file=$(mktemp) +ip netns exec nssv socat TCP-LISTEN:1234,fork $tmp_file & +pid=$! +res=0 + +wait_local_port_listen nssv 1234 tcp + +echo "HI" | ip netns exec nscl socat STDIN TCP:192.168.1.1:1234 + +count=$(cat $tmp_file | wc -c) +if [[ $count -ne 3 ]]; then + echo "expected 3 bytes, got $count" + res=1 +fi + +echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_UNLINK + +echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_DEL + +kill $pid +echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_DEL + +cleanup_ns + +modprobe -r netdevsim + +exit $res From 8ee60f9c41fb01440e8a8f97127869c9b1978362 Mon Sep 17 00:00:00 2001 From: David Wei Date: Wed, 28 Feb 2024 15:22:53 -0800 Subject: [PATCH 5/5] netdevsim: fix rtnetlink.sh selftest I cleared IFF_NOARP flag from netdevsim dev->flags in order to support skb forwarding. This breaks the rtnetlink.sh selftest kci_test_ipsec_offload() test because ipsec does not connect to peers it cannot transmit to. Fix the issue by adding a neigh entry manually. ipsec_offload test now successfully pass. Signed-off-by: David Wei Signed-off-by: David S. Miller --- tools/testing/selftests/net/rtnetlink.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 874a2952aa8e..bdf6f10d0558 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -801,6 +801,8 @@ kci_test_ipsec_offload() end_test "FAIL: ipsec_offload SA offload missing from list output" fi + # we didn't create a peer, make sure we can Tx + ip neigh add $dstip dev $dev lladdr 00:11:22:33:44:55 # use ping to exercise the Tx path ping -I $dev -c 3 -W 1 -i 0 $dstip >/dev/null