From ca059af85283ba33d816b833a2654cb9a4b697de Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 28 Mar 2019 12:12:19 +0000 Subject: [PATCH 1/9] selftests: forwarding: Add reverse path forwarding (RPF) test cases In case a packet is routed using a multicast route whose specified ingress interface does not match the interface from which the packet was received, the packet is dropped. Add IPv4 and IPv6 test cases for above mentioned scenario. Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/forwarding/router_multicast.sh | 107 +++++++++++++++++- 1 file changed, 106 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh index 109e6d785169..57e90c873a2c 100755 --- a/tools/testing/selftests/net/forwarding/router_multicast.sh +++ b/tools/testing/selftests/net/forwarding/router_multicast.sh @@ -28,7 +28,7 @@ # +------------------+ +------------------+ # -ALL_TESTS="mcast_v4 mcast_v6" +ALL_TESTS="mcast_v4 mcast_v6 rpf_v4 rpf_v6" NUM_NETIFS=6 source lib.sh source tc_common.sh @@ -46,10 +46,14 @@ h1_create() ip route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::1 ip route add 2001:db8:3::/64 vrf v$h1 nexthop via 2001:db8:1::1 + + tc qdisc add dev $h1 ingress } h1_destroy() { + tc qdisc del dev $h1 ingress + ip route del 2001:db8:3::/64 vrf v$h1 ip route del 2001:db8:2::/64 vrf v$h1 @@ -124,10 +128,14 @@ router_create() ip address add 2001:db8:1::1/64 dev $rp1 ip address add 2001:db8:2::1/64 dev $rp2 ip address add 2001:db8:3::1/64 dev $rp3 + + tc qdisc add dev $rp3 ingress } router_destroy() { + tc qdisc del dev $rp3 ingress + ip address del 2001:db8:3::1/64 dev $rp3 ip address del 2001:db8:2::1/64 dev $rp2 ip address del 2001:db8:1::1/64 dev $rp1 @@ -301,6 +309,103 @@ mcast_v6() log_test "mcast IPv6" } +rpf_v4() +{ + # Add a multicast route from first router port to the other two. Send + # matching packets and test that both hosts receive them. Then, send + # the same packets via the third router port and test that they do not + # reach any host due to RPF check. A filter with 'skip_hw' is added to + # test that devices capable of multicast routing offload trap those + # packets. The filter is essentialy a NOP in other scenarios. + + RET=0 + + tc filter add dev $h1 ingress protocol ip pref 1 handle 1 flower \ + dst_ip 225.1.2.3 ip_proto udp dst_port 12345 action drop + tc filter add dev $h2 ingress protocol ip pref 1 handle 1 flower \ + dst_ip 225.1.2.3 ip_proto udp dst_port 12345 action drop + tc filter add dev $h3 ingress protocol ip pref 1 handle 1 flower \ + dst_ip 225.1.2.3 ip_proto udp dst_port 12345 action drop + tc filter add dev $rp3 ingress protocol ip pref 1 handle 1 flower \ + skip_hw dst_ip 225.1.2.3 ip_proto udp dst_port 12345 action pass + + create_mcast_sg $rp1 198.51.100.2 225.1.2.3 $rp2 $rp3 + + $MZ $h1 -c 5 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \ + -a 00:11:22:33:44:55 -b 01:00:5e:01:02:03 \ + -A 198.51.100.2 -B 225.1.2.3 -q + + tc_check_packets "dev $h2 ingress" 1 5 + check_err $? "Multicast not received on first host" + tc_check_packets "dev $h3 ingress" 1 5 + check_err $? "Multicast not received on second host" + + $MZ $h3 -c 5 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \ + -a 00:11:22:33:44:55 -b 01:00:5e:01:02:03 \ + -A 198.51.100.2 -B 225.1.2.3 -q + + tc_check_packets "dev $h1 ingress" 1 0 + check_err $? "Multicast received on first host when should not" + tc_check_packets "dev $h2 ingress" 1 5 + check_err $? "Multicast received on second host when should not" + tc_check_packets "dev $rp3 ingress" 1 5 + check_err $? "Packets not trapped due to RPF check" + + delete_mcast_sg $rp1 198.51.100.2 225.1.2.3 $rp2 $rp3 + + tc filter del dev $rp3 ingress protocol ip pref 1 handle 1 flower + tc filter del dev $h3 ingress protocol ip pref 1 handle 1 flower + tc filter del dev $h2 ingress protocol ip pref 1 handle 1 flower + tc filter del dev $h1 ingress protocol ip pref 1 handle 1 flower + + log_test "RPF IPv4" +} + +rpf_v6() +{ + RET=0 + + tc filter add dev $h1 ingress protocol ipv6 pref 1 handle 1 flower \ + dst_ip ff0e::3 ip_proto udp dst_port 12345 action drop + tc filter add dev $h2 ingress protocol ipv6 pref 1 handle 1 flower \ + dst_ip ff0e::3 ip_proto udp dst_port 12345 action drop + tc filter add dev $h3 ingress protocol ipv6 pref 1 handle 1 flower \ + dst_ip ff0e::3 ip_proto udp dst_port 12345 action drop + tc filter add dev $rp3 ingress protocol ipv6 pref 1 handle 1 flower \ + skip_hw dst_ip ff0e::3 ip_proto udp dst_port 12345 action pass + + create_mcast_sg $rp1 2001:db8:1::2 ff0e::3 $rp2 $rp3 + + $MZ $h1 -6 -c 5 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \ + -a 00:11:22:33:44:55 -b 33:33:00:00:00:03 \ + -A 2001:db8:1::2 -B ff0e::3 -q + + tc_check_packets "dev $h2 ingress" 1 5 + check_err $? "Multicast not received on first host" + tc_check_packets "dev $h3 ingress" 1 5 + check_err $? "Multicast not received on second host" + + $MZ $h3 -6 -c 5 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \ + -a 00:11:22:33:44:55 -b 33:33:00:00:00:03 \ + -A 2001:db8:1::2 -B ff0e::3 -q + + tc_check_packets "dev $h1 ingress" 1 0 + check_err $? "Multicast received on first host when should not" + tc_check_packets "dev $h2 ingress" 1 5 + check_err $? "Multicast received on second host when should not" + tc_check_packets "dev $rp3 ingress" 1 5 + check_err $? "Packets not trapped due to RPF check" + + delete_mcast_sg $rp1 2001:db8:1::2 ff0e::3 $rp2 $rp3 + + tc filter del dev $rp3 ingress protocol ipv6 pref 1 handle 1 flower + tc filter del dev $h3 ingress protocol ipv6 pref 1 handle 1 flower + tc filter del dev $h2 ingress protocol ipv6 pref 1 handle 1 flower + tc filter del dev $h1 ingress protocol ipv6 pref 1 handle 1 flower + + log_test "RPF IPv6" +} + trap cleanup EXIT setup_prepare From 0637e1f878b5b670485ee0afbd9e22c9cea2ffe0 Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Thu, 28 Mar 2019 12:12:20 +0000 Subject: [PATCH 2/9] selftests: forwarding: Add PCP match and VLAN match tests Send packets with VLAN and PCP set and check that TC flower filters can match on these keys. Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/forwarding/tc_flower.sh | 59 ++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh index 20d1077e5a3d..29bcfa84aec7 100755 --- a/tools/testing/selftests/net/forwarding/tc_flower.sh +++ b/tools/testing/selftests/net/forwarding/tc_flower.sh @@ -2,7 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \ - match_src_ip_test match_ip_flags_test" + match_src_ip_test match_ip_flags_test match_pcp_test match_vlan_test" NUM_NETIFS=2 source tc_common.sh source lib.sh @@ -219,6 +219,63 @@ match_ip_flags_test() log_test "ip_flags match ($tcflags)" } +match_pcp_test() +{ + RET=0 + + vlan_create $h2 85 v$h2 192.0.2.11/24 + + tc filter add dev $h2 ingress protocol 802.1q pref 1 handle 101 \ + flower vlan_prio 6 $tcflags dst_mac $h2mac action drop + tc filter add dev $h2 ingress protocol 802.1q pref 2 handle 102 \ + flower vlan_prio 7 $tcflags dst_mac $h2mac action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -B 192.0.2.11 -Q 7:85 -t ip -q + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -B 192.0.2.11 -Q 0:85 -t ip -q + + tc_check_packets "dev $h2 ingress" 101 0 + check_err $? "Matched on specified PCP when should not" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on specified PCP" + + tc filter del dev $h2 ingress protocol 802.1q pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol 802.1q pref 1 handle 101 flower + + vlan_destroy $h2 85 + + log_test "PCP match ($tcflags)" +} + +match_vlan_test() +{ + RET=0 + + vlan_create $h2 85 v$h2 192.0.2.11/24 + vlan_create $h2 75 v$h2 192.0.2.10/24 + + tc filter add dev $h2 ingress protocol 802.1q pref 1 handle 101 \ + flower vlan_id 75 $tcflags action drop + tc filter add dev $h2 ingress protocol 802.1q pref 2 handle 102 \ + flower vlan_id 85 $tcflags action drop + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -B 192.0.2.11 -Q 0:85 -t ip -q + + tc_check_packets "dev $h2 ingress" 101 0 + check_err $? "Matched on specified VLAN when should not" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on specified VLAN" + + tc filter del dev $h2 ingress protocol 802.1q pref 2 handle 102 flower + tc filter del dev $h2 ingress protocol 802.1q pref 1 handle 101 flower + + vlan_destroy $h2 75 + vlan_destroy $h2 85 + + log_test "VLAN match ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} From 2fcbc0b15e39019e84863a69c822b3c3103cfd56 Mon Sep 17 00:00:00 2001 From: Danielle Ratson Date: Thu, 28 Mar 2019 12:12:21 +0000 Subject: [PATCH 3/9] selftests: forwarding: Test action VLAN modify Construct a basic topology consisting of two hosts connected using a VLAN-aware bridge. Put each port in a different VLAN and test that ping fails. Add ingress and egress filters with a VLAN modify action and test that ping passes. Signed-off-by: Danielle Ratson Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../net/forwarding/tc_vlan_modify.sh | 164 ++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100755 tools/testing/selftests/net/forwarding/tc_vlan_modify.sh diff --git a/tools/testing/selftests/net/forwarding/tc_vlan_modify.sh b/tools/testing/selftests/net/forwarding/tc_vlan_modify.sh new file mode 100755 index 000000000000..45378905cb97 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_vlan_modify.sh @@ -0,0 +1,164 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + vlan_modify_ingress + vlan_modify_egress +" + +NUM_NETIFS=4 +CHECK_TC="yes" +source lib.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 + vlan_create $h1 85 v$h1 192.0.2.17/28 2001:db8:2::1/64 +} + +h1_destroy() +{ + vlan_destroy $h1 85 + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64 + vlan_create $h2 65 v$h2 192.0.2.18/28 2001:db8:2::2/64 +} + +h2_destroy() +{ + vlan_destroy $h2 65 + simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64 +} + +switch_create() +{ + ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev $swp2 up + + bridge vlan add dev $swp1 vid 85 + bridge vlan add dev $swp2 vid 65 + + bridge vlan add dev $swp2 vid 85 + bridge vlan add dev $swp1 vid 65 + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + bridge vlan del vid 65 dev $swp1 + bridge vlan del vid 85 dev $swp2 + + bridge vlan del vid 65 dev $swp2 + bridge vlan del vid 85 dev $swp1 + + ip link set dev $swp2 down + ip link set dev $swp1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +vlan_modify_ingress() +{ + RET=0 + + ping_do $h1.85 192.0.2.18 + check_fail $? "ping between two different vlans passed when should not" + + ping6_do $h1.85 2001:db8:2::2 + check_fail $? "ping6 between two different vlans passed when should not" + + tc filter add dev $swp1 ingress protocol all pref 1 handle 1 \ + flower action vlan modify id 65 + tc filter add dev $swp2 ingress protocol all pref 1 handle 1 \ + flower action vlan modify id 85 + + ping_do $h1.85 192.0.2.18 + check_err $? "ping between two different vlans failed when should not" + + ping6_do $h1.85 2001:db8:2::2 + check_err $? "ping6 between two different vlans failed when should not" + + log_test "VLAN modify at ingress" + + tc filter del dev $swp2 ingress protocol all pref 1 handle 1 flower + tc filter del dev $swp1 ingress protocol all pref 1 handle 1 flower +} + +vlan_modify_egress() +{ + RET=0 + + ping_do $h1.85 192.0.2.18 + check_fail $? "ping between two different vlans passed when should not" + + ping6_do $h1.85 2001:db8:2::2 + check_fail $? "ping6 between two different vlans passed when should not" + + tc filter add dev $swp1 egress protocol all pref 1 handle 1 \ + flower action vlan modify id 85 + tc filter add dev $swp2 egress protocol all pref 1 handle 1 \ + flower action vlan modify id 65 + + ping_do $h1.85 192.0.2.18 + check_err $? "ping between two different vlans failed when should not" + + ping6_do $h1.85 2001:db8:2::2 + check_err $? "ping6 between two different vlans failed when should not" + + log_test "VLAN modify at egress" + + tc filter del dev $swp2 egress protocol all pref 1 handle 1 flower + tc filter del dev $swp1 egress protocol all pref 1 handle 1 flower +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS From 2cca8751af36d5f84eaa05d63632afa25a523b69 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Mar 2019 12:12:23 +0000 Subject: [PATCH 4/9] selftests: forwarding: devlink_lib: Avoid double sourcing of lib.sh Don't source lib.sh twice and make the script work with ifnames passed on the command line. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/spectrum-2/tc_flower.sh | 1 + .../drivers/net/mlxsw/spectrum/devlink_resources.sh | 3 +++ .../drivers/net/mlxsw/spectrum/resource_scale.sh | 5 ++++- tools/testing/selftests/net/forwarding/devlink_lib.sh | 10 ---------- 4 files changed, 8 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh index a372b2f60874..fb850e0ec837 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh @@ -12,6 +12,7 @@ ALL_TESTS="single_mask_test identical_filters_test two_masks_test \ delta_two_masks_one_key_test delta_simple_rehash_test \ bloom_simple_test bloom_complex_test bloom_delta_test" NUM_NETIFS=2 +source $lib_dir/lib.sh source $lib_dir/tc_common.sh source $lib_dir/devlink_lib.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh index b1fe960e398a..6f2683cbc7d5 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_resources.sh @@ -1,7 +1,10 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +lib_dir=$(dirname $0)/../../../../net/forwarding + NUM_NETIFS=1 +source $lib_dir/lib.sh source devlink_lib_spectrum.sh setup_prepare() diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index e7ffc79561b7..43ba1b438f6d 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -1,8 +1,11 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +lib_dir=$(dirname $0)/../../../../net/forwarding + NUM_NETIFS=6 -source ../../../../net/forwarding/tc_common.sh +source $lib_dir/lib.sh +source $lib_dir/tc_common.sh source devlink_lib_spectrum.sh current_test="" diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 57cf8914910d..981b897d418d 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -1,16 +1,6 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -############################################################################## -# Source library - -relative_path="${BASH_SOURCE%/*}" -if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then - relative_path="." -fi - -source "$relative_path/lib.sh" - ############################################################################## # Defines From 8e46aee69722054e85ae4b6029d0aed678016950 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Mar 2019 12:12:23 +0000 Subject: [PATCH 5/9] selftests: forwarding: devlink_lib: Simplify deduction of DEVLINK_DEV Use devlink -j and jq for more accurate querying. Use cut -f-2 instead of rev-cut-rev combo. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- tools/testing/selftests/net/forwarding/devlink_lib.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 981b897d418d..61d3579a62af 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -4,9 +4,8 @@ ############################################################################## # Defines -DEVLINK_DEV=$(devlink port show | grep "${NETIFS[p1]}" | \ - grep -v "${NETIFS[p1]}[0-9]" | cut -d" " -f1 | \ - rev | cut -d"/" -f2- | rev) +DEVLINK_DEV=$(devlink port show "${NETIFS[p1]}" -j \ + | jq -r '.port | keys[]' | cut -d/ -f-2) if [ -z "$DEVLINK_DEV" ]; then echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it" exit 1 From d04cc726c8da45732c815549f2841a646332cc94 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Mar 2019 12:12:24 +0000 Subject: [PATCH 6/9] selftests: forwarding: devlink_lib: Add shared buffer helpers Add helpers to obtain, set, and restore a pool size, and a port-pool and tc-pool threshold. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/net/forwarding/devlink_lib.sh | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 61d3579a62af..8553a67a2322 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -95,3 +95,98 @@ devlink_reload() grep -c "size_new") check_err $still_pending "Failed reload - There are still unset sizes" } + +declare -A DEVLINK_ORIG + +devlink_port_pool_threshold() +{ + local port=$1; shift + local pool=$1; shift + + devlink sb port pool show $port pool $pool -j \ + | jq '.port_pool."'"$port"'"[].threshold' +} + +devlink_port_pool_th_set() +{ + local port=$1; shift + local pool=$1; shift + local th=$1; shift + local key="port_pool($port,$pool).threshold" + + DEVLINK_ORIG[$key]=$(devlink_port_pool_threshold $port $pool) + devlink sb port pool set $port pool $pool th $th +} + +devlink_port_pool_th_restore() +{ + local port=$1; shift + local pool=$1; shift + local key="port_pool($port,$pool).threshold" + + devlink sb port pool set $port pool $pool th ${DEVLINK_ORIG[$key]} +} + +devlink_pool_size_thtype() +{ + local pool=$1; shift + + devlink sb pool show "$DEVLINK_DEV" pool $pool -j \ + | jq -r '.pool[][] | (.size, .thtype)' +} + +devlink_pool_size_thtype_set() +{ + local pool=$1; shift + local thtype=$1; shift + local size=$1; shift + local key="pool($pool).size_thtype" + + DEVLINK_ORIG[$key]=$(devlink_pool_size_thtype $pool) + devlink sb pool set "$DEVLINK_DEV" pool $pool size $size thtype $thtype +} + +devlink_pool_size_thtype_restore() +{ + local pool=$1; shift + local key="pool($pool).size_thtype" + local -a orig=(${DEVLINK_ORIG[$key]}) + + devlink sb pool set "$DEVLINK_DEV" pool $pool \ + size ${orig[0]} thtype ${orig[1]} +} + +devlink_tc_bind_pool_th() +{ + local port=$1; shift + local tc=$1; shift + local dir=$1; shift + + devlink sb tc bind show $port tc $tc type $dir -j \ + | jq -r '.tc_bind[][] | (.pool, .threshold)' +} + +devlink_tc_bind_pool_th_set() +{ + local port=$1; shift + local tc=$1; shift + local dir=$1; shift + local pool=$1; shift + local th=$1; shift + local key="tc_bind($port,$dir,$tc).pool_th" + + DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir) + devlink sb tc bind set $port tc $tc type $dir pool $pool th $th +} + +devlink_tc_bind_pool_th_restore() +{ + local port=$1; shift + local tc=$1; shift + local dir=$1; shift + local key="tc_bind($port,$dir,$tc).pool_th" + local -a orig=(${DEVLINK_ORIG[$key]}) + + devlink sb tc bind set $port tc $tc type $dir \ + pool ${orig[0]} th ${orig[1]} +} From 5dde21b3a7f648342d873ec964e0c486c329b91c Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Mar 2019 12:12:25 +0000 Subject: [PATCH 7/9] selftests: mlxsw: qos_mc_aware: Configure shared buffers This test runs two streams of traffic from two independent ports to create congestion on one egress port. It is necessary to configure the shared buffer thresholds correctly, to make sure that there is traffic from both streams in the shared buffer. Only then can the test actually test prioritization among these streams. Without this configuration, it is possible, that one of the streams takes all of port-pool quota, and the other stream is not even admitted, thus invalidating the result. On Spectrum-1, this is not a problem, because MC traffic uses a separate pool. But for Spectrum-2, MC and UC share the same pool, and the correct configuration is important. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/qos_mc_aware.sh | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index 117f6f35d72f..2e17fe3b4872 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -67,6 +67,7 @@ lib_dir=$(dirname $0)/../../../net/forwarding NUM_NETIFS=6 source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh h1_create() { @@ -140,10 +141,28 @@ switch_create() ip link set dev br111 up ip link set dev $swp2.111 master br111 ip link set dev $swp3.111 master br111 + + # Make sure that ingress quotas are smaller than egress so that there is + # room for both streams of traffic to be admitted to shared buffer. + devlink_port_pool_th_set $swp1 0 5 + devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5 + + devlink_port_pool_th_set $swp2 0 5 + devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5 + + devlink_port_pool_th_set $swp3 4 12 } switch_destroy() { + devlink_port_pool_th_restore $swp3 4 + + devlink_tc_bind_pool_th_restore $swp2 1 ingress + devlink_port_pool_th_restore $swp2 0 + + devlink_tc_bind_pool_th_restore $swp1 0 ingress + devlink_port_pool_th_restore $swp1 0 + ip link del dev br111 ip link del dev br1 From 573363a68f27c75f92b029345eed0026a2bac0c0 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Mar 2019 12:12:26 +0000 Subject: [PATCH 8/9] selftests: mlxsw: Add qos_lib.sh Extract reusable code from qos_mc_aware.sh and put into a new library. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../selftests/drivers/net/mlxsw/qos_lib.sh | 98 +++++++++++++++++ .../drivers/net/mlxsw/qos_mc_aware.sh | 103 ++---------------- 2 files changed, 109 insertions(+), 92 deletions(-) create mode 100644 tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh new file mode 100644 index 000000000000..e80be65799ad --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh @@ -0,0 +1,98 @@ +# SPDX-License-Identifier: GPL-2.0 + +humanize() +{ + local speed=$1; shift + + for unit in bps Kbps Mbps Gbps; do + if (($(echo "$speed < 1024" | bc))); then + break + fi + + speed=$(echo "scale=1; $speed / 1024" | bc) + done + + echo "$speed${unit}" +} + +rate() +{ + local t0=$1; shift + local t1=$1; shift + local interval=$1; shift + + echo $((8 * (t1 - t0) / interval)) +} + +start_traffic() +{ + local h_in=$1; shift # Where the traffic egresses the host + local sip=$1; shift + local dip=$1; shift + local dmac=$1; shift + + $MZ $h_in -p 8000 -A $sip -B $dip -c 0 \ + -a own -b $dmac -t udp -q & + sleep 1 +} + +stop_traffic() +{ + # Suppress noise from killing mausezahn. + { kill %% && wait %%; } 2>/dev/null +} + +check_rate() +{ + local rate=$1; shift + local min=$1; shift + local what=$1; shift + + if ((rate > min)); then + return 0 + fi + + echo "$what $(humanize $ir) < $(humanize $min)" > /dev/stderr + return 1 +} + +measure_rate() +{ + local sw_in=$1; shift # Where the traffic ingresses the switch + local host_in=$1; shift # Where it ingresses another host + local counter=$1; shift # Counter to use for measurement + local what=$1; shift + + local interval=10 + local i + local ret=0 + + # Dips in performance might cause momentary ingress rate to drop below + # 1Gbps. That wouldn't saturate egress and MC would thus get through, + # seemingly winning bandwidth on account of UC. Demand at least 2Gbps + # average ingress rate to somewhat mitigate this. + local min_ingress=2147483648 + + for i in {5..0}; do + local t0=$(ethtool_stats_get $host_in $counter) + local u0=$(ethtool_stats_get $sw_in $counter) + sleep $interval + local t1=$(ethtool_stats_get $host_in $counter) + local u1=$(ethtool_stats_get $sw_in $counter) + + local ir=$(rate $u0 $u1 $interval) + local er=$(rate $t0 $t1 $interval) + + if check_rate $ir $min_ingress "$what ingress rate"; then + break + fi + + # Fail the test if we can't get the throughput. + if ((i == 0)); then + ret=1 + fi + done + + echo $ir $er + return $ret +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index 2e17fe3b4872..71231ad2dbfb 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -68,6 +68,7 @@ lib_dir=$(dirname $0)/../../../net/forwarding NUM_NETIFS=6 source $lib_dir/lib.sh source $lib_dir/devlink_lib.sh +source qos_lib.sh h1_create() { @@ -220,107 +221,28 @@ ping_ipv4() ping_test $h2 192.0.2.130 } -humanize() -{ - local speed=$1; shift - - for unit in bps Kbps Mbps Gbps; do - if (($(echo "$speed < 1024" | bc))); then - break - fi - - speed=$(echo "scale=1; $speed / 1024" | bc) - done - - echo "$speed${unit}" -} - -rate() -{ - local t0=$1; shift - local t1=$1; shift - local interval=$1; shift - - echo $((8 * (t1 - t0) / interval)) -} - -check_rate() -{ - local rate=$1; shift - local min=$1; shift - local what=$1; shift - - if ((rate > min)); then - return 0 - fi - - echo "$what $(humanize $ir) < $(humanize $min_ingress)" > /dev/stderr - return 1 -} - -measure_uc_rate() -{ - local what=$1; shift - - local interval=10 - local i - local ret=0 - - # Dips in performance might cause momentary ingress rate to drop below - # 1Gbps. That wouldn't saturate egress and MC would thus get through, - # seemingly winning bandwidth on account of UC. Demand at least 2Gbps - # average ingress rate to somewhat mitigate this. - local min_ingress=2147483648 - - $MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \ - -a own -b $h3mac -t udp -q & - sleep 1 - - for i in {5..0}; do - local t0=$(ethtool_stats_get $h3 rx_octets_prio_1) - local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1) - sleep $interval - local t1=$(ethtool_stats_get $h3 rx_octets_prio_1) - local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1) - - local ir=$(rate $u0 $u1 $interval) - local er=$(rate $t0 $t1 $interval) - - if check_rate $ir $min_ingress "$what ingress rate"; then - break - fi - - # Fail the test if we can't get the throughput. - if ((i == 0)); then - ret=1 - fi - done - - # Suppress noise from killing mausezahn. - { kill %% && wait; } 2>/dev/null - - echo $ir $er - exit $ret -} - test_mc_aware() { RET=0 local -a uc_rate - uc_rate=($(measure_uc_rate "UC-only")) + start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac + uc_rate=($(measure_rate $swp2 $h3 rx_octets_prio_1 "UC-only")) check_err $? "Could not get high enough UC-only ingress rate" + stop_traffic local ucth1=${uc_rate[1]} - $MZ $h1 -p 8000 -c 0 -a own -b bc -t udp -q & + start_traffic $h1 own bc bc local d0=$(date +%s) local t0=$(ethtool_stats_get $h3 rx_octets_prio_0) local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0) local -a uc_rate_2 - uc_rate_2=($(measure_uc_rate "UC+MC")) + start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac + uc_rate_2=($(measure_rate $swp2 $h3 rx_octets_prio_1 "UC+MC")) check_err $? "Could not get high enough UC+MC ingress rate" + stop_traffic local ucth2=${uc_rate_2[1]} local d1=$(date +%s) @@ -338,8 +260,7 @@ test_mc_aware() local mc_ir=$(rate $u0 $u1 $interval) local mc_er=$(rate $t0 $t1 $interval) - # Suppress noise from killing mausezahn. - { kill %% && wait; } 2>/dev/null + stop_traffic log_test "UC performace under MC overload" @@ -363,8 +284,7 @@ test_uc_aware() { RET=0 - $MZ $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \ - -a own -b $h3mac -t udp -q & + start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac local d0=$(date +%s) local t0=$(ethtool_stats_get $h3 rx_octets_prio_1) @@ -394,8 +314,7 @@ test_uc_aware() ((attempts == passes)) check_err $? - # Suppress noise from killing mausezahn. - { kill %% && wait; } 2>/dev/null + stop_traffic log_test "MC performace under UC overload" echo " ingress UC throughput $(humanize ${uc_ir})" From 30905dc63badb17e5960fa73fa49ed1459790494 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Thu, 28 Mar 2019 12:12:27 +0000 Subject: [PATCH 9/9] selftests: mlxsw: Add a new test for strict priority Test that when strict priority is configured on a system, the higher-priority traffic does actually win all the available bandwidth. The test uses a similar approach to qos_mc_aware.sh to run and account the traffic. Signed-off-by: Petr Machata Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- .../drivers/net/mlxsw/qos_ets_strict.sh | 311 ++++++++++++++++++ 1 file changed, 311 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh new file mode 100755 index 000000000000..6d1790b5de7a --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh @@ -0,0 +1,311 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# A test for strict prioritization of traffic in the switch. Run two streams of +# traffic, each through a different ingress port, one tagged with PCP of 1, the +# other with PCP of 2. Both streams converge at one egress port, where they are +# assigned TC of, respectively, 1 and 2, with strict priority configured between +# them. In H3, we expect to see (almost) exclusively the high-priority traffic. +# +# Please see qos_mc_aware.sh for an explanation of why we use mausezahn and +# counters instead of just running iperf3. +# +# +---------------------------+ +-----------------------------+ +# | H1 | | H2 | +# | $h1.111 + | | + $h2.222 | +# | 192.0.2.33/28 | | | | 192.0.2.65/28 | +# | e-qos-map 0:1 | | | | e-qos-map 0:2 | +# | | | | | | +# | $h1 + | | + $h2 | +# +-----------------|---------+ +---------|-------------------+ +# | | +# +-----------------|-------------------------------------|-------------------+ +# | $swp1 + + $swp2 | +# | >1Gbps | | >1Gbps | +# | +---------------|-----------+ +----------|----------------+ | +# | | $swp1.111 + | | + $swp2.222 | | +# | | BR111 | SW | BR222 | | +# | | $swp3.111 + | | + $swp3.222 | | +# | +---------------|-----------+ +----------|----------------+ | +# | \_____________________________________/ | +# | | | +# | + $swp3 | +# | | 1Gbps bottleneck | +# | | ETS: (up n->tc n for n in 0..7) | +# | | strict priority | +# +------------------------------------|--------------------------------------+ +# | +# +--------------------|--------------------+ +# | + $h3 H3 | +# | / \ | +# | / \ | +# | $h3.111 + + $h3.222 | +# | 192.0.2.34/28 192.0.2.66/28 | +# +-----------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + test_ets_strict +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=6 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source qos_lib.sh + +h1_create() +{ + simple_if_init $h1 + mtu_set $h1 10000 + + vlan_create $h1 111 v$h1 192.0.2.33/28 + ip link set dev $h1.111 type vlan egress-qos-map 0:1 +} + +h1_destroy() +{ + vlan_destroy $h1 111 + + mtu_restore $h1 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + mtu_set $h2 10000 + + vlan_create $h2 222 v$h2 192.0.2.65/28 + ip link set dev $h2.222 type vlan egress-qos-map 0:2 +} + +h2_destroy() +{ + vlan_destroy $h2 222 + + mtu_restore $h2 + simple_if_fini $h2 +} + +h3_create() +{ + simple_if_init $h3 + mtu_set $h3 10000 + + vlan_create $h3 111 v$h3 192.0.2.34/28 + vlan_create $h3 222 v$h3 192.0.2.66/28 +} + +h3_destroy() +{ + vlan_destroy $h3 222 + vlan_destroy $h3 111 + + mtu_restore $h3 + simple_if_fini $h3 +} + +switch_create() +{ + ip link set dev $swp1 up + mtu_set $swp1 10000 + + ip link set dev $swp2 up + mtu_set $swp2 10000 + + # prio n -> TC n, strict scheduling + lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:1,2:2,3:3,4:4,5:5,6:6,7:7 + lldptool -T -i $swp3 -V ETS-CFG tsa=$( + )"0:strict,"$( + )"1:strict,"$( + )"2:strict,"$( + )"3:strict,"$( + )"4:strict,"$( + )"5:strict,"$( + )"6:strict,"$( + )"7:strict" + sleep 1 + + ip link set dev $swp3 up + mtu_set $swp3 10000 + ethtool -s $swp3 speed 1000 autoneg off + + vlan_create $swp1 111 + vlan_create $swp2 222 + vlan_create $swp3 111 + vlan_create $swp3 222 + + ip link add name br111 up type bridge vlan_filtering 0 + ip link set dev $swp1.111 master br111 + ip link set dev $swp3.111 master br111 + + ip link add name br222 up type bridge vlan_filtering 0 + ip link set dev $swp2.222 master br222 + ip link set dev $swp3.222 master br222 + + # Make sure that ingress quotas are smaller than egress so that there is + # room for both streams of traffic to be admitted to shared buffer. + devlink_pool_size_thtype_set 0 dynamic 10000000 + devlink_pool_size_thtype_set 4 dynamic 10000000 + + devlink_port_pool_th_set $swp1 0 6 + devlink_tc_bind_pool_th_set $swp1 1 ingress 0 6 + + devlink_port_pool_th_set $swp2 0 6 + devlink_tc_bind_pool_th_set $swp2 2 ingress 0 6 + + devlink_tc_bind_pool_th_set $swp3 1 egress 4 7 + devlink_tc_bind_pool_th_set $swp3 2 egress 4 7 + devlink_port_pool_th_set $swp3 4 7 +} + +switch_destroy() +{ + devlink_port_pool_th_restore $swp3 4 + devlink_tc_bind_pool_th_restore $swp3 2 egress + devlink_tc_bind_pool_th_restore $swp3 1 egress + + devlink_tc_bind_pool_th_restore $swp2 2 ingress + devlink_port_pool_th_restore $swp2 0 + + devlink_tc_bind_pool_th_restore $swp1 1 ingress + devlink_port_pool_th_restore $swp1 0 + + devlink_pool_size_thtype_restore 4 + devlink_pool_size_thtype_restore 0 + + ip link del dev br222 + ip link del dev br111 + + vlan_destroy $swp3 222 + vlan_destroy $swp3 111 + vlan_destroy $swp2 222 + vlan_destroy $swp1 111 + + ethtool -s $swp3 autoneg on + mtu_restore $swp3 + ip link set dev $swp3 down + lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:0,2:0,3:0,4:0,5:0,6:0,7:0 + + mtu_restore $swp2 + ip link set dev $swp2 down + + mtu_restore $swp1 + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + h3mac=$(mac_get $h3) + + vrf_prepare + + h1_create + h2_create + h3_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h3_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.34 " from H1" + ping_test $h2 192.0.2.66 " from H2" +} + +rel() +{ + local old=$1; shift + local new=$1; shift + + bc <<< " + scale=2 + ret = 100 * $new / $old + if (ret > 0) { ret } else { 0 } + " +} + +test_ets_strict() +{ + RET=0 + + # Run high-prio traffic on its own. + start_traffic $h2.222 192.0.2.65 192.0.2.66 $h3mac + local -a rate_2 + rate_2=($(measure_rate $swp2 $h3 rx_octets_prio_2 "prio 2")) + check_err $? "Could not get high enough prio-2 ingress rate" + local rate_2_in=${rate_2[0]} + local rate_2_eg=${rate_2[1]} + stop_traffic # $h2.222 + + # Start low-prio stream. + start_traffic $h1.111 192.0.2.33 192.0.2.34 $h3mac + + local -a rate_1 + rate_1=($(measure_rate $swp1 $h3 rx_octets_prio_1 "prio 1")) + check_err $? "Could not get high enough prio-1 ingress rate" + local rate_1_in=${rate_1[0]} + local rate_1_eg=${rate_1[1]} + + # High-prio and low-prio on their own should have about the same + # throughput. + local rel21=$(rel $rate_1_eg $rate_2_eg) + check_err $(bc <<< "$rel21 < 95") + check_err $(bc <<< "$rel21 > 105") + + # Start the high-prio stream--now both streams run. + start_traffic $h2.222 192.0.2.65 192.0.2.66 $h3mac + rate_3=($(measure_rate $swp2 $h3 rx_octets_prio_2 "prio 2 w/ 1")) + check_err $? "Could not get high enough prio-2 ingress rate with prio-1" + local rate_3_in=${rate_3[0]} + local rate_3_eg=${rate_3[1]} + stop_traffic # $h2.222 + + stop_traffic # $h1.111 + + # High-prio should have about the same throughput whether or not + # low-prio is in the system. + local rel32=$(rel $rate_2_eg $rate_3_eg) + check_err $(bc <<< "$rel32 < 95") + + log_test "strict priority" + echo "Ingress to switch:" + echo " p1 in rate $(humanize $rate_1_in)" + echo " p2 in rate $(humanize $rate_2_in)" + echo " p2 in rate w/ p1 $(humanize $rate_3_in)" + echo "Egress from switch:" + echo " p1 eg rate $(humanize $rate_1_eg)" + echo " p2 eg rate $(humanize $rate_2_eg) ($rel21% of p1)" + echo " p2 eg rate w/ p1 $(humanize $rate_3_eg) ($rel32% of p2)" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS