Merge branch 'mlxsw-PFC-and-headroom-selftests'
Petr Machata says: ==================== mlxsw: PFC and headroom selftests Recent changes in the headroom management code made it clear that an automated way of testing this functionality is needed. This patchset brings two tests: a synthetic headroom behavior test, which verifies mechanics of headroom management. And a PFC test, which verifies whether this behavior actually translates into a working lossless configuration. Both of these tests rely on mlnx_qos[1], a tool that interfaces with Linux DCB API. The tool was originally written to work with Mellanox NICs, but does not actually rely on anything Mellanox-specific, and can be used for mlxsw as well as for any other NIC-like driver. Unlike Open LLDP it does support buffer commands and permits a fire-and-forget approach to configuration, which makes it very handy for writing of selftests. Patches #1-#3 extend the selftest devlink_lib.sh in various ways. Patch #4 then adds a helper wrapper for mlnx_qos to mlxsw's qos_lib.sh. Patch #5 adds a test for management of port headroom. Patch #6 adds a PFC test. [1] https://github.com/Mellanox/mlnx-tools/ ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
commit
e13dbc4f41
@ -147,17 +147,26 @@ switch_create()
|
||||
|
||||
# Make sure that ingress quotas are smaller than egress so that there is
|
||||
# room for both streams of traffic to be admitted to shared buffer.
|
||||
devlink_pool_size_thtype_save 0
|
||||
devlink_pool_size_thtype_set 0 dynamic 10000000
|
||||
devlink_pool_size_thtype_save 4
|
||||
devlink_pool_size_thtype_set 4 dynamic 10000000
|
||||
|
||||
devlink_port_pool_th_save $swp1 0
|
||||
devlink_port_pool_th_set $swp1 0 6
|
||||
devlink_tc_bind_pool_th_save $swp1 1 ingress
|
||||
devlink_tc_bind_pool_th_set $swp1 1 ingress 0 6
|
||||
|
||||
devlink_port_pool_th_save $swp2 0
|
||||
devlink_port_pool_th_set $swp2 0 6
|
||||
devlink_tc_bind_pool_th_save $swp2 2 ingress
|
||||
devlink_tc_bind_pool_th_set $swp2 2 ingress 0 6
|
||||
|
||||
devlink_tc_bind_pool_th_save $swp3 1 egress
|
||||
devlink_tc_bind_pool_th_set $swp3 1 egress 4 7
|
||||
devlink_tc_bind_pool_th_save $swp3 2 egress
|
||||
devlink_tc_bind_pool_th_set $swp3 2 egress 4 7
|
||||
devlink_port_pool_th_save $swp3 4
|
||||
devlink_port_pool_th_set $swp3 4 7
|
||||
}
|
||||
|
||||
|
379
tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
Executable file
379
tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
Executable file
@ -0,0 +1,379 @@
|
||||
#!/bin/bash
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
ALL_TESTS="
|
||||
test_defaults
|
||||
test_dcb_ets
|
||||
test_mtu
|
||||
test_pfc
|
||||
test_int_buf
|
||||
test_tc_priomap
|
||||
test_tc_mtu
|
||||
test_tc_sizes
|
||||
test_tc_int_buf
|
||||
"
|
||||
|
||||
lib_dir=$(dirname $0)/../../../net/forwarding
|
||||
|
||||
NUM_NETIFS=0
|
||||
source $lib_dir/lib.sh
|
||||
source $lib_dir/devlink_lib.sh
|
||||
source qos_lib.sh
|
||||
|
||||
swp=$NETIF_NO_CABLE
|
||||
|
||||
# EXIT trap handler. This test builds no topology of its own, so the only
# teardown step is the shared pre_cleanup hook.
cleanup()
{
	pre_cleanup
}
|
||||
|
||||
# Print the "buffer" row of the PFC section of the mlnx_qos report for $swp.
# The section runs from the line starting with "PFC" to the next line that
# starts with a non-blank character. Runs of spaces are squeezed to one and
# the first space-separated field is dropped.
get_prio_pg()
{
	__mlnx_qos -i $swp \
		| sed -n '/^PFC/,/^[^[:space:]]/p' \
		| grep buffer \
		| sed 's/ \+/ /g' \
		| cut -d' ' -f 2-
}
|
||||
|
||||
# Print the "enabled" row of the PFC section of the mlnx_qos report for $swp.
# The section runs from the line starting with "PFC" to the next line that
# starts with a non-blank character. Runs of spaces are squeezed to one and
# the first space-separated field is dropped.
get_prio_pfc()
{
	__mlnx_qos -i $swp \
		| sed -n '/^PFC/,/^[^[:space:]]/p' \
		| grep enabled \
		| sed 's/ \+/ /g' \
		| cut -d' ' -f 2-
}
|
||||
|
||||
# Print the traffic class of each priority of $swp as a space-separated list
# ordered by priority number, each entry followed by a space.
#
# The mlnx_qos report is read from the first line starting with "tc" to the
# end. Each "tc" line sets the current TC (its second field) and each
# "priority:" line records that TC for the priority named in its second field.
#
# The result is emitted by counting priorities upwards rather than with
# "for (i in PRIO)", whose traversal order awk leaves unspecified. Callers
# compare the output against a fixed, priority-ordered string.
get_prio_tc()
{
	__mlnx_qos -i $swp | sed -n '/^tc/,$p' |
		awk '/^tc/ { TC = $2 }
		     /priority:/ {
			PRIO[$2] = TC
			# Track the highest priority seen to bound the loop.
			if ($2 + 0 > MAX)
				MAX = $2 + 0
		     }
		     END {
			for (i = 0; i <= MAX; i++)
				if (i in PRIO)
					printf("%d ", PRIO[i])
		     }'
}
|
||||
|
||||
# Print the size of headroom buffer $1 of $swp. The value is taken from the
# "Receive" line of the mlnx_qos report: everything up to the last ": " is
# stripped and the remainder is treated as a comma-separated list, so buffer
# index N is field N + 1.
get_buf_size()
{
	local idx=$1
	shift
	local field=$((idx + 1))

	__mlnx_qos -i $swp \
		| grep Receive \
		| sed 's/.*: //' \
		| cut -d, -f $field
}
|
||||
|
||||
# Print whatever follows "total_size=" on the "Receive" line of the mlnx_qos
# report for $swp.
get_tot_size()
{
	__mlnx_qos -i $swp \
		| grep Receive \
		| sed 's/.*total_size=//'
}
|
||||
|
||||
# Compare the current priority-to-buffer map (see get_prio_pg) with the
# expected string in $1 and report a mismatch through check_err.
check_prio_pg()
{
	local expect=$1
	local current

	current=$(get_prio_pg)
	[[ "$current" == "$expect" ]]
	check_err $? "prio2buffer is '$current', expected '$expect'"
}
|
||||
|
||||
# Compare the current per-priority PFC enablement (see get_prio_pfc) with the
# expected string in $1 and report a mismatch through check_err.
check_prio_pfc()
{
	local expect=$1
	local current

	current=$(get_prio_pfc)
	[[ "$current" == "$expect" ]]
	check_err $? "prio PFC is '$current', expected '$expect'"
}
|
||||
|
||||
# Compare the current priority-to-TC map (see get_prio_tc) with the expected
# string in $1 and report a mismatch through check_err.
check_prio_tc()
{
	local expect=$1
	local current

	current=$(get_prio_tc)
	[[ "$current" == "$expect" ]]
	check_err $? "prio_tc is '$current', expected '$expect'"
}
|
||||
|
||||
# Check the size of one headroom buffer against an arithmetic condition and
# print the size that was read.
#
# $1 - buffer index
# $2 - right-hand part of an arithmetic comparison, e.g. "> 0" or "== 1234"
# $3 - optional prefix for the failure message
__check_buf_size()
{
	local idx=$1 expr=$2 what=$3
	local current

	current=$(get_buf_size $idx)

	# $expr is spliced in verbatim to form "<size> <op> <value>".
	((current $expr))
	check_err $? "${what}buffer $idx size is '$current', expected '$expr'"

	echo $current
}
|
||||
|
||||
# Same check as __check_buf_size, with the printed buffer size discarded.
check_buf_size()
{
	__check_buf_size "$@" >/dev/null
}
|
||||
|
||||
# With nothing configured, every priority is expected to map to buffer 0 and
# TC 0, and PFC is expected to be disabled on all priorities.
test_defaults()
{
	local all_zero="0 0 0 0 0 0 0 0 "
	local check

	RET=0

	for check in check_prio_pg check_prio_tc check_prio_pfc; do
		$check "$all_zero"
	done

	log_test "Default headroom configuration"
}
|
||||
|
||||
# Setting a priority-to-TC map through mlnx_qos --prio_tc is expected to be
# mirrored in the priority-to-buffer map, while PFC stays disabled. A direct
# --prio2buffer request is expected to be rejected at this point.
test_dcb_ets()
{
	local all_zero="0 0 0 0 0 0 0 0 "
	local spread="0 2 4 6 1 3 5 7 "

	RET=0

	__mlnx_qos -i $swp --prio_tc=0,2,4,6,1,3,5,7 > /dev/null

	check_prio_pg "$spread"
	check_prio_tc "$spread"
	check_prio_pfc "$all_zero"

	# Back to the default map.
	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null

	check_prio_pg "$all_zero"
	check_prio_tc "$all_zero"

	__mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 &> /dev/null
	check_fail $? "prio2buffer accepted in DCB mode"

	log_test "Configuring headroom through ETS"
}
|
||||
|
||||
# Verify that the port MTU drives the size of headroom buffer 0. The buffer
# must be non-empty to begin with, grow at MTU 3000, be larger still at MTU
# 6000, and be back at its initial size once the MTU is restored.
#
# $1 - optional prefix for the logged test name (e.g. "TC: ")
#
# Sizes are read with get_buf_size and verified with check_buf_size in this
# shell. A check placed inside $(...) runs check_err in a subshell, where it
# cannot change RET in the calling shell, so a failure would go unnoticed.
test_mtu()
{
	local what=$1; shift
	local buf0size_2
	local buf0size

	RET=0
	buf0size=$(get_buf_size 0)
	check_buf_size 0 "> 0"

	mtu_set $swp 3000
	buf0size_2=$(get_buf_size 0)
	check_buf_size 0 "> $buf0size" "MTU 3000: "
	mtu_restore $swp

	mtu_set $swp 6000
	check_buf_size 0 "> $buf0size_2" "MTU 6000: "
	mtu_restore $swp

	check_buf_size 0 "== $buf0size"

	log_test "${what}MTU impacts buffer size"
}
|
||||
|
||||
# Repeat test_mtu with a root qdisc installed on $swp.
test_tc_mtu()
{
	# In TC mode, MTU still impacts the threshold below which a buffer is
	# not permitted to go.

	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M

	test_mtu "TC: "

	tc qdisc delete dev $swp root
}
|
||||
|
||||
# Exercise headroom sizing with PFC in four logged stages:
#  1. priorities 5, 6, 7 mapped to TCs 1, 2, 3, no PFC;
#  2. PFC enabled on priorities 5, 6, 7 with cable length 0;
#  3. the same PFC setting with cable length 1000;
#  4. PFC and the priority map reset.
test_pfc()
{
	local i

	RET=0

	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,1,2,3 > /dev/null

	local buf0size=$(get_buf_size 0)
	local buf1size=$(get_buf_size 1)
	local buf2size=$(get_buf_size 2)
	local buf3size=$(get_buf_size 3)

	# Only the four buffers that are in use may be non-empty.
	for i in 0 1 2 3; do
		check_buf_size $i "> 0"
	done
	for i in 4 5 6 7; do
		check_buf_size $i "== 0"
	done

	log_test "Buffer size sans PFC"

	RET=0

	__mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=0 > /dev/null

	check_prio_pg "0 0 0 0 0 1 2 3 "
	check_prio_pfc "0 0 0 0 0 1 1 1 "

	# Buffer 0 keeps its size, buffers 1-3 have to grow.
	check_buf_size 0 "== $buf0size"
	check_buf_size 1 "> $buf1size"
	check_buf_size 2 "> $buf2size"
	check_buf_size 3 "> $buf3size"

	# Buffers 1-3 are expected to be equally large now.
	buf1size=$(get_buf_size 1)
	check_buf_size 2 "== $buf1size"
	check_buf_size 3 "== $buf1size"

	log_test "PFC: Cable length 0"

	RET=0

	__mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=1000 > /dev/null

	# Compared with cable length 0, buffers 1-3 have to be larger.
	check_buf_size 0 "== $buf0size"
	for i in 1 2 3; do
		check_buf_size $i "> $buf1size"
	done

	log_test "PFC: Cable length 1000"

	RET=0

	__mlnx_qos -i $swp --pfc=0,0,0,0,0,0,0,0 --cable_len=0 > /dev/null
	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null

	check_prio_pg "0 0 0 0 0 0 0 0 "
	check_prio_tc "0 0 0 0 0 0 0 0 "
	check_buf_size 0 "> 0"
	for i in 1 2 3 4 5 6 7; do
		check_buf_size $i "== 0"
	done

	log_test "PFC: Restore defaults"
}
|
||||
|
||||
# Check how the priority-to-buffer map behaves as a root qdisc comes and goes.
# The map follows --prio_tc while no qdisc is installed, reads as all zeroes
# once a qdisc is added, accepts --prio2buffer while the qdisc is present, and
# is back at the --prio_tc mapping after the qdisc is deleted.
test_tc_priomap()
{
	local identity="0 1 2 3 4 5 6 7 "

	RET=0

	__mlnx_qos -i $swp --prio_tc=0,1,2,3,4,5,6,7 > /dev/null
	check_prio_pg "$identity"

	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
	check_prio_pg "0 0 0 0 0 0 0 0 "

	__mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 > /dev/null
	check_prio_pg "1 3 5 7 0 2 4 6 "

	tc qdisc delete dev $swp root
	check_prio_pg "$identity"

	# Clean up: the prio2buffer map is zeroed while a qdisc is installed,
	# then the qdisc is removed and the prio_tc map is zeroed.
	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
	__mlnx_qos -i $swp --prio2buffer=0,0,0,0,0,0,0,0 > /dev/null
	tc qdisc delete dev $swp root
	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null

	log_test "TC: priomap"
}
|
||||
|
||||
# Check manual headroom sizing via mlnx_qos --buffer_size. The request has to
# fail while no root qdisc is installed, succeed once one is added, be
# unaffected by an MTU change, keep working after the qdisc is replaced by
# one of the same or a different kind, and fail again after the qdisc is
# deleted.
test_tc_sizes()
{
	local cell_size=$(devlink_cell_size_get)
	local size=$((cell_size * 1000))
	local grow="--buffer_size=$size,0,0,0,0,0,0,0"
	local reset="--buffer_size=0,0,0,0,0,0,0,0"

	RET=0

	__mlnx_qos -i $swp $grow &> /dev/null
	check_fail $? "buffer_size should fail before qdisc is added"

	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M

	__mlnx_qos -i $swp $grow > /dev/null
	check_err $? "buffer_size should pass after qdisc is added"
	check_buf_size 0 "== $size" "set size: "

	# An MTU change must leave the configured size alone.
	mtu_set $swp 6000
	check_buf_size 0 "== $size" "set MTU: "
	mtu_restore $swp

	__mlnx_qos -i $swp $reset > /dev/null

	# After replacing the qdisc for the same kind, buffer_size still has to
	# work.
	tc qdisc replace dev $swp root handle 1: bfifo limit 1M

	__mlnx_qos -i $swp $grow > /dev/null
	check_buf_size 0 "== $size" "post replace, set size: "

	__mlnx_qos -i $swp $reset > /dev/null

	# Likewise after replacing for a different kind.
	tc qdisc replace dev $swp root handle 2: prio bands 8

	__mlnx_qos -i $swp $grow > /dev/null
	check_buf_size 0 "== $size" "post replace different kind, set size: "

	tc qdisc delete dev $swp root

	__mlnx_qos -i $swp $grow &> /dev/null
	check_fail $? "buffer_size should fail after qdisc is deleted"

	log_test "TC: buffer size"
}
|
||||
|
||||
# Track the part of the reported total headroom size that is not buffer 0
# (total_size minus the size of buffer 0) across three events: installing an
# egress mirror on $swp, changing the MTU, and removing the mirror again.
#
# $1 - optional prefix for the logged test name
test_int_buf()
{
	local what=$1; shift
	local buf0size buf0size_2 buf0size_3 buf0size_4
	local tot_size tot_size_2 tot_size_3 tot_size_4
	local dsize dsize_2 dsize_3 dsize_4

	RET=0

	buf0size=$(get_buf_size 0)
	tot_size=$(get_tot_size)

	# Size of internal buffer and buffer 9.
	dsize=$((tot_size - buf0size))

	tc qdisc add dev $swp clsact
	tc filter add dev $swp egress matchall skip_sw action mirred egress mirror dev $swp

	buf0size_2=$(get_buf_size 0)
	tot_size_2=$(get_tot_size)
	dsize_2=$((tot_size_2 - buf0size_2))

	# Egress SPAN should have added to the "invisible" buffer configuration.
	((dsize_2 > dsize))
	check_err $? "Invisible buffers account for '$dsize_2', expected '> $dsize'"

	mtu_set $swp 3000

	buf0size_3=$(get_buf_size 0)
	tot_size_3=$(get_tot_size)
	dsize_3=$((tot_size_3 - buf0size_3))

	# MTU change might change buffer 0, which will show at total, but the
	# hidden buffers should stay the same size.
	((dsize_3 == dsize_2))
	check_err $? "MTU change: Invisible buffers account for '$dsize_3', expected '== $dsize_2'"

	mtu_restore $swp
	tc qdisc del dev $swp clsact

	# After SPAN removal, hidden buffers should be back to the original sizes.
	buf0size_4=$(get_buf_size 0)
	tot_size_4=$(get_tot_size)
	dsize_4=$((tot_size_4 - buf0size_4))
	((dsize_4 == dsize))
	check_err $? "SPAN removed: Invisible buffers account for '$dsize_4', expected '== $dsize'"

	log_test "${what}internal buffer size"
}
|
||||
|
||||
# Run test_int_buf twice with a root qdisc installed on $swp: once with the
# buffer sizes left alone and once with buffer 0 set to 1000 cells by hand.
test_tc_int_buf()
{
	local cell_size=$(devlink_cell_size_get)
	local size=$((cell_size * 1000))

	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M

	test_int_buf "TC: "

	__mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
	test_int_buf "TC+buffsize: "

	# Undo the manual size before dropping the qdisc.
	__mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
	tc qdisc delete dev $swp root
}
|
||||
|
||||
trap cleanup EXIT
|
||||
|
||||
bail_on_lldpad
|
||||
setup_wait
|
||||
tests_run
|
||||
|
||||
exit $EXIT_STATUS
|
@ -82,3 +82,17 @@ bail_on_lldpad()
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
# Run mlnx_qos with the given arguments, discarding the tool's own stderr.
#
# If the tool exits with a non-zero status, a one-line diagnostic naming the
# status and the arguments is printed to stderr. The exit status of mlnx_qos
# is returned either way.
__mlnx_qos()
{
	local err

	mlnx_qos "$@" 2>/dev/null
	err=$?

	if ((err)); then
		# Duplicate the existing stderr descriptor instead of reopening
		# /dev/stderr by path: reopening truncates the target when
		# stderr has been redirected to a regular file.
		echo "Error ($err) in mlnx_qos $@" >&2
	fi

	return $err
}
|
||||
|
@ -145,12 +145,17 @@ switch_create()
|
||||
|
||||
# Make sure that ingress quotas are smaller than egress so that there is
|
||||
# room for both streams of traffic to be admitted to shared buffer.
|
||||
devlink_port_pool_th_save $swp1 0
|
||||
devlink_port_pool_th_set $swp1 0 5
|
||||
devlink_tc_bind_pool_th_save $swp1 0 ingress
|
||||
devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5
|
||||
|
||||
devlink_port_pool_th_save $swp2 0
|
||||
devlink_port_pool_th_set $swp2 0 5
|
||||
devlink_tc_bind_pool_th_save $swp2 1 ingress
|
||||
devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5
|
||||
|
||||
devlink_port_pool_th_save $swp3 4
|
||||
devlink_port_pool_th_set $swp3 4 12
|
||||
}
|
||||
|
||||
|
403
tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
Executable file
403
tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
Executable file
@ -0,0 +1,403 @@
|
||||
#!/bin/bash
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# This test injects a 10-MB burst of traffic with VLAN tag and 802.1p priority
|
||||
# of 1. This stream is consistently prioritized as priority 1, is put to PG
|
||||
# buffer 1, and scheduled at TC 1.
|
||||
#
|
||||
# - the stream first ingresses through $swp1, where it is forwarded to $swp3
|
||||
#
|
||||
# - then it ingresses through $swp4. Here it is put to a lossless buffer and put
|
||||
# to a small pool ("PFC pool"). The traffic is forwarded to $swp2, which is
|
||||
# shaped, and thus the PFC pool eventually fills, therefore the headroom
|
||||
# fills, and $swp3 is paused.
|
||||
#
|
||||
# - since $swp3 now can't send traffic, the traffic ingressing $swp1 is kept at
|
||||
# a pool ("overflow pool"). The overflow pool needs to be large enough to
|
||||
# contain the whole burst.
|
||||
#
|
||||
# - eventually the PFC pool gets some traffic out, headroom therefore gets some
|
||||
# traffic to the pool, and $swp3 is unpaused again. This way the traffic is
|
||||
# gradually forwarded from the overflow pool, through the PFC pool, out of
|
||||
# $swp2, and eventually to $h2.
|
||||
#
|
||||
# - if PFC works, all lossless flow packets that ingress through $swp1 should
|
||||
# also be seen ingressing $h2. If it doesn't, there will be drops due to
|
||||
# discrepancy between the speeds of $swp1 and $h2.
|
||||
#
|
||||
# - it should all play out relatively quickly, so that SLL and HLL will not
|
||||
# cause drops.
|
||||
#
|
||||
# +-----------------------+
|
||||
# | H1 |
|
||||
# | + $h1.111 |
|
||||
# | | 192.0.2.33/28 |
|
||||
# | | |
|
||||
# | + $h1 |
|
||||
# +---|-------------------+ +--------------------+
|
||||
# | | |
|
||||
# +---|----------------------|--------------------|---------------------------+
|
||||
# | + $swp1 $swp3 + + $swp4 |
|
||||
# | | iPOOL1 iPOOL0 | | iPOOL2 |
|
||||
# | | ePOOL4 ePOOL5 | | ePOOL4 |
|
||||
# | | 1Gbps | | 1Gbps |
|
||||
# | | PFC:enabled=1 | | PFC:enabled=1 |
|
||||
# | +-|----------------------|-+ +-|------------------------+ |
|
||||
# | | + $swp1.111 $swp3.111 + | | + $swp4.111 | |
|
||||
# | | | | | |
|
||||
# | | BR1 | | BR2 | |
|
||||
# | | | | | |
|
||||
# | | | | + $swp2.111 | |
|
||||
# | +--------------------------+ +---------|----------------+ |
|
||||
# | | |
|
||||
# | iPOOL0: 500KB dynamic | |
|
||||
# | iPOOL1: 10MB static | |
|
||||
# | iPOOL2: 1MB static + $swp2 |
|
||||
# | ePOOL4: 500KB dynamic | iPOOL0 |
|
||||
# | ePOOL5: 10MB static | ePOOL6 |
|
||||
# | ePOOL6: "infinite" static | 200Mbps shaper |
|
||||
# +-------------------------------------------------------|-------------------+
|
||||
# |
|
||||
# +---|-------------------+
|
||||
# | + $h2 H2 |
|
||||
# | | |
|
||||
# | + $h2.111 |
|
||||
# | 192.0.2.34/28 |
|
||||
# +-----------------------+
|
||||
#
|
||||
# iPOOL0+ePOOL4 is a helper pool for control traffic etc.
|
||||
# iPOOL1+ePOOL5 are overflow pools.
|
||||
# iPOOL2+ePOOL6 are PFC pools.
|
||||
|
||||
ALL_TESTS="
|
||||
ping_ipv4
|
||||
test_qos_pfc
|
||||
"
|
||||
|
||||
lib_dir=$(dirname $0)/../../../net/forwarding
|
||||
|
||||
NUM_NETIFS=6
|
||||
source $lib_dir/lib.sh
|
||||
source $lib_dir/devlink_lib.sh
|
||||
source qos_lib.sh
|
||||
|
||||
_1KB=1000
|
||||
_100KB=$((100 * _1KB))
|
||||
_500KB=$((500 * _1KB))
|
||||
_1MB=$((1000 * _1KB))
|
||||
_10MB=$((10 * _1MB))
|
||||
|
||||
# Bring up host interface $h1 with MTU 10000 and put VLAN 111 with address
# 192.0.2.33/28 on top of it.
h1_create()
{
	simple_if_init $h1
	mtu_set $h1 10000

	vlan_create $h1 111 v$h1 192.0.2.33/28
}
|
||||
|
||||
# Undo h1_create in reverse order: VLAN 111 first, then the MTU, then the
# interface itself.
h1_destroy()
{
	vlan_destroy $h1 111

	mtu_restore $h1
	simple_if_fini $h1
}
|
||||
|
||||
# Bring up host interface $h2 with MTU 10000 and put VLAN 111 with address
# 192.0.2.34/28 on top of it.
h2_create()
{
	simple_if_init $h2
	mtu_set $h2 10000

	vlan_create $h2 111 v$h2 192.0.2.34/28
}
|
||||
|
||||
# Undo h2_create in reverse order: VLAN 111 first, then the MTU, then the
# interface itself.
h2_destroy()
{
	vlan_destroy $h2 111

	mtu_restore $h2
	simple_if_fini $h2
}
|
||||
|
||||
# Configure the switch side of the PFC test.
#
# The current pool, port-pool and TC-bind settings that are about to be
# changed are saved first. Then the shared-buffer pools are resized, the four
# switch ports are configured (MTU 10000, VLAN 111, QoS maps, pool quotas,
# ETS qdisc, priority-to-buffer map and PFC where needed), and the VLAN
# uppers are enslaved to two bridges: br1 ($swp1, $swp3) and br2 ($swp2,
# $swp4).
#
# switch_destroy reverts all of this.
switch_create()
{
	# pools
	# -----

	devlink_pool_size_thtype_save 0
	devlink_pool_size_thtype_save 4
	devlink_pool_size_thtype_save 1
	devlink_pool_size_thtype_save 5
	devlink_pool_size_thtype_save 2
	devlink_pool_size_thtype_save 6

	devlink_port_pool_th_save $swp1 1
	devlink_port_pool_th_save $swp2 6
	devlink_port_pool_th_save $swp3 5
	devlink_port_pool_th_save $swp4 2

	devlink_tc_bind_pool_th_save $swp1 1 ingress
	devlink_tc_bind_pool_th_save $swp2 1 egress
	devlink_tc_bind_pool_th_save $swp3 1 egress
	devlink_tc_bind_pool_th_save $swp4 1 ingress

	# Control traffic pools. Just reduce the size. Keep them dynamic so that
	# we don't need to change all the uninteresting quotas.
	devlink_pool_size_thtype_set 0 dynamic $_500KB
	devlink_pool_size_thtype_set 4 dynamic $_500KB

	# Overflow pools.
	devlink_pool_size_thtype_set 1 static $_10MB
	devlink_pool_size_thtype_set 5 static $_10MB

	# PFC pools. As per the writ, the size of egress PFC pool should be
	# infinite, but actually it just needs to be large enough to not matter
	# in practice, so reuse the 10MB limit.
	devlink_pool_size_thtype_set 2 static $_1MB
	devlink_pool_size_thtype_set 6 static $_10MB

	# $swp1
	# -----

	ip link set dev $swp1 up
	mtu_set $swp1 10000
	vlan_create $swp1 111
	ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1

	devlink_port_pool_th_set $swp1 1 $_10MB
	devlink_tc_bind_pool_th_set $swp1 1 ingress 1 $_10MB

	# Configure qdisc so that we can configure PG and therefore pool
	# assignment.
	tc qdisc replace dev $swp1 root handle 1: \
	   ets bands 8 strict 8 priomap 7 6
	__mlnx_qos -i $swp1 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null

	# $swp2
	# -----

	ip link set dev $swp2 up
	mtu_set $swp2 10000
	vlan_create $swp2 111
	ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1

	devlink_port_pool_th_set $swp2 6 $_10MB
	devlink_tc_bind_pool_th_set $swp2 1 egress 6 $_10MB

	# prio 0->TC0 (band 7), 1->TC1 (band 6). TC1 is shaped.
	tc qdisc replace dev $swp2 root handle 1: \
	   ets bands 8 strict 8 priomap 7 6
	tc qdisc replace dev $swp2 parent 1:7 handle 17: \
	   tbf rate 200Mbit burst 131072 limit 1M

	# $swp3
	# -----

	ip link set dev $swp3 up
	mtu_set $swp3 10000
	vlan_create $swp3 111
	ip link set dev $swp3.111 type vlan egress-qos-map 0:0 1:1

	devlink_port_pool_th_set $swp3 5 $_10MB
	devlink_tc_bind_pool_th_set $swp3 1 egress 5 $_10MB

	# prio 0->TC0 (band 7), 1->TC1 (band 6)
	tc qdisc replace dev $swp3 root handle 1: \
	   ets bands 8 strict 8 priomap 7 6

	# Need to enable PFC so that PAUSE takes effect. Therefore need to put
	# the lossless prio into a buffer of its own. Don't bother with buffer
	# sizes though, there is not going to be any pressure in the "backward"
	# direction.
	__mlnx_qos -i $swp3 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
	__mlnx_qos -i $swp3 --pfc=0,1,0,0,0,0,0,0 >/dev/null

	# $swp4
	# -----

	ip link set dev $swp4 up
	mtu_set $swp4 10000
	vlan_create $swp4 111
	ip link set dev $swp4.111 type vlan ingress-qos-map 0:0 1:1

	devlink_port_pool_th_set $swp4 2 $_1MB
	devlink_tc_bind_pool_th_set $swp4 1 ingress 2 $_1MB

	# Configure qdisc so that we can hand-tune headroom.
	tc qdisc replace dev $swp4 root handle 1: \
	   ets bands 8 strict 8 priomap 7 6
	__mlnx_qos -i $swp4 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
	__mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null
	# PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which
	# is (-2*MTU) about 80K of delay provision.
	# The headroom being tuned is that of $swp4, the lossless ingress port.
	# switch_destroy resets buffer_size on $swp4 as well.
	__mlnx_qos -i $swp4 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null

	# bridges
	# -------

	ip link add name br1 type bridge vlan_filtering 0
	ip link set dev $swp1.111 master br1
	ip link set dev $swp3.111 master br1
	ip link set dev br1 up

	ip link add name br2 type bridge vlan_filtering 0
	ip link set dev $swp2.111 master br2
	ip link set dev $swp4.111 master br2
	ip link set dev br2 up
}
|
||||
|
||||
# Revert everything switch_create set up: pool sizes and threshold types,
# the two bridges, and then the per-port configuration of $swp4, $swp3,
# $swp2 and $swp1, in that order.
switch_destroy()
{
	local pool

	# Do this first so that we can reset the limits to values that are only
	# valid for the original static / dynamic setting.
	for pool in 6 5 4 2 1 0; do
		devlink_pool_size_thtype_restore $pool
	done

	# bridges
	# -------

	ip link set dev br2 down
	ip link set dev $swp4.111 nomaster
	ip link set dev $swp2.111 nomaster
	ip link del dev br2

	ip link set dev br1 down
	ip link set dev $swp3.111 nomaster
	ip link set dev $swp1.111 nomaster
	ip link del dev br1

	# $swp4
	# -----

	__mlnx_qos -i $swp4 --buffer_size=0,0,0,0,0,0,0,0 >/dev/null
	__mlnx_qos -i $swp4 --pfc=0,0,0,0,0,0,0,0 >/dev/null
	__mlnx_qos -i $swp4 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
	tc qdisc del dev $swp4 root

	devlink_tc_bind_pool_th_restore $swp4 1 ingress
	devlink_port_pool_th_restore $swp4 2

	vlan_destroy $swp4 111
	mtu_restore $swp4
	ip link set dev $swp4 down

	# $swp3
	# -----

	__mlnx_qos -i $swp3 --pfc=0,0,0,0,0,0,0,0 >/dev/null
	__mlnx_qos -i $swp3 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
	tc qdisc del dev $swp3 root

	devlink_tc_bind_pool_th_restore $swp3 1 egress
	devlink_port_pool_th_restore $swp3 5

	vlan_destroy $swp3 111
	mtu_restore $swp3
	ip link set dev $swp3 down

	# $swp2
	# -----

	# The shaper goes first, then the ETS root it hangs off.
	tc qdisc del dev $swp2 parent 1:7
	tc qdisc del dev $swp2 root

	devlink_tc_bind_pool_th_restore $swp2 1 egress
	devlink_port_pool_th_restore $swp2 6

	vlan_destroy $swp2 111
	mtu_restore $swp2
	ip link set dev $swp2 down

	# $swp1
	# -----

	__mlnx_qos -i $swp1 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
	tc qdisc del dev $swp1 root

	devlink_tc_bind_pool_th_restore $swp1 1 ingress
	devlink_port_pool_th_restore $swp1 1

	vlan_destroy $swp1 111
	mtu_restore $swp1
	ip link set dev $swp1 down
}
|
||||
|
||||
# Bind the six test interfaces from NETIFS to their roles, record the MAC
# address of $h2, and build the topology: VRFs, both hosts, then the switch.
setup_prepare()
{
	# p1 <-> p2: H1 to switch
	h1=${NETIFS[p1]}
	swp1=${NETIFS[p2]}

	# p3 <-> p4: switch to H2
	swp2=${NETIFS[p3]}
	h2=${NETIFS[p4]}

	# p5 <-> p6: the two remaining switch ports
	swp3=${NETIFS[p5]}
	swp4=${NETIFS[p6]}

	h2mac=$(mac_get $h2)

	vrf_prepare

	h1_create
	h2_create
	switch_create
}
|
||||
|
||||
# EXIT trap handler: tear the topology down in the reverse order of
# setup_prepare.
cleanup()
{
	pre_cleanup

	switch_destroy
	h2_destroy
	h1_destroy

	vrf_cleanup
}
|
||||
|
||||
# Connectivity check: ping 192.0.2.34, the address configured on $h2.111,
# from $h1.
ping_ipv4()
{
	ping_test $h1 192.0.2.34
}
|
||||
|
||||
# Send a burst of priority-1 traffic on VLAN 111 from $h1 towards $h2 and
# compare the priority-1 byte counters of $swp1 (received) and $swp2
# (transmitted). The received volume has to be within 5% of the nominal burst
# size and every received byte has to be transmitted.
test_qos_pfc()
{
	local in0 in1 out0 out1

	RET=0

	# 10M pool, each packet is 8K of payload + headers
	local pkts=$((_10MB / 8050))
	local size=$((pkts * 8050))

	in0=$(ethtool_stats_get $swp1 rx_octets_prio_1)
	out0=$(ethtool_stats_get $swp2 tx_octets_prio_1)

	$MZ $h1 -p 8000 -Q 1:111 -A 192.0.2.33 -B 192.0.2.34 \
		-a own -b $h2mac -c $pkts -t udp -q
	sleep 2

	in1=$(ethtool_stats_get $swp1 rx_octets_prio_1)
	out1=$(ethtool_stats_get $swp2 tx_octets_prio_1)

	local din=$((in1 - in0))
	local dout=$((out1 - out0))

	# Ingressed volume as a percentage of the nominal burst size.
	local pct_in=$((din * 100 / size))

	((95 < pct_in && pct_in < 105))
	check_err $? "Relative ingress out of expected bounds, $pct_in% should be 100%"

	((din == dout))
	check_err $? "$((din - dout)) bytes out of $din ingressed got lost"

	log_test "PFC"
}
|
||||
|
||||
trap cleanup EXIT
|
||||
|
||||
bail_on_lldpad
|
||||
setup_prepare
|
||||
setup_wait
|
||||
tests_run
|
||||
|
||||
exit $EXIT_STATUS
|
@ -27,11 +27,17 @@ switch_create()
|
||||
# amount of traffic that is admitted to the shared buffers. This makes
|
||||
# sure that there is always enough traffic of all types to select from
|
||||
# for the DWRR process.
|
||||
devlink_port_pool_th_save $swp1 0
|
||||
devlink_port_pool_th_set $swp1 0 12
|
||||
devlink_tc_bind_pool_th_save $swp1 0 ingress
|
||||
devlink_tc_bind_pool_th_set $swp1 0 ingress 0 12
|
||||
devlink_port_pool_th_save $swp2 4
|
||||
devlink_port_pool_th_set $swp2 4 12
|
||||
devlink_tc_bind_pool_th_save $swp2 7 egress
|
||||
devlink_tc_bind_pool_th_set $swp2 7 egress 4 5
|
||||
devlink_tc_bind_pool_th_save $swp2 6 egress
|
||||
devlink_tc_bind_pool_th_set $swp2 6 egress 4 5
|
||||
devlink_tc_bind_pool_th_save $swp2 5 egress
|
||||
devlink_tc_bind_pool_th_set $swp2 5 egress 4 5
|
||||
|
||||
# Note: sch_ets_core.sh uses VLAN ingress-qos-map to assign packet
|
||||
|
@ -208,6 +208,7 @@ switch_create()
|
||||
ip link set dev br2_11 up
|
||||
|
||||
local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1)
|
||||
devlink_port_pool_th_save $swp3 8
|
||||
devlink_port_pool_th_set $swp3 8 $size
|
||||
}
|
||||
|
||||
|
@ -5,7 +5,7 @@
|
||||
# Defines
|
||||
|
||||
if [[ ! -v DEVLINK_DEV ]]; then
|
||||
DEVLINK_DEV=$(devlink port show "${NETIFS[p1]}" -j \
|
||||
DEVLINK_DEV=$(devlink port show "${NETIFS[p1]:-$NETIF_NO_CABLE}" -j \
|
||||
| jq -r '.port | keys[]' | cut -d/ -f-2)
|
||||
if [ -z "$DEVLINK_DEV" ]; then
|
||||
echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
|
||||
@ -117,6 +117,12 @@ devlink_reload()
|
||||
|
||||
declare -A DEVLINK_ORIG
|
||||
|
||||
# Changing pool type from static to dynamic causes reinterpretation of threshold
|
||||
# values. They therefore need to be saved before pool type is changed, then the
|
||||
# pool type can be changed, and then the new values need to be set up. Therefore
|
||||
# instead of saving the current state implicitly in the _set call, provide
|
||||
# functions for all three primitives: save, set, and restore.
|
||||
|
||||
devlink_port_pool_threshold()
|
||||
{
|
||||
local port=$1; shift
|
||||
@ -126,14 +132,21 @@ devlink_port_pool_threshold()
|
||||
| jq '.port_pool."'"$port"'"[].threshold'
|
||||
}
|
||||
|
||||
# Record the current threshold of pool $2 on port $1 in DEVLINK_ORIG, for a
# later devlink_port_pool_th_restore.
devlink_port_pool_th_save()
{
	local port=$1 pool=$2

	DEVLINK_ORIG["port_pool($port,$pool).threshold"]=$(devlink_port_pool_threshold $port $pool)
}
|
||||
|
||||
devlink_port_pool_th_set()
|
||||
{
|
||||
local port=$1; shift
|
||||
local pool=$1; shift
|
||||
local th=$1; shift
|
||||
local key="port_pool($port,$pool).threshold"
|
||||
|
||||
DEVLINK_ORIG[$key]=$(devlink_port_pool_threshold $port $pool)
|
||||
devlink sb port pool set $port pool $pool th $th
|
||||
}
|
||||
|
||||
@ -142,8 +155,13 @@ devlink_port_pool_th_restore()
|
||||
local port=$1; shift
|
||||
local pool=$1; shift
|
||||
local key="port_pool($port,$pool).threshold"
|
||||
local -a orig=(${DEVLINK_ORIG[$key]})
|
||||
|
||||
devlink sb port pool set $port pool $pool th ${DEVLINK_ORIG[$key]}
|
||||
if [[ -z $orig ]]; then
|
||||
echo "WARNING: Mismatched devlink_port_pool_th_restore"
|
||||
else
|
||||
devlink sb port pool set $port pool $pool th $orig
|
||||
fi
|
||||
}
|
||||
|
||||
devlink_pool_size_thtype()
|
||||
@ -154,14 +172,20 @@ devlink_pool_size_thtype()
|
||||
| jq -r '.pool[][] | (.size, .thtype)'
|
||||
}
|
||||
|
||||
# Record the current size and threshold type of pool $1 in DEVLINK_ORIG, for
# a later devlink_pool_size_thtype_restore.
devlink_pool_size_thtype_save()
{
	local pool=$1

	DEVLINK_ORIG["pool($pool).size_thtype"]=$(devlink_pool_size_thtype $pool)
}
|
||||
|
||||
devlink_pool_size_thtype_set()
|
||||
{
|
||||
local pool=$1; shift
|
||||
local thtype=$1; shift
|
||||
local size=$1; shift
|
||||
local key="pool($pool).size_thtype"
|
||||
|
||||
DEVLINK_ORIG[$key]=$(devlink_pool_size_thtype $pool)
|
||||
devlink sb pool set "$DEVLINK_DEV" pool $pool size $size thtype $thtype
|
||||
}
|
||||
|
||||
@ -171,8 +195,12 @@ devlink_pool_size_thtype_restore()
|
||||
local key="pool($pool).size_thtype"
|
||||
local -a orig=(${DEVLINK_ORIG[$key]})
|
||||
|
||||
devlink sb pool set "$DEVLINK_DEV" pool $pool \
|
||||
size ${orig[0]} thtype ${orig[1]}
|
||||
if [[ -z ${orig[0]} ]]; then
|
||||
echo "WARNING: Mismatched devlink_pool_size_thtype_restore"
|
||||
else
|
||||
devlink sb pool set "$DEVLINK_DEV" pool $pool \
|
||||
size ${orig[0]} thtype ${orig[1]}
|
||||
fi
|
||||
}
|
||||
|
||||
devlink_tc_bind_pool_th()
|
||||
@ -185,6 +213,16 @@ devlink_tc_bind_pool_th()
|
||||
| jq -r '.tc_bind[][] | (.pool, .threshold)'
|
||||
}
|
||||
|
||||
# Record the current pool binding and threshold of TC $2 in direction $3
# (ingress or egress) on port $1 in DEVLINK_ORIG, for a later
# devlink_tc_bind_pool_th_restore.
devlink_tc_bind_pool_th_save()
{
	local port=$1 tc=$2 dir=$3

	DEVLINK_ORIG["tc_bind($port,$dir,$tc).pool_th"]=$(devlink_tc_bind_pool_th $port $tc $dir)
}
|
||||
|
||||
devlink_tc_bind_pool_th_set()
|
||||
{
|
||||
local port=$1; shift
|
||||
@ -192,9 +230,7 @@ devlink_tc_bind_pool_th_set()
|
||||
local dir=$1; shift
|
||||
local pool=$1; shift
|
||||
local th=$1; shift
|
||||
local key="tc_bind($port,$dir,$tc).pool_th"
|
||||
|
||||
DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir)
|
||||
devlink sb tc bind set $port tc $tc type $dir pool $pool th $th
|
||||
}
|
||||
|
||||
@ -206,8 +242,12 @@ devlink_tc_bind_pool_th_restore()
|
||||
local key="tc_bind($port,$dir,$tc).pool_th"
|
||||
local -a orig=(${DEVLINK_ORIG[$key]})
|
||||
|
||||
devlink sb tc bind set $port tc $tc type $dir \
|
||||
pool ${orig[0]} th ${orig[1]}
|
||||
if [[ -z ${orig[0]} ]]; then
|
||||
echo "WARNING: Mismatched devlink_tc_bind_pool_th_restore"
|
||||
else
|
||||
devlink sb tc bind set $port tc $tc type $dir \
|
||||
pool ${orig[0]} th ${orig[1]}
|
||||
fi
|
||||
}
|
||||
|
||||
devlink_traps_num_get()
|
||||
@ -509,3 +549,9 @@ devlink_cpu_port_get()
|
||||
|
||||
echo "$DEVLINK_DEV/$cpu_dl_port_num"
|
||||
}
|
||||
|
||||
# Print the cell_size attribute that devlink reports for shared-buffer pool 0
# of $DEVLINK_DEV.
devlink_cell_size_get()
{
	devlink sb pool show "$DEVLINK_DEV" pool 0 -j | jq '.pool[][].cell_size'
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user