Merge branch 'nexthop-refactor-and-fix-nexthop-selection-for-multipath-routes'
Benjamin Poirier says: ==================== nexthop: Refactor and fix nexthop selection for multipath routes In order to select a nexthop for multipath routes, fib_select_multipath() is used with legacy nexthops and nexthop_select_path_hthr() is used with nexthop objects. Those two functions perform a validity test on the neighbor related to each nexthop but their logic is structured differently. This causes a divergence in behavior and nexthop_select_path_hthr() may return a nexthop that failed the neighbor validity test even if there was one that passed. Refactor nexthop_select_path_hthr() to make it more similar to fib_select_multipath() and fix the problem mentioned above. v1: https://lore.kernel.org/netdev/20230529201914.69828-1-bpoirier@nvidia.com/ ==================== Link: https://lore.kernel.org/r/20230719-nh_select-v2-0-04383e89f868@nvidia.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
bf837e8f7d
@ -1152,41 +1152,64 @@ static bool ipv4_good_nh(const struct fib_nh *nh)
|
||||
return !!(state & NUD_VALID);
|
||||
}
|
||||
|
||||
static bool nexthop_is_good_nh(const struct nexthop *nh)
|
||||
{
|
||||
struct nh_info *nhi = rcu_dereference(nh->nh_info);
|
||||
|
||||
switch (nhi->family) {
|
||||
case AF_INET:
|
||||
return ipv4_good_nh(&nhi->fib_nh);
|
||||
case AF_INET6:
|
||||
return ipv6_good_nh(&nhi->fib6_nh);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static struct nexthop *nexthop_select_path_fdb(struct nh_group *nhg, int hash)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nhg->num_nh; i++) {
|
||||
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
|
||||
|
||||
if (hash > atomic_read(&nhge->hthr.upper_bound))
|
||||
continue;
|
||||
|
||||
return nhge->nh;
|
||||
}
|
||||
|
||||
WARN_ON_ONCE(1);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct nexthop *nexthop_select_path_hthr(struct nh_group *nhg, int hash)
|
||||
{
|
||||
struct nexthop *rc = NULL;
|
||||
int i;
|
||||
|
||||
if (nhg->fdb_nh)
|
||||
return nexthop_select_path_fdb(nhg, hash);
|
||||
|
||||
for (i = 0; i < nhg->num_nh; ++i) {
|
||||
struct nh_grp_entry *nhge = &nhg->nh_entries[i];
|
||||
struct nh_info *nhi;
|
||||
|
||||
if (hash > atomic_read(&nhge->hthr.upper_bound))
|
||||
continue;
|
||||
|
||||
nhi = rcu_dereference(nhge->nh->nh_info);
|
||||
if (nhi->fdb_nh)
|
||||
return nhge->nh;
|
||||
|
||||
/* nexthops always check if it is good and does
|
||||
* not rely on a sysctl for this behavior
|
||||
*/
|
||||
switch (nhi->family) {
|
||||
case AF_INET:
|
||||
if (ipv4_good_nh(&nhi->fib_nh))
|
||||
return nhge->nh;
|
||||
break;
|
||||
case AF_INET6:
|
||||
if (ipv6_good_nh(&nhi->fib6_nh))
|
||||
return nhge->nh;
|
||||
break;
|
||||
}
|
||||
if (!nexthop_is_good_nh(nhge->nh))
|
||||
continue;
|
||||
|
||||
if (!rc)
|
||||
rc = nhge->nh;
|
||||
|
||||
if (hash > atomic_read(&nhge->hthr.upper_bound))
|
||||
continue;
|
||||
|
||||
return nhge->nh;
|
||||
}
|
||||
|
||||
return rc;
|
||||
return rc ? : nhg->nh_entries[0].nh;
|
||||
}
|
||||
|
||||
static struct nexthop *nexthop_select_path_res(struct nh_group *nhg, int hash)
|
||||
|
@ -29,6 +29,7 @@ IPV4_TESTS="
|
||||
ipv4_large_res_grp
|
||||
ipv4_compat_mode
|
||||
ipv4_fdb_grp_fcnal
|
||||
ipv4_mpath_select
|
||||
ipv4_torture
|
||||
ipv4_res_torture
|
||||
"
|
||||
@ -42,6 +43,7 @@ IPV6_TESTS="
|
||||
ipv6_large_res_grp
|
||||
ipv6_compat_mode
|
||||
ipv6_fdb_grp_fcnal
|
||||
ipv6_mpath_select
|
||||
ipv6_torture
|
||||
ipv6_res_torture
|
||||
"
|
||||
@ -370,6 +372,27 @@ check_large_res_grp()
|
||||
log_test $? 0 "Dump large (x$buckets) nexthop buckets"
|
||||
}
|
||||
|
||||
# Print the output device chosen by "ip route get" for a destination.
#
# $1: destination address passed to "$IP -j route get"
#
# The device name is extracted from the JSON output with jq (.[0].dev).
# Nothing is printed when the pipeline fails, and that case still returns 0.
get_route_dev()
{
	local pfx="$1"
	local out

	out=$($IP -j route get "$pfx" | jq -re ".[0].dev") || return 0
	echo "$out"
}
|
||||
|
||||
# Compare the output device selected for a destination with an expected one.
#
# $1: destination address handed to get_route_dev
# $2: expected device name
#
# The comparison and the resulting exit status are delegated to check_output.
check_route_dev()
{
	local pfx="$1" expected="$2"
	local out

	out=$(get_route_dev "$pfx")
	check_output "$out" "$expected"
}
|
||||
|
||||
start_ip_monitor()
|
||||
{
|
||||
local mtype=$1
|
||||
@ -575,6 +598,112 @@ ipv4_fdb_grp_fcnal()
|
||||
$IP link del dev vx10
|
||||
}
|
||||
|
||||
# IPv4 multipath selection test for nexthop-object groups.
#
# Builds group 1001 out of two gateways (veth1 and veth3) and routes
# 172.16.101.0/24 through it. For each device it looks for a destination
# that is currently routed via that device, marks the device's gateway
# neighbor as "failed" and expects the route lookup to move to the other
# device. Afterwards both neighbors are set to "incomplete" and a lookup is
# issued to exercise selection when no neighbor is valid.
#
# Skipped when jq is missing or when no destination maps to a device.
#
# NOTE(review): the second log_test reuses $rc. run_cmd is not visible in
# this chunk, so whether it updates rc is unconfirmed; statement order is
# kept unchanged for that reason.
ipv4_mpath_select()
{
	local rc dev match h addr

	echo
	echo "IPv4 multipath selection"
	echo "------------------------"
	if [[ ! -x "$(command -v jq)" ]]; then
		echo "SKIP: Could not run test; need jq tool"
		return $ksft_skip
	fi

	# Use status of existing neighbor entry when determining nexthop for
	# multipath routes.
	local -A gws=([veth1]=172.16.1.2 [veth3]=172.16.2.2)
	local -A other_dev=([veth1]=veth3 [veth3]=veth1)

	run_cmd "$IP nexthop add id 1 via ${gws["veth1"]} dev veth1"
	run_cmd "$IP nexthop add id 2 via ${gws["veth3"]} dev veth3"
	run_cmd "$IP nexthop add id 1001 group 1/2"
	run_cmd "$IP ro add 172.16.101.0/24 nhid 1001"
	rc=0
	for dev in veth1 veth3; do
		# Find a destination whose lookup currently resolves to $dev.
		match=0
		for (( h = 1; h <= 254; h++ )); do
			addr="172.16.101.$h"
			if [[ "$(get_route_dev "$addr")" == "$dev" ]]; then
				match=1
				break
			fi
		done
		if [ "$match" -eq 0 ]; then
			echo "SKIP: Did not find a route using device $dev"
			return $ksft_skip
		fi

		# With the gateway neighbor failed, the same destination must
		# be routed through the other device.
		run_cmd "$IP neigh add ${gws[$dev]} dev $dev nud failed"
		check_route_dev "$addr" "${other_dev[$dev]}" || {
			rc=1
			break
		}
		run_cmd "$IP neigh del ${gws[$dev]} dev $dev"
	done
	log_test $rc 0 "Use valid neighbor during multipath selection"

	run_cmd "$IP neigh add 172.16.1.2 dev veth1 nud incomplete"
	run_cmd "$IP neigh add 172.16.2.2 dev veth3 nud incomplete"
	run_cmd "$IP route get 172.16.101.1"
	# if we did not crash, success
	log_test $rc 0 "Multipath selection with no valid neighbor"
}
|
||||
|
||||
# IPv6 multipath selection test for nexthop-object groups.
#
# Builds group 1001 out of two gateways (veth1 and veth3) and routes
# 2001:db8:101::/64 through it. For each device it looks for a destination
# that is currently routed via that device, marks the device's gateway
# neighbor as "failed" and expects the route lookup to move to the other
# device. Afterwards both neighbors are set to "incomplete" and a lookup is
# issued to exercise selection when no neighbor is valid.
#
# Skipped when jq is missing or when no destination maps to a device.
#
# NOTE(review): the second log_test reuses $rc. run_cmd is not visible in
# this chunk, so whether it updates rc is unconfirmed; statement order is
# kept unchanged for that reason.
ipv6_mpath_select()
{
	local rc dev match h addr

	echo
	echo "IPv6 multipath selection"
	echo "------------------------"
	if [[ ! -x "$(command -v jq)" ]]; then
		echo "SKIP: Could not run test; need jq tool"
		return $ksft_skip
	fi

	# Use status of existing neighbor entry when determining nexthop for
	# multipath routes.
	local -A gws=([veth1]=2001:db8:91::2 [veth3]=2001:db8:92::2)
	local -A other_dev=([veth1]=veth3 [veth3]=veth1)

	run_cmd "$IP nexthop add id 1 via ${gws["veth1"]} dev veth1"
	run_cmd "$IP nexthop add id 2 via ${gws["veth3"]} dev veth3"
	run_cmd "$IP nexthop add id 1001 group 1/2"
	run_cmd "$IP ro add 2001:db8:101::/64 nhid 1001"
	rc=0
	for dev in veth1 veth3; do
		# Find a destination whose lookup currently resolves to $dev.
		match=0
		for (( h = 1; h <= 65535; h++ )); do
			printf -v addr "2001:db8:101::%x" $h
			if [[ "$(get_route_dev "$addr")" == "$dev" ]]; then
				match=1
				break
			fi
		done
		if [ "$match" -eq 0 ]; then
			echo "SKIP: Did not find a route using device $dev"
			return $ksft_skip
		fi

		# With the gateway neighbor failed, the same destination must
		# be routed through the other device.
		run_cmd "$IP neigh add ${gws[$dev]} dev $dev nud failed"
		check_route_dev "$addr" "${other_dev[$dev]}" || {
			rc=1
			break
		}
		run_cmd "$IP neigh del ${gws[$dev]} dev $dev"
	done
	log_test $rc 0 "Use valid neighbor during multipath selection"

	run_cmd "$IP neigh add 2001:db8:91::2 dev veth1 nud incomplete"
	run_cmd "$IP neigh add 2001:db8:92::2 dev veth3 nud incomplete"
	run_cmd "$IP route get 2001:db8:101::1"
	# if we did not crash, success
	log_test $rc 0 "Multipath selection with no valid neighbor"
}
|
||||
|
||||
################################################################################
|
||||
# basic operations (add, delete, replace) on nexthops and nexthop groups
|
||||
#
|
||||
|
Loading…
x
Reference in New Issue
Block a user