Merge branch 'ipv6-Refactor-nexthop-selection-helpers-during-a-fib-lookup'

David Ahern says:

====================
ipv6: Refactor nexthop selection helpers during a fib lookup

IPv6 has a fib6_nh embedded within each fib6_info and a separate
fib6_info for each path in a multipath route. A side effect is that
a fib6_info is passed all the way down the stack when selecting a path
on a fib lookup. Refactor the fib lookup functions and associated
helper functions to take a fib6_nh when appropriate to enable IPv6
to work with nexthop objects where the fib6_nh is not directly part
of a fib entry.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
David S. Miller 2019-04-11 14:24:07 -07:00
commit 78f07adac8
2 changed files with 151 additions and 127 deletions

View File

@ -127,6 +127,10 @@ struct rt6_exception {
struct fib6_nh { struct fib6_nh {
struct fib_nh_common nh_common; struct fib_nh_common nh_common;
#ifdef CONFIG_IPV6_ROUTER_PREF
unsigned long last_probe;
#endif
}; };
struct fib6_info { struct fib6_info {
@ -155,10 +159,6 @@ struct fib6_info {
struct rt6_info * __percpu *rt6i_pcpu; struct rt6_info * __percpu *rt6i_pcpu;
struct rt6_exception_bucket __rcu *rt6i_exception_bucket; struct rt6_exception_bucket __rcu *rt6i_exception_bucket;
#ifdef CONFIG_IPV6_ROUTER_PREF
unsigned long last_probe;
#endif
u32 fib6_metric; u32 fib6_metric;
u8 fib6_protocol; u8 fib6_protocol;
u8 fib6_type; u8 fib6_type;

View File

@ -102,7 +102,8 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu); struct sk_buff *skb, u32 mtu);
static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb); struct sk_buff *skb);
static int rt6_score_route(struct fib6_info *rt, int oif, int strict); static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
int strict);
static size_t rt6_nlmsg_size(struct fib6_info *rt); static size_t rt6_nlmsg_size(struct fib6_info *rt);
static int rt6_fill_node(struct net *net, struct sk_buff *skb, static int rt6_fill_node(struct net *net, struct sk_buff *skb,
struct fib6_info *rt, struct dst_entry *dst, struct fib6_info *rt, struct dst_entry *dst,
@ -446,12 +447,13 @@ struct fib6_info *fib6_multipath_select(const struct net *net,
list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings, list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings,
fib6_siblings) { fib6_siblings) {
const struct fib6_nh *nh = &sibling->fib6_nh;
int nh_upper_bound; int nh_upper_bound;
nh_upper_bound = atomic_read(&sibling->fib6_nh.fib_nh_upper_bound); nh_upper_bound = atomic_read(&nh->fib_nh_upper_bound);
if (fl6->mp_hash > nh_upper_bound) if (fl6->mp_hash > nh_upper_bound)
continue; continue;
if (rt6_score_route(sibling, oif, strict) < 0) if (rt6_score_route(nh, sibling->fib6_flags, oif, strict) < 0)
break; break;
match = sibling; match = sibling;
break; break;
@ -464,12 +466,34 @@ struct fib6_info *fib6_multipath_select(const struct net *net,
* Route lookup. rcu_read_lock() should be held. * Route lookup. rcu_read_lock() should be held.
*/ */
static bool __rt6_device_match(struct net *net, const struct fib6_nh *nh,
const struct in6_addr *saddr, int oif, int flags)
{
const struct net_device *dev;
if (nh->fib_nh_flags & RTNH_F_DEAD)
return false;
dev = nh->fib_nh_dev;
if (oif) {
if (dev->ifindex == oif)
return true;
} else {
if (ipv6_chk_addr(net, saddr, dev,
flags & RT6_LOOKUP_F_IFACE))
return true;
}
return false;
}
static inline struct fib6_info *rt6_device_match(struct net *net, static inline struct fib6_info *rt6_device_match(struct net *net,
struct fib6_info *rt, struct fib6_info *rt,
const struct in6_addr *saddr, const struct in6_addr *saddr,
int oif, int oif,
int flags) int flags)
{ {
const struct fib6_nh *nh;
struct fib6_info *sprt; struct fib6_info *sprt;
if (!oif && ipv6_addr_any(saddr) && if (!oif && ipv6_addr_any(saddr) &&
@ -477,19 +501,9 @@ static inline struct fib6_info *rt6_device_match(struct net *net,
return rt; return rt;
for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) { for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) {
const struct net_device *dev = sprt->fib6_nh.fib_nh_dev; nh = &sprt->fib6_nh;
if (__rt6_device_match(net, nh, saddr, oif, flags))
if (sprt->fib6_nh.fib_nh_flags & RTNH_F_DEAD) return sprt;
continue;
if (oif) {
if (dev->ifindex == oif)
return sprt;
} else {
if (ipv6_chk_addr(net, saddr, dev,
flags & RT6_LOOKUP_F_IFACE))
return sprt;
}
} }
if (oif && flags & RT6_LOOKUP_F_IFACE) if (oif && flags & RT6_LOOKUP_F_IFACE)
@ -517,7 +531,7 @@ static void rt6_probe_deferred(struct work_struct *w)
kfree(work); kfree(work);
} }
static void rt6_probe(struct fib6_info *rt) static void rt6_probe(struct fib6_nh *fib6_nh)
{ {
struct __rt6_probe_work *work = NULL; struct __rt6_probe_work *work = NULL;
const struct in6_addr *nh_gw; const struct in6_addr *nh_gw;
@ -533,11 +547,11 @@ static void rt6_probe(struct fib6_info *rt)
* Router Reachability Probe MUST be rate-limited * Router Reachability Probe MUST be rate-limited
* to no more than one per minute. * to no more than one per minute.
*/ */
if (!rt || !rt->fib6_nh.fib_nh_gw_family) if (!fib6_nh->fib_nh_gw_family)
return; return;
nh_gw = &rt->fib6_nh.fib_nh_gw6; nh_gw = &fib6_nh->fib_nh_gw6;
dev = rt->fib6_nh.fib_nh_dev; dev = fib6_nh->fib_nh_dev;
rcu_read_lock_bh(); rcu_read_lock_bh();
idev = __in6_dev_get(dev); idev = __in6_dev_get(dev);
neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
@ -554,13 +568,13 @@ static void rt6_probe(struct fib6_info *rt)
__neigh_set_probe_once(neigh); __neigh_set_probe_once(neigh);
} }
write_unlock(&neigh->lock); write_unlock(&neigh->lock);
} else if (time_after(jiffies, rt->last_probe + } else if (time_after(jiffies, fib6_nh->last_probe +
idev->cnf.rtr_probe_interval)) { idev->cnf.rtr_probe_interval)) {
work = kmalloc(sizeof(*work), GFP_ATOMIC); work = kmalloc(sizeof(*work), GFP_ATOMIC);
} }
if (work) { if (work) {
rt->last_probe = jiffies; fib6_nh->last_probe = jiffies;
INIT_WORK(&work->work, rt6_probe_deferred); INIT_WORK(&work->work, rt6_probe_deferred);
work->target = *nh_gw; work->target = *nh_gw;
dev_hold(dev); dev_hold(dev);
@ -572,7 +586,7 @@ out:
rcu_read_unlock_bh(); rcu_read_unlock_bh();
} }
#else #else
static inline void rt6_probe(struct fib6_info *rt) static inline void rt6_probe(struct fib6_nh *fib6_nh)
{ {
} }
#endif #endif
@ -580,27 +594,14 @@ static inline void rt6_probe(struct fib6_info *rt)
/* /*
* Default Router Selection (RFC 2461 6.3.6) * Default Router Selection (RFC 2461 6.3.6)
*/ */
static inline int rt6_check_dev(struct fib6_info *rt, int oif) static enum rt6_nud_state rt6_check_neigh(const struct fib6_nh *fib6_nh)
{
const struct net_device *dev = rt->fib6_nh.fib_nh_dev;
if (!oif || dev->ifindex == oif)
return 2;
return 0;
}
static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
{ {
enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
struct neighbour *neigh; struct neighbour *neigh;
if (rt->fib6_flags & RTF_NONEXTHOP ||
!rt->fib6_nh.fib_nh_gw_family)
return RT6_NUD_SUCCEED;
rcu_read_lock_bh(); rcu_read_lock_bh();
neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.fib_nh_dev, neigh = __ipv6_neigh_lookup_noref(fib6_nh->fib_nh_dev,
&rt->fib6_nh.fib_nh_gw6); &fib6_nh->fib_nh_gw6);
if (neigh) { if (neigh) {
read_lock(&neigh->lock); read_lock(&neigh->lock);
if (neigh->nud_state & NUD_VALID) if (neigh->nud_state & NUD_VALID)
@ -621,43 +622,44 @@ static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
return ret; return ret;
} }
static int rt6_score_route(struct fib6_info *rt, int oif, int strict) static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif,
int strict)
{ {
int m; int m = 0;
if (!oif || nh->fib_nh_dev->ifindex == oif)
m = 2;
m = rt6_check_dev(rt, oif);
if (!m && (strict & RT6_LOOKUP_F_IFACE)) if (!m && (strict & RT6_LOOKUP_F_IFACE))
return RT6_NUD_FAIL_HARD; return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF #ifdef CONFIG_IPV6_ROUTER_PREF
m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->fib6_flags)) << 2; m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(fib6_flags)) << 2;
#endif #endif
if (strict & RT6_LOOKUP_F_REACHABLE) { if ((strict & RT6_LOOKUP_F_REACHABLE) &&
int n = rt6_check_neigh(rt); !(fib6_flags & RTF_NONEXTHOP) && nh->fib_nh_gw_family) {
int n = rt6_check_neigh(nh);
if (n < 0) if (n < 0)
return n; return n;
} }
return m; return m;
} }
static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict, static bool find_match(struct fib6_nh *nh, u32 fib6_flags,
int *mpri, struct fib6_info *match, int oif, int strict, int *mpri, bool *do_rr)
bool *do_rr)
{ {
int m;
bool match_do_rr = false; bool match_do_rr = false;
bool rc = false;
int m;
if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD) if (nh->fib_nh_flags & RTNH_F_DEAD)
goto out; goto out;
if (ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev) && if (ip6_ignore_linkdown(nh->fib_nh_dev) &&
rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN && nh->fib_nh_flags & RTNH_F_LINKDOWN &&
!(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE)) !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
goto out; goto out;
if (fib6_check_expired(rt)) m = rt6_score_route(nh, fib6_flags, oif, strict);
goto out;
m = rt6_score_route(rt, oif, strict);
if (m == RT6_NUD_FAIL_DO_RR) { if (m == RT6_NUD_FAIL_DO_RR) {
match_do_rr = true; match_do_rr = true;
m = 0; /* lowest valid score */ m = 0; /* lowest valid score */
@ -666,53 +668,64 @@ static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict,
} }
if (strict & RT6_LOOKUP_F_REACHABLE) if (strict & RT6_LOOKUP_F_REACHABLE)
rt6_probe(rt); rt6_probe(nh);
/* note that m can be RT6_NUD_FAIL_PROBE at this point */ /* note that m can be RT6_NUD_FAIL_PROBE at this point */
if (m > *mpri) { if (m > *mpri) {
*do_rr = match_do_rr; *do_rr = match_do_rr;
*mpri = m; *mpri = m;
match = rt; rc = true;
} }
out: out:
return match; return rc;
}
static void __find_rr_leaf(struct fib6_info *rt_start,
struct fib6_info *nomatch, u32 metric,
struct fib6_info **match, struct fib6_info **cont,
int oif, int strict, bool *do_rr, int *mpri)
{
struct fib6_info *rt;
for (rt = rt_start;
rt && rt != nomatch;
rt = rcu_dereference(rt->fib6_next)) {
struct fib6_nh *nh;
if (cont && rt->fib6_metric != metric) {
*cont = rt;
return;
}
if (fib6_check_expired(rt))
continue;
nh = &rt->fib6_nh;
if (find_match(nh, rt->fib6_flags, oif, strict, mpri, do_rr))
*match = rt;
}
} }
static struct fib6_info *find_rr_leaf(struct fib6_node *fn, static struct fib6_info *find_rr_leaf(struct fib6_node *fn,
struct fib6_info *leaf, struct fib6_info *leaf,
struct fib6_info *rr_head, struct fib6_info *rr_head,
u32 metric, int oif, int strict, u32 metric, int oif, int strict,
bool *do_rr) bool *do_rr)
{ {
struct fib6_info *rt, *match, *cont; struct fib6_info *match = NULL, *cont = NULL;
int mpri = -1; int mpri = -1;
match = NULL; __find_rr_leaf(rr_head, NULL, metric, &match, &cont,
cont = NULL; oif, strict, do_rr, &mpri);
for (rt = rr_head; rt; rt = rcu_dereference(rt->fib6_next)) {
if (rt->fib6_metric != metric) {
cont = rt;
break;
}
match = find_match(rt, oif, strict, &mpri, match, do_rr); __find_rr_leaf(leaf, rr_head, metric, &match, &cont,
} oif, strict, do_rr, &mpri);
for (rt = leaf; rt && rt != rr_head;
rt = rcu_dereference(rt->fib6_next)) {
if (rt->fib6_metric != metric) {
cont = rt;
break;
}
match = find_match(rt, oif, strict, &mpri, match, do_rr);
}
if (match || !cont) if (match || !cont)
return match; return match;
for (rt = cont; rt; rt = rcu_dereference(rt->fib6_next)) __find_rr_leaf(cont, NULL, metric, &match, NULL,
match = find_match(rt, oif, strict, &mpri, match, do_rr); oif, strict, do_rr, &mpri);
return match; return match;
} }
@ -1061,36 +1074,37 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart: restart:
f6i = rcu_dereference(fn->leaf); f6i = rcu_dereference(fn->leaf);
if (!f6i) { if (!f6i)
f6i = net->ipv6.fib6_null_entry; f6i = net->ipv6.fib6_null_entry;
} else { else
f6i = rt6_device_match(net, f6i, &fl6->saddr, f6i = rt6_device_match(net, f6i, &fl6->saddr,
fl6->flowi6_oif, flags); fl6->flowi6_oif, flags);
if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
f6i = fib6_multipath_select(net, f6i, fl6,
fl6->flowi6_oif, skb,
flags);
}
if (f6i == net->ipv6.fib6_null_entry) { if (f6i == net->ipv6.fib6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr); fn = fib6_backtrack(fn, &fl6->saddr);
if (fn) if (fn)
goto restart; goto restart;
rt = net->ipv6.ip6_null_entry;
dst_hold(&rt->dst);
goto out;
} }
trace_fib6_table_lookup(net, f6i, table, fl6); if (f6i->fib6_nsiblings && fl6->flowi6_oif == 0)
f6i = fib6_multipath_select(net, f6i, fl6, fl6->flowi6_oif, skb,
flags);
/* Search through exception table */ /* Search through exception table */
rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
if (rt) { if (rt) {
if (ip6_hold_safe(net, &rt)) if (ip6_hold_safe(net, &rt))
dst_use_noref(&rt->dst, jiffies); dst_use_noref(&rt->dst, jiffies);
} else if (f6i == net->ipv6.fib6_null_entry) {
rt = net->ipv6.ip6_null_entry;
dst_hold(&rt->dst);
} else { } else {
rt = ip6_create_rt_rcu(f6i); rt = ip6_create_rt_rcu(f6i);
} }
out:
trace_fib6_table_lookup(net, f6i, table, fl6);
rcu_read_unlock(); rcu_read_unlock();
return rt; return rt;
@ -1841,9 +1855,6 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
rcu_read_lock(); rcu_read_lock();
f6i = fib6_table_lookup(net, table, oif, fl6, strict); f6i = fib6_table_lookup(net, table, oif, fl6, strict);
if (f6i->fib6_nsiblings)
f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
if (f6i == net->ipv6.fib6_null_entry) { if (f6i == net->ipv6.fib6_null_entry) {
rt = net->ipv6.ip6_null_entry; rt = net->ipv6.ip6_null_entry;
rcu_read_unlock(); rcu_read_unlock();
@ -1851,6 +1862,9 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
return rt; return rt;
} }
if (f6i->fib6_nsiblings)
f6i = fib6_multipath_select(net, f6i, fl6, oif, skb, strict);
/*Search through exception table */ /*Search through exception table */
rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr); rt = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
if (rt) { if (rt) {
@ -2393,6 +2407,35 @@ void ip6_sk_dst_store_flow(struct sock *sk, struct dst_entry *dst,
NULL); NULL);
} }
static bool ip6_redirect_nh_match(struct fib6_info *f6i,
struct fib6_nh *nh,
struct flowi6 *fl6,
const struct in6_addr *gw,
struct rt6_info **ret)
{
if (nh->fib_nh_flags & RTNH_F_DEAD || !nh->fib_nh_gw_family ||
fl6->flowi6_oif != nh->fib_nh_dev->ifindex)
return false;
/* rt_cache's gateway might be different from its 'parent'
* in the case of an ip redirect.
* So we keep searching in the exception table if the gateway
* is different.
*/
if (!ipv6_addr_equal(gw, &nh->fib_nh_gw6)) {
struct rt6_info *rt_cache;
rt_cache = rt6_find_cached_rt(f6i, &fl6->daddr, &fl6->saddr);
if (rt_cache &&
ipv6_addr_equal(gw, &rt_cache->rt6i_gateway)) {
*ret = rt_cache;
return true;
}
return false;
}
return true;
}
/* Handle redirects */ /* Handle redirects */
struct ip6rd_flowi { struct ip6rd_flowi {
struct flowi6 fl6; struct flowi6 fl6;
@ -2406,7 +2449,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
int flags) int flags)
{ {
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
struct rt6_info *ret = NULL, *rt_cache; struct rt6_info *ret = NULL;
struct fib6_info *rt; struct fib6_info *rt;
struct fib6_node *fn; struct fib6_node *fn;
@ -2424,34 +2467,15 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart: restart:
for_each_fib6_node_rt_rcu(fn) { for_each_fib6_node_rt_rcu(fn) {
if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)
continue;
if (fib6_check_expired(rt)) if (fib6_check_expired(rt))
continue; continue;
if (rt->fib6_flags & RTF_REJECT) if (rt->fib6_flags & RTF_REJECT)
break; break;
if (!rt->fib6_nh.fib_nh_gw_family)
continue;
if (fl6->flowi6_oif != rt->fib6_nh.fib_nh_dev->ifindex) if (fl6->flowi6_oif != rt->fib6_nh.fib_nh_dev->ifindex)
continue; continue;
/* rt_cache's gateway might be different from its 'parent' if (ip6_redirect_nh_match(rt, &rt->fib6_nh, fl6,
* in the case of an ip redirect. &rdfl->gateway, &ret))
* So we keep searching in the exception table if the gateway goto out;
* is different.
*/
if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.fib_nh_gw6)) {
rt_cache = rt6_find_cached_rt(rt,
&fl6->daddr,
&fl6->saddr);
if (rt_cache &&
ipv6_addr_equal(&rdfl->gateway,
&rt_cache->rt6i_gateway)) {
ret = rt_cache;
break;
}
continue;
}
break;
} }
if (!rt) if (!rt)