ipv4: lock mtu in fnhe when received PMTU < net.ipv4.route.min_pmtu
[ Upstream commit d52e5a7e7ca49457dd31fc8b42fb7c0d58a31221 ]

Prior to the rework of PMTU information storage in commit 2c8cec5c10bc
("ipv4: Cache learned PMTU information in inetpeer."), when a PMTU event
advertising a PMTU smaller than net.ipv4.route.min_pmtu was received, we
would disable setting the DF flag on packets by locking the MTU metric,
and set the PMTU to net.ipv4.route.min_pmtu.

Since then, we no longer disable DF, but we still raise the PMTU to
net.ipv4.route.min_pmtu, so the intermediate router that has the link
with the small MTU has to drop the packets.

This patch reestablishes the pre-2.6.39 behavior by splitting
rtable->rt_pmtu into a bitfield with rt_mtu_locked and rt_pmtu.
rt_mtu_locked indicates that we shouldn't set the DF bit on that path,
and is checked in ip_dont_fragment().

One possible workaround is to set net.ipv4.route.min_pmtu to a value low
enough to accommodate the lowest MTU encountered.

Fixes: 2c8cec5c10bc ("ipv4: Cache learned PMTU information in inetpeer.")
Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent b29cfb8ae0
commit 119bbaa679
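To make the restored behaviour concrete before the diff, here is a minimal user-space sketch, not kernel code: the names struct path_mtu, update_pmtu() and should_set_df() are invented for illustration, and 552 is only an example minimum. It mirrors the rule described above: a reported PMTU below the minimum is clamped up to the minimum and the path is marked locked, and a locked path no longer gets the DF bit, so the router on the small-MTU link can fragment instead of dropping.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-ins for the kernel's cached route state. */
struct path_mtu {
        unsigned int pmtu;   /* cached path MTU */
        bool mtu_locked;     /* when set, stop using DF on this path */
};

/* A reported PMTU below min_pmtu is clamped up to min_pmtu and locked. */
static void update_pmtu(struct path_mtu *p, unsigned int reported,
                        unsigned int min_pmtu)
{
        bool lock = false;

        if (reported < min_pmtu) {
                lock = true;
                reported = min_pmtu;
        }
        p->pmtu = reported;
        p->mtu_locked = lock;
}

/* A locked path must not set DF, so intermediate routers may fragment. */
static bool should_set_df(const struct path_mtu *p)
{
        return !p->mtu_locked;
}

int main(void)
{
        struct path_mtu p = { .pmtu = 1500, .mtu_locked = false };

        /* ICMP "fragmentation needed" advertises 300; example minimum is 552. */
        update_pmtu(&p, 300, 552);
        printf("pmtu=%u locked=%d set_df=%d\n",
               p.pmtu, (int)p.mtu_locked, (int)should_set_df(&p));
        return 0;
}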
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -279,6 +279,13 @@ int ip_decrease_ttl(struct iphdr *iph)
         return --iph->ttl;
 }
 
+static inline int ip_mtu_locked(const struct dst_entry *dst)
+{
+        const struct rtable *rt = (const struct rtable *)dst;
+
+        return rt->rt_mtu_locked || dst_metric_locked(dst, RTAX_MTU);
+}
+
 static inline
 int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst)
 {
@@ -286,7 +293,7 @@ int ip_dont_fragment(const struct sock *sk, const struct dst_entry *dst)
 
         return pmtudisc == IP_PMTUDISC_DO ||
                (pmtudisc == IP_PMTUDISC_WANT &&
-                !(dst_metric_locked(dst, RTAX_MTU)));
+                !ip_mtu_locked(dst));
 }
 
 static inline bool ip_sk_accept_pmtu(const struct sock *sk)
@@ -312,7 +319,7 @@ static inline unsigned int ip_dst_mtu_maybe_forward(const struct dst_entry *dst,
         struct net *net = dev_net(dst->dev);
 
         if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
-            dst_metric_locked(dst, RTAX_MTU) ||
+            ip_mtu_locked(dst) ||
             !forwarding)
                 return dst_mtu(dst);
 
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -56,6 +56,7 @@ struct fib_nh_exception {
         int fnhe_genid;
         __be32 fnhe_daddr;
         u32 fnhe_pmtu;
+        bool fnhe_mtu_locked;
         __be32 fnhe_gw;
         unsigned long fnhe_expires;
         struct rtable __rcu *fnhe_rth_input;
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -64,7 +64,8 @@ struct rtable {
         __be32 rt_gateway;
 
         /* Miscellaneous cached information */
-        u32 rt_pmtu;
+        u32 rt_mtu_locked:1,
+            rt_pmtu:31;
 
         u32 rt_table_id;
 
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -612,6 +612,7 @@ static inline u32 fnhe_hashfun(__be32 daddr)
 static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
 {
         rt->rt_pmtu = fnhe->fnhe_pmtu;
+        rt->rt_mtu_locked = fnhe->fnhe_mtu_locked;
         rt->dst.expires = fnhe->fnhe_expires;
 
         if (fnhe->fnhe_gw) {
@@ -622,7 +623,7 @@ static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnh
 }
 
 static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
-                                  u32 pmtu, unsigned long expires)
+                                  u32 pmtu, bool lock, unsigned long expires)
 {
         struct fnhe_hash_bucket *hash;
         struct fib_nh_exception *fnhe;
@@ -659,8 +660,10 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
                 fnhe->fnhe_genid = genid;
                 if (gw)
                         fnhe->fnhe_gw = gw;
-                if (pmtu)
+                if (pmtu) {
                         fnhe->fnhe_pmtu = pmtu;
+                        fnhe->fnhe_mtu_locked = lock;
+                }
                 fnhe->fnhe_expires = max(1UL, expires);
                 /* Update all cached dsts too */
                 rt = rcu_dereference(fnhe->fnhe_rth_input);
@@ -684,6 +687,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
                 fnhe->fnhe_daddr = daddr;
                 fnhe->fnhe_gw = gw;
                 fnhe->fnhe_pmtu = pmtu;
+                fnhe->fnhe_mtu_locked = lock;
                 fnhe->fnhe_expires = expires;
 
                 /* Exception created; mark the cached routes for the nexthop
@@ -765,7 +769,8 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
                         struct fib_nh *nh = &FIB_RES_NH(res);
 
                         update_or_create_fnhe(nh, fl4->daddr, new_gw,
-                                              0, jiffies + ip_rt_gc_timeout);
+                                              0, false,
+                                              jiffies + ip_rt_gc_timeout);
                 }
                 if (kill_route)
                         rt->dst.obsolete = DST_OBSOLETE_KILL;
@@ -977,15 +982,18 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
 {
         struct dst_entry *dst = &rt->dst;
         struct fib_result res;
+        bool lock = false;
 
-        if (dst_metric_locked(dst, RTAX_MTU))
+        if (ip_mtu_locked(dst))
                 return;
 
         if (ipv4_mtu(dst) < mtu)
                 return;
 
-        if (mtu < ip_rt_min_pmtu)
+        if (mtu < ip_rt_min_pmtu) {
+                lock = true;
                 mtu = ip_rt_min_pmtu;
+        }
 
         if (rt->rt_pmtu == mtu &&
             time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
@@ -995,7 +1003,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
         if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
                 struct fib_nh *nh = &FIB_RES_NH(res);
 
-                update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
+                update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock,
                                       jiffies + ip_rt_mtu_expires);
         }
         rcu_read_unlock();
@@ -1250,7 +1258,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
 
         mtu = READ_ONCE(dst->dev->mtu);
 
-        if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
+        if (unlikely(ip_mtu_locked(dst))) {
                 if (rt->rt_uses_gateway && mtu > 576)
                         mtu = 576;
         }
@@ -1473,6 +1481,7 @@ static struct rtable *rt_dst_alloc(struct net_device *dev,
                 rt->rt_is_input = 0;
                 rt->rt_iif = 0;
                 rt->rt_pmtu = 0;
+                rt->rt_mtu_locked = 0;
                 rt->rt_gateway = 0;
                 rt->rt_uses_gateway = 0;
                 rt->rt_table_id = 0;
@@ -2393,6 +2402,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
                 rt->rt_is_input = ort->rt_is_input;
                 rt->rt_iif = ort->rt_iif;
                 rt->rt_pmtu = ort->rt_pmtu;
+                rt->rt_mtu_locked = ort->rt_mtu_locked;
 
                 rt->rt_genid = rt_genid_ipv4(net);
                 rt->rt_flags = ort->rt_flags;
@@ -2495,6 +2505,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 table_id,
         memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
         if (rt->rt_pmtu && expires)
                 metrics[RTAX_MTU - 1] = rt->rt_pmtu;
+        if (rt->rt_mtu_locked && expires)
+                metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU);
         if (rtnetlink_put_metrics(skb, metrics) < 0)
                 goto nla_put_failure;
 
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -97,6 +97,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
         xdst->u.rt.rt_gateway = rt->rt_gateway;
         xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway;
         xdst->u.rt.rt_pmtu = rt->rt_pmtu;
+        xdst->u.rt.rt_mtu_locked = rt->rt_mtu_locked;
         xdst->u.rt.rt_table_id = rt->rt_table_id;
         INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);
 
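A side note on the ip_dont_fragment() change in include/net/ip.h above: for IP_PMTUDISC_WANT sockets the DF decision is the part that now consults ip_mtu_locked(), while IP_PMTUDISC_DO always requests DF and IP_PMTUDISC_DONT never does. The snippet below is only a small user-space illustration of that per-socket IP_MTU_DISCOVER knob; it is not part of the patch.

#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>

int main(void)
{
        int fd = socket(AF_INET, SOCK_DGRAM, 0);
        int val = IP_PMTUDISC_WANT;  /* defer to the route: DF unless the route's MTU is locked */

        if (fd < 0) {
                perror("socket");
                return 1;
        }

        /* IP_PMTUDISC_DO would always request DF; IP_PMTUDISC_DONT never would,
         * regardless of the route's lock state. */
        if (setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &val, sizeof(val)) < 0) {
                perror("setsockopt(IP_MTU_DISCOVER)");
                return 1;
        }

        printf("IP_MTU_DISCOVER set to IP_PMTUDISC_WANT\n");
        return 0;
}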