diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c index 5633f8572800..8212bfd05733 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c @@ -122,7 +122,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, /* Handle add/replace event */ if (fi->fib_nhs == 1) { if (__mlx5_lag_is_active(ldev)) { - struct net_device *nh_dev = fi->fib_nh[0].nh_dev; + struct net_device *nh_dev = fi->fib_nh[0].fib_nh_dev; int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev); mlx5_lag_set_port_affinity(ldev, ++i); @@ -134,10 +134,10 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, return; /* Verify next hops are ports of the same hca */ - if (!(fi->fib_nh[0].nh_dev == ldev->pf[0].netdev && - fi->fib_nh[1].nh_dev == ldev->pf[1].netdev) && - !(fi->fib_nh[0].nh_dev == ldev->pf[1].netdev && - fi->fib_nh[1].nh_dev == ldev->pf[0].netdev)) { + if (!(fi->fib_nh[0].fib_nh_dev == ldev->pf[0].netdev && + fi->fib_nh[1].fib_nh_dev == ldev->pf[1].netdev) && + !(fi->fib_nh[0].fib_nh_dev == ldev->pf[1].netdev && + fi->fib_nh[1].fib_nh_dev == ldev->pf[0].netdev)) { mlx5_core_warn(ldev->pf[0].dev, "Multipath offload require two ports of the same HCA\n"); return; } @@ -167,7 +167,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, /* nh added/removed */ if (event == FIB_EVENT_NH_DEL) { - int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->nh_dev); + int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev); if (i >= 0) { i = (i + 1) % 2 + 1; /* peer port */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 52fed8c7bf1e..0ba9daa05a52 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -2873,12 +2873,13 @@ mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp, return false; list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { + struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh; struct in6_addr *gw; int ifindex, weight; - ifindex = mlxsw_sp_rt6->rt->fib6_nh.nh_dev->ifindex; - weight = mlxsw_sp_rt6->rt->fib6_nh.nh_weight; - gw = &mlxsw_sp_rt6->rt->fib6_nh.nh_gw; + ifindex = fib6_nh->fib_nh_dev->ifindex; + weight = fib6_nh->fib_nh_weight; + gw = &fib6_nh->fib_nh_gw6; if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex, weight)) return false; @@ -2944,7 +2945,7 @@ mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed) struct net_device *dev; list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { - dev = mlxsw_sp_rt6->rt->fib6_nh.nh_dev; + dev = mlxsw_sp_rt6->rt->fib6_nh.fib_nh_dev; val ^= dev->ifindex; } @@ -3610,7 +3611,7 @@ static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp, const struct fib_nh *fib_nh, enum mlxsw_sp_ipip_type *p_ipipt) { - struct net_device *dev = fib_nh->nh_dev; + struct net_device *dev = fib_nh->fib_nh_dev; return dev && fib_nh->nh_parent->fib_type == RTN_UNICAST && @@ -3637,7 +3638,7 @@ static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp, struct fib_nh *fib_nh) { const struct mlxsw_sp_ipip_ops *ipip_ops; - struct net_device *dev = fib_nh->nh_dev; + struct net_device *dev = fib_nh->fib_nh_dev; struct mlxsw_sp_ipip_entry *ipip_entry; struct mlxsw_sp_rif *rif; int err; @@ -3681,18 +3682,18 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, struct fib_nh *fib_nh) { - struct net_device *dev = fib_nh->nh_dev; + struct net_device *dev = fib_nh->fib_nh_dev; struct in_device *in_dev; int err; nh->nh_grp = nh_grp; nh->key.fib_nh = fib_nh; #ifdef CONFIG_IP_ROUTE_MULTIPATH - nh->nh_weight = fib_nh->nh_weight; + nh->nh_weight = fib_nh->fib_nh_weight; #else nh->nh_weight = 1; #endif - memcpy(&nh->gw_addr, &fib_nh->nh_gw, sizeof(fib_nh->nh_gw)); + memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4)); err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh); if (err) return err; @@ -3705,7 +3706,7 @@ static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp, in_dev = __in_dev_get_rtnl(dev); if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && - fib_nh->nh_flags & RTNH_F_LINKDOWN) + fib_nh->fib_nh_flags & RTNH_F_LINKDOWN) return 0; err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh); @@ -3804,7 +3805,7 @@ static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp, static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp, const struct fib_info *fi) { - return fi->fib_nh->nh_scope == RT_SCOPE_LINK || + return fi->fib_nh->fib_nh_scope == RT_SCOPE_LINK || mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, fi->fib_nh, NULL); } @@ -3946,9 +3947,9 @@ mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp, struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; struct fib6_info *rt = mlxsw_sp_rt6->rt; - if (nh->rif && nh->rif->dev == rt->fib6_nh.nh_dev && + if (nh->rif && nh->rif->dev == rt->fib6_nh.fib_nh_dev && ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr, - &rt->fib6_nh.nh_gw)) + &rt->fib6_nh.fib_nh_gw6)) return nh; continue; } @@ -3966,7 +3967,7 @@ mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE || fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP || fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP) { - nh_grp->nexthops->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD; + nh_grp->nexthops->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD; return; } @@ -3974,9 +3975,9 @@ mlxsw_sp_fib4_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; if (nh->offloaded) - nh->key.fib_nh->nh_flags |= RTNH_F_OFFLOAD; + nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD; else - nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD; + nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; } } @@ -3992,7 +3993,7 @@ mlxsw_sp_fib4_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) for (i = 0; i < nh_grp->count; i++) { struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i]; - nh->key.fib_nh->nh_flags &= ~RTNH_F_OFFLOAD; + nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; } } @@ -4008,19 +4009,20 @@ mlxsw_sp_fib6_entry_offload_set(struct mlxsw_sp_fib_entry *fib_entry) if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_LOCAL || fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE) { list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6, - list)->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD; + list)->rt->fib6_nh.fib_nh_flags |= RTNH_F_OFFLOAD; return; } list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group; + struct fib6_nh *fib6_nh = &mlxsw_sp_rt6->rt->fib6_nh; struct mlxsw_sp_nexthop *nh; nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6); if (nh && nh->offloaded) - mlxsw_sp_rt6->rt->fib6_nh.nh_flags |= RTNH_F_OFFLOAD; + fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD; else - mlxsw_sp_rt6->rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD; + fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; } } @@ -4035,7 +4037,7 @@ mlxsw_sp_fib6_entry_offload_unset(struct mlxsw_sp_fib_entry *fib_entry) list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) { struct fib6_info *rt = mlxsw_sp_rt6->rt; - rt->fib6_nh.nh_flags &= ~RTNH_F_OFFLOAD; + rt->fib6_nh.fib_nh_flags &= ~RTNH_F_OFFLOAD; } } @@ -4913,7 +4915,7 @@ static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6) static bool mlxsw_sp_fib6_rt_can_mp(const struct fib6_info *rt) { /* RTF_CACHE routes are ignored */ - return (rt->fib6_flags & (RTF_GATEWAY | RTF_ADDRCONF)) == RTF_GATEWAY; + return !(rt->fib6_flags & RTF_ADDRCONF) && rt->fib6_nh.fib_nh_has_gw; } static struct fib6_info * @@ -4972,8 +4974,8 @@ static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp, const struct fib6_info *rt, enum mlxsw_sp_ipip_type *ret) { - return rt->fib6_nh.nh_dev && - mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.nh_dev, ret); + return rt->fib6_nh.fib_nh_dev && + mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh.fib_nh_dev, ret); } static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, @@ -4983,7 +4985,7 @@ static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp, { const struct mlxsw_sp_ipip_ops *ipip_ops; struct mlxsw_sp_ipip_entry *ipip_entry; - struct net_device *dev = rt->fib6_nh.nh_dev; + struct net_device *dev = rt->fib6_nh.fib_nh_dev; struct mlxsw_sp_rif *rif; int err; @@ -5026,11 +5028,11 @@ static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_nexthop *nh, const struct fib6_info *rt) { - struct net_device *dev = rt->fib6_nh.nh_dev; + struct net_device *dev = rt->fib6_nh.fib_nh_dev; nh->nh_grp = nh_grp; - nh->nh_weight = rt->fib6_nh.nh_weight; - memcpy(&nh->gw_addr, &rt->fib6_nh.nh_gw, sizeof(nh->gw_addr)); + nh->nh_weight = rt->fib6_nh.fib_nh_weight; + memcpy(&nh->gw_addr, &rt->fib6_nh.fib_nh_gw6, sizeof(nh->gw_addr)); mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh); list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list); @@ -5053,7 +5055,7 @@ static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp, static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp, const struct fib6_info *rt) { - return rt->fib6_flags & RTF_GATEWAY || + return rt->fib6_nh.fib_nh_has_gw || mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL); } diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index fa296a7c255d..30a49802fb51 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -2288,11 +2288,11 @@ static int ofdpa_port_fib_ipv4(struct ofdpa_port *ofdpa_port, __be32 dst, nh = fi->fib_nh; nh_on_port = (fi->fib_dev == ofdpa_port->dev); - has_gw = !!nh->nh_gw; + has_gw = !!nh->fib_nh_gw4; if (has_gw && nh_on_port) { err = ofdpa_port_ipv4_nh(ofdpa_port, flags, - nh->nh_gw, &index); + nh->fib_nh_gw4, &index); if (err) return err; @@ -2749,7 +2749,7 @@ static int ofdpa_fib4_add(struct rocker *rocker, fen_info->tb_id, 0); if (err) return err; - fen_info->fi->fib_nh->nh_flags |= RTNH_F_OFFLOAD; + fen_info->fi->fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD; return 0; } @@ -2764,7 +2764,7 @@ static int ofdpa_fib4_del(struct rocker *rocker, ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker); if (!ofdpa_port) return 0; - fen_info->fi->fib_nh->nh_flags &= ~RTNH_F_OFFLOAD; + fen_info->fi->fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; return ofdpa_port_fib_ipv4(ofdpa_port, htonl(fen_info->dst), fen_info->dst_len, fen_info->fi, fen_info->tb_id, OFDPA_OP_FLAG_REMOVE); @@ -2791,7 +2791,7 @@ static void ofdpa_fib4_abort(struct rocker *rocker) rocker); if (!ofdpa_port) continue; - flow_entry->fi->fib_nh->nh_flags &= ~RTNH_F_OFFLOAD; + flow_entry->fi->fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD; ofdpa_flow_tbl_del(ofdpa_port, OFDPA_OP_FLAG_REMOVE, flow_entry); } diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index a64f21a97369..367dc2a0f84a 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -237,6 +237,20 @@ static inline struct in_device *__in_dev_get_rtnl(const struct net_device *dev) return rtnl_dereference(dev->ip_ptr); } +/* called with rcu_read_lock or rtnl held */ +static inline bool ip_ignore_linkdown(const struct net_device *dev) +{ + struct in_device *in_dev; + bool rc = false; + + in_dev = rcu_dereference_rtnl(dev->ip_ptr); + if (in_dev && + IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) + rc = true; + + return rc; +} + static inline struct neigh_parms *__in_dev_arp_parms_get_rcu(const struct net_device *dev) { struct in_device *in_dev = __in_dev_get_rcu(dev); diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 269ec27385e9..ec8e6784a6f7 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -425,6 +425,14 @@ static inline void in6_dev_hold(struct inet6_dev *idev) refcount_inc(&idev->refcnt); } +/* called with rcu_read_lock held */ +static inline bool ip6_ignore_linkdown(const struct net_device *dev) +{ + const struct inet6_dev *idev = __in6_dev_get(dev); + + return !!idev->cnf.ignore_routes_with_linkdown; +} + void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp); static inline void in6_ifa_put(struct inet6_ifaddr *ifp) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 2acb78a762ee..58dbb4e82908 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -125,13 +126,7 @@ struct rt6_exception { #define FIB6_MAX_DEPTH 5 struct fib6_nh { - struct in6_addr nh_gw; - struct net_device *nh_dev; - struct lwtunnel_state *nh_lwtstate; - - unsigned int nh_flags; - atomic_t nh_upper_bound; - int nh_weight; + struct fib_nh_common nh_common; }; struct fib6_info { @@ -441,13 +436,18 @@ void rt6_get_prefsrc(const struct rt6_info *rt, struct in6_addr *addr) static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i) { - return f6i->fib6_nh.nh_dev; + return f6i->fib6_nh.fib_nh_dev; } +int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, + struct fib6_config *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack); +void fib6_nh_release(struct fib6_nh *fib6_nh); + static inline struct lwtunnel_state *fib6_info_nh_lwt(const struct fib6_info *f6i) { - return f6i->fib6_nh.nh_lwtstate; + return f6i->fib6_nh.fib_nh_lws; } void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info, diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 7ab119936e69..342180a7285c 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -68,8 +68,8 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) static inline bool rt6_qualify_for_ecmp(const struct fib6_info *f6i) { - return (f6i->fib6_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == - RTF_GATEWAY; + return !(f6i->fib6_flags & (RTF_ADDRCONF|RTF_DYNAMIC)) && + f6i->fib6_nh.fib_nh_has_gw; } void ip6_route_input(struct sk_buff *skb); @@ -274,9 +274,11 @@ static inline struct in6_addr *rt6_nexthop(struct rt6_info *rt, static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *b) { - return a->fib6_nh.nh_dev == b->fib6_nh.nh_dev && - ipv6_addr_equal(&a->fib6_nh.nh_gw, &b->fib6_nh.nh_gw) && - !lwtunnel_cmp_encap(a->fib6_nh.nh_lwtstate, b->fib6_nh.nh_lwtstate); + struct fib6_nh *nha = &a->fib6_nh, *nhb = &b->fib6_nh; + + return nha->fib_nh_dev == nhb->fib_nh_dev && + ipv6_addr_equal(&nha->fib_nh_gw6, &nhb->fib_nh_gw6) && + !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws); } static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 9c8214d2116d..12a6d759cf57 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -76,27 +76,48 @@ struct fnhe_hash_bucket { #define FNHE_HASH_SIZE (1 << FNHE_HASH_SHIFT) #define FNHE_RECLAIM_DEPTH 5 +struct fib_nh_common { + struct net_device *nhc_dev; + int nhc_oif; + unsigned int nhc_flags; + struct lwtunnel_state *nhc_lwtstate; + unsigned char nhc_scope; + u8 nhc_family; + u8 nhc_has_gw:1, + unused:7; + union { + __be32 ipv4; + struct in6_addr ipv6; + } nhc_gw; + + int nhc_weight; + atomic_t nhc_upper_bound; +}; + struct fib_nh { - struct net_device *nh_dev; + struct fib_nh_common nh_common; struct hlist_node nh_hash; struct fib_info *nh_parent; - unsigned int nh_flags; - unsigned char nh_scope; -#ifdef CONFIG_IP_ROUTE_MULTIPATH - int nh_weight; - atomic_t nh_upper_bound; -#endif #ifdef CONFIG_IP_ROUTE_CLASSID __u32 nh_tclassid; #endif - int nh_oif; - __be32 nh_gw; __be32 nh_saddr; int nh_saddr_genid; struct rtable __rcu * __percpu *nh_pcpu_rth_output; struct rtable __rcu *nh_rth_input; struct fnhe_hash_bucket __rcu *nh_exceptions; - struct lwtunnel_state *nh_lwtstate; +#define fib_nh_family nh_common.nhc_family +#define fib_nh_dev nh_common.nhc_dev +#define fib_nh_oif nh_common.nhc_oif +#define fib_nh_flags nh_common.nhc_flags +#define fib_nh_lws nh_common.nhc_lwtstate +#define fib_nh_scope nh_common.nhc_scope +#define fib_nh_family nh_common.nhc_family +#define fib_nh_has_gw nh_common.nhc_has_gw +#define fib_nh_gw4 nh_common.nhc_gw.ipv4 +#define fib_nh_gw6 nh_common.nhc_gw.ipv6 +#define fib_nh_weight nh_common.nhc_weight +#define fib_nh_upper_bound nh_common.nhc_upper_bound }; /* @@ -125,7 +146,7 @@ struct fib_info { int fib_nhs; struct rcu_head rcu; struct fib_nh fib_nh[0]; -#define fib_dev fib_nh[0].nh_dev +#define fib_dev fib_nh[0].fib_nh_dev }; @@ -180,9 +201,9 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); atomic_read(&(net)->ipv4.dev_addr_genid)) ? \ FIB_RES_NH(res).nh_saddr : \ fib_info_update_nh_saddr((net), &FIB_RES_NH(res))) -#define FIB_RES_GW(res) (FIB_RES_NH(res).nh_gw) -#define FIB_RES_DEV(res) (FIB_RES_NH(res).nh_dev) -#define FIB_RES_OIF(res) (FIB_RES_NH(res).nh_oif) +#define FIB_RES_GW(res) (FIB_RES_NH(res).fib_nh_gw4) +#define FIB_RES_DEV(res) (FIB_RES_NH(res).fib_nh_dev) +#define FIB_RES_OIF(res) (FIB_RES_NH(res).fib_nh_oif) #define FIB_RES_PREFSRC(net, res) ((res).fi->fib_prefsrc ? : \ FIB_RES_SADDR(net, res)) @@ -416,6 +437,15 @@ void fib_select_multipath(struct fib_result *res, int hash); void fib_select_path(struct net *net, struct fib_result *res, struct flowi4 *fl4, const struct sk_buff *skb); +int fib_nh_init(struct net *net, struct fib_nh *fib_nh, + struct fib_config *cfg, int nh_weight, + struct netlink_ext_ack *extack); +void fib_nh_release(struct net *net, struct fib_nh *fib_nh); +int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *fc_encap, + u16 fc_encap_type, void *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack); +void fib_nh_common_release(struct fib_nh_common *nhc); + /* Exported by fib_trie.c */ void fib_trie_init(void); struct fib_table *fib_trie_table(u32 id, struct fib_table *alias); diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h index 6271bab63bfb..61ea7a24c8e5 100644 --- a/include/trace/events/fib.h +++ b/include/trace/events/fib.h @@ -63,13 +63,16 @@ TRACE_EVENT(fib_table_lookup, } if (nh) { + struct net_device *dev; + p32 = (__be32 *) __entry->saddr; *p32 = nh->nh_saddr; p32 = (__be32 *) __entry->gw; - *p32 = nh->nh_gw; + *p32 = nh->fib_nh_gw4; - __assign_str(name, nh->nh_dev ? nh->nh_dev->name : "-"); + dev = nh->fib_nh_dev; + __assign_str(name, dev ? dev->name : "-"); } else { p32 = (__be32 *) __entry->saddr; *p32 = 0; diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h index b088b54d699c..6d05ebdd669c 100644 --- a/include/trace/events/fib6.h +++ b/include/trace/events/fib6.h @@ -62,8 +62,8 @@ TRACE_EVENT(fib6_table_lookup, __entry->dport = 0; } - if (f6i->fib6_nh.nh_dev) { - __assign_str(name, f6i->fib6_nh.nh_dev); + if (f6i->fib6_nh.fib_nh_dev) { + __assign_str(name, f6i->fib6_nh.fib_nh_dev); } else { __assign_str(name, "-"); } @@ -75,7 +75,7 @@ TRACE_EVENT(fib6_table_lookup, } else if (f6i) { in6 = (struct in6_addr *)__entry->gw; - *in6 = f6i->fib6_nh.nh_gw; + *in6 = f6i->fib6_nh.fib_nh_gw6; } ), diff --git a/net/core/filter.c b/net/core/filter.c index 22eb2edf5573..887ab073a0ea 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4634,12 +4634,12 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, nh = &res.fi->fib_nh[res.nh_sel]; /* do not handle lwt encaps right now */ - if (nh->nh_lwtstate) + if (nh->fib_nh_lws) return BPF_FIB_LKUP_RET_UNSUPP_LWT; - dev = nh->nh_dev; - if (nh->nh_gw) - params->ipv4_dst = nh->nh_gw; + dev = nh->fib_nh_dev; + if (nh->fib_nh_gw4) + params->ipv4_dst = nh->fib_nh_gw4; params->rt_metric = res.fi->fib_priority; @@ -4748,13 +4748,13 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, return BPF_FIB_LKUP_RET_FRAG_NEEDED; } - if (f6i->fib6_nh.nh_lwtstate) + if (f6i->fib6_nh.fib_nh_lws) return BPF_FIB_LKUP_RET_UNSUPP_LWT; - if (f6i->fib6_flags & RTF_GATEWAY) - *dst = f6i->fib6_nh.nh_gw; + if (f6i->fib6_nh.fib_nh_has_gw) + *dst = f6i->fib6_nh.fib_nh_gw6; - dev = f6i->fib6_nh.nh_dev; + dev = f6i->fib6_nh.fib_nh_dev; params->rt_metric = f6i->fib6_metric; /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index ed14ec245584..ffbe24397dbe 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -324,16 +324,16 @@ bool fib_info_nh_uses_dev(struct fib_info *fi, const struct net_device *dev) for (ret = 0; ret < fi->fib_nhs; ret++) { struct fib_nh *nh = &fi->fib_nh[ret]; - if (nh->nh_dev == dev) { + if (nh->fib_nh_dev == dev) { dev_match = true; break; - } else if (l3mdev_master_ifindex_rcu(nh->nh_dev) == dev->ifindex) { + } else if (l3mdev_master_ifindex_rcu(nh->fib_nh_dev) == dev->ifindex) { dev_match = true; break; } } #else - if (fi->fib_nh[0].nh_dev == dev) + if (fi->fib_nh[0].fib_nh_dev == dev) dev_match = true; #endif @@ -390,7 +390,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, dev_match = fib_info_nh_uses_dev(res.fi, dev); if (dev_match) { - ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; + ret = FIB_RES_NH(res).fib_nh_scope >= RT_SCOPE_HOST; return ret; } if (no_addr) @@ -402,7 +402,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, ret = 0; if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) { if (res.type == RTN_UNICAST) - ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; + ret = FIB_RES_NH(res).fib_nh_scope >= RT_SCOPE_HOST; } return ret; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 8e185b5a2bf6..df777af7e278 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -204,18 +204,34 @@ static void rt_fibinfo_free_cpus(struct rtable __rcu * __percpu *rtp) free_percpu(rtp); } +void fib_nh_common_release(struct fib_nh_common *nhc) +{ + if (nhc->nhc_dev) + dev_put(nhc->nhc_dev); + + lwtstate_put(nhc->nhc_lwtstate); +} +EXPORT_SYMBOL_GPL(fib_nh_common_release); + +void fib_nh_release(struct net *net, struct fib_nh *fib_nh) +{ +#ifdef CONFIG_IP_ROUTE_CLASSID + if (fib_nh->nh_tclassid) + net->ipv4.fib_num_tclassid_users--; +#endif + fib_nh_common_release(&fib_nh->nh_common); + free_nh_exceptions(fib_nh); + rt_fibinfo_free_cpus(fib_nh->nh_pcpu_rth_output); + rt_fibinfo_free(&fib_nh->nh_rth_input); +} + /* Release a nexthop info record */ static void free_fib_info_rcu(struct rcu_head *head) { struct fib_info *fi = container_of(head, struct fib_info, rcu); change_nexthops(fi) { - if (nexthop_nh->nh_dev) - dev_put(nexthop_nh->nh_dev); - lwtstate_put(nexthop_nh->nh_lwtstate); - free_nh_exceptions(nexthop_nh); - rt_fibinfo_free_cpus(nexthop_nh->nh_pcpu_rth_output); - rt_fibinfo_free(&nexthop_nh->nh_rth_input); + fib_nh_release(fi->fib_net, nexthop_nh); } endfor_nexthops(fi); ip_fib_metrics_put(fi->fib_metrics); @@ -230,12 +246,7 @@ void free_fib_info(struct fib_info *fi) return; } fib_info_cnt--; -#ifdef CONFIG_IP_ROUTE_CLASSID - change_nexthops(fi) { - if (nexthop_nh->nh_tclassid) - fi->fib_net->ipv4.fib_num_tclassid_users--; - } endfor_nexthops(fi); -#endif + call_rcu(&fi->rcu, free_fib_info_rcu); } EXPORT_SYMBOL_GPL(free_fib_info); @@ -248,7 +259,7 @@ void fib_release_info(struct fib_info *fi) if (fi->fib_prefsrc) hlist_del(&fi->fib_lhash); change_nexthops(fi) { - if (!nexthop_nh->nh_dev) + if (!nexthop_nh->fib_nh_dev) continue; hlist_del(&nexthop_nh->nh_hash); } endfor_nexthops(fi) @@ -263,17 +274,17 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) const struct fib_nh *onh = ofi->fib_nh; for_nexthops(fi) { - if (nh->nh_oif != onh->nh_oif || - nh->nh_gw != onh->nh_gw || - nh->nh_scope != onh->nh_scope || + if (nh->fib_nh_oif != onh->fib_nh_oif || + nh->fib_nh_gw4 != onh->fib_nh_gw4 || + nh->fib_nh_scope != onh->fib_nh_scope || #ifdef CONFIG_IP_ROUTE_MULTIPATH - nh->nh_weight != onh->nh_weight || + nh->fib_nh_weight != onh->fib_nh_weight || #endif #ifdef CONFIG_IP_ROUTE_CLASSID nh->nh_tclassid != onh->nh_tclassid || #endif - lwtunnel_cmp_encap(nh->nh_lwtstate, onh->nh_lwtstate) || - ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK)) + lwtunnel_cmp_encap(nh->fib_nh_lws, onh->fib_nh_lws) || + ((nh->fib_nh_flags ^ onh->fib_nh_flags) & ~RTNH_COMPARE_MASK)) return -1; onh++; } endfor_nexthops(fi); @@ -298,7 +309,7 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi) val ^= (__force u32)fi->fib_prefsrc; val ^= fi->fib_priority; for_nexthops(fi) { - val ^= fib_devindex_hashfn(nh->nh_oif); + val ^= fib_devindex_hashfn(nh->fib_nh_oif); } endfor_nexthops(fi) return (val ^ (val >> 7) ^ (val >> 12)) & mask; @@ -347,9 +358,9 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) hash = fib_devindex_hashfn(dev->ifindex); head = &fib_info_devhash[hash]; hlist_for_each_entry(nh, head, nh_hash) { - if (nh->nh_dev == dev && - nh->nh_gw == gw && - !(nh->nh_flags & RTNH_F_DEAD)) { + if (nh->fib_nh_dev == dev && + nh->fib_nh_gw4 == gw && + !(nh->fib_nh_flags & RTNH_F_DEAD)) { spin_unlock(&fib_info_lock); return 0; } @@ -384,10 +395,10 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi) /* grab encap info */ for_nexthops(fi) { - if (nh->nh_lwtstate) { + if (nh->fib_nh_lws) { /* RTA_ENCAP_TYPE */ nh_encapsize += lwtunnel_get_encap_size( - nh->nh_lwtstate); + nh->fib_nh_lws); /* RTA_ENCAP */ nh_encapsize += nla_total_size(2); } @@ -438,7 +449,7 @@ static int fib_detect_death(struct fib_info *fi, int order, struct neighbour *n; int state = NUD_NONE; - n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); + n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].fib_nh_gw4, fi->fib_dev); if (n) { state = n->nud_state; neigh_release(n); @@ -457,6 +468,71 @@ static int fib_detect_death(struct fib_info *fi, int order, return 1; } +int fib_nh_common_init(struct fib_nh_common *nhc, struct nlattr *encap, + u16 encap_type, void *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack) +{ + if (encap) { + struct lwtunnel_state *lwtstate; + int err; + + if (encap_type == LWTUNNEL_ENCAP_NONE) { + NL_SET_ERR_MSG(extack, "LWT encap type not specified"); + return -EINVAL; + } + err = lwtunnel_build_state(encap_type, encap, nhc->nhc_family, + cfg, &lwtstate, extack); + if (err) + return err; + + nhc->nhc_lwtstate = lwtstate_get(lwtstate); + } + + return 0; +} +EXPORT_SYMBOL_GPL(fib_nh_common_init); + +int fib_nh_init(struct net *net, struct fib_nh *nh, + struct fib_config *cfg, int nh_weight, + struct netlink_ext_ack *extack) +{ + int err = -ENOMEM; + + nh->fib_nh_family = AF_INET; + + nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *); + if (!nh->nh_pcpu_rth_output) + goto err_out; + + err = fib_nh_common_init(&nh->nh_common, cfg->fc_encap, + cfg->fc_encap_type, cfg, GFP_KERNEL, extack); + if (err) + goto init_failure; + + nh->fib_nh_oif = cfg->fc_oif; + if (cfg->fc_gw) { + nh->fib_nh_gw4 = cfg->fc_gw; + nh->fib_nh_has_gw = 1; + } + nh->fib_nh_flags = cfg->fc_flags; + +#ifdef CONFIG_IP_ROUTE_CLASSID + nh->nh_tclassid = cfg->fc_flow; + if (nh->nh_tclassid) + net->ipv4.fib_num_tclassid_users++; +#endif +#ifdef CONFIG_IP_ROUTE_MULTIPATH + nh->fib_nh_weight = nh_weight; +#endif + return 0; + +init_failure: + rt_fibinfo_free_cpus(nh->nh_pcpu_rth_output); + nh->nh_pcpu_rth_output = NULL; +err_out: + return err; +} + #ifdef CONFIG_IP_ROUTE_MULTIPATH static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining, @@ -483,11 +559,15 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, int remaining, struct fib_config *cfg, struct netlink_ext_ack *extack) { + struct net *net = fi->fib_net; + struct fib_config fib_cfg; int ret; change_nexthops(fi) { int attrlen; + memset(&fib_cfg, 0, sizeof(fib_cfg)); + if (!rtnh_ok(rtnh, remaining)) { NL_SET_ERR_MSG(extack, "Invalid nexthop configuration - extra data after nexthop"); @@ -500,56 +580,54 @@ static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, return -EINVAL; } - nexthop_nh->nh_flags = - (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; - nexthop_nh->nh_oif = rtnh->rtnh_ifindex; - nexthop_nh->nh_weight = rtnh->rtnh_hops + 1; + fib_cfg.fc_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; + fib_cfg.fc_oif = rtnh->rtnh_ifindex; attrlen = rtnh_attrlen(rtnh); if (attrlen > 0) { struct nlattr *nla, *attrs = rtnh_attrs(rtnh); nla = nla_find(attrs, attrlen, RTA_GATEWAY); - nexthop_nh->nh_gw = nla ? nla_get_in_addr(nla) : 0; -#ifdef CONFIG_IP_ROUTE_CLASSID + if (nla) + fib_cfg.fc_gw = nla_get_in_addr(nla); + nla = nla_find(attrs, attrlen, RTA_FLOW); - nexthop_nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; - if (nexthop_nh->nh_tclassid) - fi->fib_net->ipv4.fib_num_tclassid_users++; -#endif - nla = nla_find(attrs, attrlen, RTA_ENCAP); - if (nla) { - struct lwtunnel_state *lwtstate; - struct nlattr *nla_entype; + if (nla) + fib_cfg.fc_flow = nla_get_u32(nla); - nla_entype = nla_find(attrs, attrlen, - RTA_ENCAP_TYPE); - if (!nla_entype) { - NL_SET_BAD_ATTR(extack, nla); - NL_SET_ERR_MSG(extack, - "Encap type is missing"); - goto err_inval; - } - - ret = lwtunnel_build_state(nla_get_u16( - nla_entype), - nla, AF_INET, cfg, - &lwtstate, extack); - if (ret) - goto errout; - nexthop_nh->nh_lwtstate = - lwtstate_get(lwtstate); - } + fib_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); + nla = nla_find(attrs, attrlen, RTA_ENCAP_TYPE); + if (nla) + fib_cfg.fc_encap_type = nla_get_u16(nla); } + ret = fib_nh_init(net, nexthop_nh, &fib_cfg, + rtnh->rtnh_hops + 1, extack); + if (ret) + goto errout; + rtnh = rtnh_next(rtnh, &remaining); } endfor_nexthops(fi); - return 0; - -err_inval: ret = -EINVAL; - + if (cfg->fc_oif && fi->fib_nh->fib_nh_oif != cfg->fc_oif) { + NL_SET_ERR_MSG(extack, + "Nexthop device index does not match RTA_OIF"); + goto errout; + } + if (cfg->fc_gw && fi->fib_nh->fib_nh_gw4 != cfg->fc_gw) { + NL_SET_ERR_MSG(extack, + "Nexthop gateway does not match RTA_GATEWAY"); + goto errout; + } +#ifdef CONFIG_IP_ROUTE_CLASSID + if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) { + NL_SET_ERR_MSG(extack, + "Nexthop class id does not match RTA_FLOW"); + goto errout; + } +#endif + ret = 0; errout: return ret; } @@ -558,49 +636,51 @@ static void fib_rebalance(struct fib_info *fi) { int total; int w; - struct in_device *in_dev; if (fi->fib_nhs < 2) return; total = 0; for_nexthops(fi) { - if (nh->nh_flags & RTNH_F_DEAD) + if (nh->fib_nh_flags & RTNH_F_DEAD) continue; - in_dev = __in_dev_get_rtnl(nh->nh_dev); - - if (in_dev && - IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && - nh->nh_flags & RTNH_F_LINKDOWN) + if (ip_ignore_linkdown(nh->fib_nh_dev) && + nh->fib_nh_flags & RTNH_F_LINKDOWN) continue; - total += nh->nh_weight; + total += nh->fib_nh_weight; } endfor_nexthops(fi); w = 0; change_nexthops(fi) { int upper_bound; - in_dev = __in_dev_get_rtnl(nexthop_nh->nh_dev); - - if (nexthop_nh->nh_flags & RTNH_F_DEAD) { + if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD) { upper_bound = -1; - } else if (in_dev && - IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && - nexthop_nh->nh_flags & RTNH_F_LINKDOWN) { + } else if (ip_ignore_linkdown(nexthop_nh->fib_nh_dev) && + nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN) { upper_bound = -1; } else { - w += nexthop_nh->nh_weight; + w += nexthop_nh->fib_nh_weight; upper_bound = DIV_ROUND_CLOSEST_ULL((u64)w << 31, total) - 1; } - atomic_set(&nexthop_nh->nh_upper_bound, upper_bound); + atomic_set(&nexthop_nh->fib_nh_upper_bound, upper_bound); } endfor_nexthops(fi); } #else /* CONFIG_IP_ROUTE_MULTIPATH */ +static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, + int remaining, struct fib_config *cfg, + struct netlink_ext_ack *extack) +{ + NL_SET_ERR_MSG(extack, "Multipath support not enabled in kernel"); + + return -EINVAL; +} + #define fib_rebalance(fi) do { } while (0) #endif /* CONFIG_IP_ROUTE_MULTIPATH */ @@ -620,7 +700,7 @@ static int fib_encap_match(u16 encap_type, ret = lwtunnel_build_state(encap_type, encap, AF_INET, cfg, &lwtstate, extack); if (!ret) { - result = lwtunnel_cmp_encap(lwtstate, nh->nh_lwtstate); + result = lwtunnel_cmp_encap(lwtstate, nh->fib_nh_lws); lwtstate_free(lwtstate); } @@ -649,8 +729,8 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, cfg->fc_flow != fi->fib_nh->nh_tclassid) return 1; #endif - if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && - (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) + if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->fib_nh_oif) && + (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->fib_nh_gw4)) return 0; return 1; } @@ -668,7 +748,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, if (!rtnh_ok(rtnh, remaining)) return -EINVAL; - if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif) + if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->fib_nh_oif) return 1; attrlen = rtnh_attrlen(rtnh); @@ -676,7 +756,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, struct nlattr *nla, *attrs = rtnh_attrs(rtnh); nla = nla_find(attrs, attrlen, RTA_GATEWAY); - if (nla && nla_get_in_addr(nla) != nh->nh_gw) + if (nla && nla_get_in_addr(nla) != nh->fib_nh_gw4) return 1; #ifdef CONFIG_IP_ROUTE_CLASSID nla = nla_find(attrs, attrlen, RTA_FLOW); @@ -783,10 +863,10 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh, struct net_device *dev; net = cfg->fc_nlinfo.nl_net; - if (nh->nh_gw) { + if (nh->fib_nh_gw4) { struct fib_result res; - if (nh->nh_flags & RTNH_F_ONLINK) { + if (nh->fib_nh_flags & RTNH_F_ONLINK) { unsigned int addr_type; if (cfg->fc_scope >= RT_SCOPE_LINK) { @@ -794,7 +874,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh, "Nexthop has invalid scope"); return -EINVAL; } - dev = __dev_get_by_index(net, nh->nh_oif); + dev = __dev_get_by_index(net, nh->fib_nh_oif); if (!dev) { NL_SET_ERR_MSG(extack, "Nexthop device required for onlink"); return -ENODEV; @@ -804,26 +884,27 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh, "Nexthop device is not up"); return -ENETDOWN; } - addr_type = inet_addr_type_dev_table(net, dev, nh->nh_gw); + addr_type = inet_addr_type_dev_table(net, dev, + nh->fib_nh_gw4); if (addr_type != RTN_UNICAST) { NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); return -EINVAL; } if (!netif_carrier_ok(dev)) - nh->nh_flags |= RTNH_F_LINKDOWN; - nh->nh_dev = dev; + nh->fib_nh_flags |= RTNH_F_LINKDOWN; + nh->fib_nh_dev = dev; dev_hold(dev); - nh->nh_scope = RT_SCOPE_LINK; + nh->fib_nh_scope = RT_SCOPE_LINK; return 0; } rcu_read_lock(); { struct fib_table *tbl = NULL; struct flowi4 fl4 = { - .daddr = nh->nh_gw, + .daddr = nh->fib_nh_gw4, .flowi4_scope = cfg->fc_scope + 1, - .flowi4_oif = nh->nh_oif, + .flowi4_oif = nh->fib_nh_oif, .flowi4_iif = LOOPBACK_IFINDEX, }; @@ -860,9 +941,9 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh, NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway"); goto out; } - nh->nh_scope = res.scope; - nh->nh_oif = FIB_RES_OIF(res); - nh->nh_dev = dev = FIB_RES_DEV(res); + nh->fib_nh_scope = res.scope; + nh->fib_nh_oif = FIB_RES_OIF(res); + nh->fib_nh_dev = dev = FIB_RES_DEV(res); if (!dev) { NL_SET_ERR_MSG(extack, "No egress device for nexthop gateway"); @@ -870,19 +951,19 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh, } dev_hold(dev); if (!netif_carrier_ok(dev)) - nh->nh_flags |= RTNH_F_LINKDOWN; + nh->fib_nh_flags |= RTNH_F_LINKDOWN; err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN; } else { struct in_device *in_dev; - if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) { + if (nh->fib_nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) { NL_SET_ERR_MSG(extack, "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set"); return -EINVAL; } rcu_read_lock(); err = -ENODEV; - in_dev = inetdev_by_index(net, nh->nh_oif); + in_dev = inetdev_by_index(net, nh->fib_nh_oif); if (!in_dev) goto out; err = -ENETDOWN; @@ -890,11 +971,11 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh, NL_SET_ERR_MSG(extack, "Device for nexthop is not up"); goto out; } - nh->nh_dev = in_dev->dev; - dev_hold(nh->nh_dev); - nh->nh_scope = RT_SCOPE_HOST; - if (!netif_carrier_ok(nh->nh_dev)) - nh->nh_flags |= RTNH_F_LINKDOWN; + nh->fib_nh_dev = in_dev->dev; + dev_hold(nh->fib_nh_dev); + nh->fib_nh_scope = RT_SCOPE_HOST; + if (!netif_carrier_ok(nh->fib_nh_dev)) + nh->fib_nh_flags |= RTNH_F_LINKDOWN; err = 0; } out: @@ -986,8 +1067,8 @@ static void fib_info_hash_move(struct hlist_head *new_info_hash, __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh) { - nh->nh_saddr = inet_select_addr(nh->nh_dev, - nh->nh_gw, + nh->nh_saddr = inet_select_addr(nh->fib_nh_dev, + nh->fib_nh_gw4, nh->nh_parent->fib_scope); nh->nh_saddr_genid = atomic_read(&net->ipv4.dev_addr_genid); @@ -1096,69 +1177,15 @@ struct fib_info *fib_create_info(struct fib_config *cfg, fi->fib_nhs = nhs; change_nexthops(fi) { nexthop_nh->nh_parent = fi; - nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *); - if (!nexthop_nh->nh_pcpu_rth_output) - goto failure; } endfor_nexthops(fi) - if (cfg->fc_mp) { -#ifdef CONFIG_IP_ROUTE_MULTIPATH + if (cfg->fc_mp) err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack); - if (err != 0) - goto failure; - if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) { - NL_SET_ERR_MSG(extack, - "Nexthop device index does not match RTA_OIF"); - goto err_inval; - } - if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) { - NL_SET_ERR_MSG(extack, - "Nexthop gateway does not match RTA_GATEWAY"); - goto err_inval; - } -#ifdef CONFIG_IP_ROUTE_CLASSID - if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) { - NL_SET_ERR_MSG(extack, - "Nexthop class id does not match RTA_FLOW"); - goto err_inval; - } -#endif -#else - NL_SET_ERR_MSG(extack, - "Multipath support not enabled in kernel"); - goto err_inval; -#endif - } else { - struct fib_nh *nh = fi->fib_nh; + else + err = fib_nh_init(net, fi->fib_nh, cfg, 1, extack); - if (cfg->fc_encap) { - struct lwtunnel_state *lwtstate; - - if (cfg->fc_encap_type == LWTUNNEL_ENCAP_NONE) { - NL_SET_ERR_MSG(extack, - "LWT encap type not specified"); - goto err_inval; - } - err = lwtunnel_build_state(cfg->fc_encap_type, - cfg->fc_encap, AF_INET, cfg, - &lwtstate, extack); - if (err) - goto failure; - - nh->nh_lwtstate = lwtstate_get(lwtstate); - } - nh->nh_oif = cfg->fc_oif; - nh->nh_gw = cfg->fc_gw; - nh->nh_flags = cfg->fc_flags; -#ifdef CONFIG_IP_ROUTE_CLASSID - nh->nh_tclassid = cfg->fc_flow; - if (nh->nh_tclassid) - fi->fib_net->ipv4.fib_num_tclassid_users++; -#endif -#ifdef CONFIG_IP_ROUTE_MULTIPATH - nh->nh_weight = 1; -#endif - } + if (err != 0) + goto failure; if (fib_props[cfg->fc_type].error) { if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) { @@ -1195,15 +1222,15 @@ struct fib_info *fib_create_info(struct fib_config *cfg, "Route with host scope can not have multiple nexthops"); goto err_inval; } - if (nh->nh_gw) { + if (nh->fib_nh_gw4) { NL_SET_ERR_MSG(extack, "Route with host scope can not have a gateway"); goto err_inval; } - nh->nh_scope = RT_SCOPE_NOWHERE; - nh->nh_dev = dev_get_by_index(net, fi->fib_nh->nh_oif); + nh->fib_nh_scope = RT_SCOPE_NOWHERE; + nh->fib_nh_dev = dev_get_by_index(net, fi->fib_nh->fib_nh_oif); err = -ENODEV; - if (!nh->nh_dev) + if (!nh->fib_nh_dev) goto failure; } else { int linkdown = 0; @@ -1212,7 +1239,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg, err = fib_check_nh(cfg, nexthop_nh, extack); if (err != 0) goto failure; - if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN) + if (nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN) linkdown++; } endfor_nexthops(fi) if (linkdown == fi->fib_nhs) @@ -1254,9 +1281,9 @@ link_it: struct hlist_head *head; unsigned int hash; - if (!nexthop_nh->nh_dev) + if (!nexthop_nh->fib_nh_dev) continue; - hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex); + hash = fib_devindex_hashfn(nexthop_nh->fib_nh_dev->ifindex); head = &fib_info_devhash[hash]; hlist_add_head(&nexthop_nh->nh_hash, head); } endfor_nexthops(fi) @@ -1315,31 +1342,27 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) goto nla_put_failure; if (fi->fib_nhs == 1) { - if (fi->fib_nh->nh_gw && - nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) + if (fi->fib_nh->fib_nh_gw4 && + nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->fib_nh_gw4)) goto nla_put_failure; - if (fi->fib_nh->nh_oif && - nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif)) + if (fi->fib_nh->fib_nh_oif && + nla_put_u32(skb, RTA_OIF, fi->fib_nh->fib_nh_oif)) goto nla_put_failure; - if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) { - struct in_device *in_dev; - + if (fi->fib_nh->fib_nh_flags & RTNH_F_LINKDOWN) { rcu_read_lock(); - in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev); - if (in_dev && - IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) + if (ip_ignore_linkdown(fi->fib_nh->fib_nh_dev)) rtm->rtm_flags |= RTNH_F_DEAD; rcu_read_unlock(); } - if (fi->fib_nh->nh_flags & RTNH_F_OFFLOAD) + if (fi->fib_nh->fib_nh_flags & RTNH_F_OFFLOAD) rtm->rtm_flags |= RTNH_F_OFFLOAD; #ifdef CONFIG_IP_ROUTE_CLASSID if (fi->fib_nh[0].nh_tclassid && nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) goto nla_put_failure; #endif - if (fi->fib_nh->nh_lwtstate && - lwtunnel_fill_encap(skb, fi->fib_nh->nh_lwtstate) < 0) + if (fi->fib_nh->fib_nh_lws && + lwtunnel_fill_encap(skb, fi->fib_nh->fib_nh_lws) < 0) goto nla_put_failure; } #ifdef CONFIG_IP_ROUTE_MULTIPATH @@ -1356,30 +1379,26 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, if (!rtnh) goto nla_put_failure; - rtnh->rtnh_flags = nh->nh_flags & 0xFF; - if (nh->nh_flags & RTNH_F_LINKDOWN) { - struct in_device *in_dev; - + rtnh->rtnh_flags = nh->fib_nh_flags & 0xFF; + if (nh->fib_nh_flags & RTNH_F_LINKDOWN) { rcu_read_lock(); - in_dev = __in_dev_get_rcu(nh->nh_dev); - if (in_dev && - IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) + if (ip_ignore_linkdown(nh->fib_nh_dev)) rtnh->rtnh_flags |= RTNH_F_DEAD; rcu_read_unlock(); } - rtnh->rtnh_hops = nh->nh_weight - 1; - rtnh->rtnh_ifindex = nh->nh_oif; + rtnh->rtnh_hops = nh->fib_nh_weight - 1; + rtnh->rtnh_ifindex = nh->fib_nh_oif; - if (nh->nh_gw && - nla_put_in_addr(skb, RTA_GATEWAY, nh->nh_gw)) + if (nh->fib_nh_gw4 && + nla_put_in_addr(skb, RTA_GATEWAY, nh->fib_nh_gw4)) goto nla_put_failure; #ifdef CONFIG_IP_ROUTE_CLASSID if (nh->nh_tclassid && nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) goto nla_put_failure; #endif - if (nh->nh_lwtstate && - lwtunnel_fill_encap(skb, nh->nh_lwtstate) < 0) + if (nh->fib_nh_lws && + lwtunnel_fill_encap(skb, nh->fib_nh_lws) < 0) goto nla_put_failure; /* length of rtnetlink header + attributes */ @@ -1427,28 +1446,26 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local) return ret; } -static int call_fib_nh_notifiers(struct fib_nh *fib_nh, +static int call_fib_nh_notifiers(struct fib_nh *nh, enum fib_event_type event_type) { - struct in_device *in_dev = __in_dev_get_rtnl(fib_nh->nh_dev); + bool ignore_link_down = ip_ignore_linkdown(nh->fib_nh_dev); struct fib_nh_notifier_info info = { - .fib_nh = fib_nh, + .fib_nh = nh, }; switch (event_type) { case FIB_EVENT_NH_ADD: - if (fib_nh->nh_flags & RTNH_F_DEAD) + if (nh->fib_nh_flags & RTNH_F_DEAD) break; - if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && - fib_nh->nh_flags & RTNH_F_LINKDOWN) + if (ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN) break; - return call_fib4_notifiers(dev_net(fib_nh->nh_dev), event_type, + return call_fib4_notifiers(dev_net(nh->fib_nh_dev), event_type, &info.info); case FIB_EVENT_NH_DEL: - if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && - fib_nh->nh_flags & RTNH_F_LINKDOWN) || - (fib_nh->nh_flags & RTNH_F_DEAD)) - return call_fib4_notifiers(dev_net(fib_nh->nh_dev), + if ((ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN) || + (nh->fib_nh_flags & RTNH_F_DEAD)) + return call_fib4_notifiers(dev_net(nh->fib_nh_dev), event_type, &info.info); default: break; @@ -1502,7 +1519,7 @@ void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) struct fib_nh *nh; hlist_for_each_entry(nh, head, nh_hash) { - if (nh->nh_dev == dev) + if (nh->fib_nh_dev == dev) nh_update_mtu(nh, dev->mtu, orig_mtu); } } @@ -1530,22 +1547,22 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) int dead; BUG_ON(!fi->fib_nhs); - if (nh->nh_dev != dev || fi == prev_fi) + if (nh->fib_nh_dev != dev || fi == prev_fi) continue; prev_fi = fi; dead = 0; change_nexthops(fi) { - if (nexthop_nh->nh_flags & RTNH_F_DEAD) + if (nexthop_nh->fib_nh_flags & RTNH_F_DEAD) dead++; - else if (nexthop_nh->nh_dev == dev && - nexthop_nh->nh_scope != scope) { + else if (nexthop_nh->fib_nh_dev == dev && + nexthop_nh->fib_nh_scope != scope) { switch (event) { case NETDEV_DOWN: case NETDEV_UNREGISTER: - nexthop_nh->nh_flags |= RTNH_F_DEAD; + nexthop_nh->fib_nh_flags |= RTNH_F_DEAD; /* fall through */ case NETDEV_CHANGE: - nexthop_nh->nh_flags |= RTNH_F_LINKDOWN; + nexthop_nh->fib_nh_flags |= RTNH_F_LINKDOWN; break; } call_fib_nh_notifiers(nexthop_nh, @@ -1554,7 +1571,7 @@ int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force) } #ifdef CONFIG_IP_ROUTE_MULTIPATH if (event == NETDEV_UNREGISTER && - nexthop_nh->nh_dev == dev) { + nexthop_nh->fib_nh_dev == dev) { dead = fi->fib_nhs; break; } @@ -1614,8 +1631,8 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res) if (next_fi->fib_scope != res->scope || fa->fa_type != RTN_UNICAST) continue; - if (!next_fi->fib_nh[0].nh_gw || - next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) + if (!next_fi->fib_nh[0].fib_nh_gw4 || + next_fi->fib_nh[0].fib_nh_scope != RT_SCOPE_LINK) continue; fib_alias_accessed(fa); @@ -1686,24 +1703,24 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags) int alive; BUG_ON(!fi->fib_nhs); - if (nh->nh_dev != dev || fi == prev_fi) + if (nh->fib_nh_dev != dev || fi == prev_fi) continue; prev_fi = fi; alive = 0; change_nexthops(fi) { - if (!(nexthop_nh->nh_flags & nh_flags)) { + if (!(nexthop_nh->fib_nh_flags & nh_flags)) { alive++; continue; } - if (!nexthop_nh->nh_dev || - !(nexthop_nh->nh_dev->flags & IFF_UP)) + if (!nexthop_nh->fib_nh_dev || + !(nexthop_nh->fib_nh_dev->flags & IFF_UP)) continue; - if (nexthop_nh->nh_dev != dev || + if (nexthop_nh->fib_nh_dev != dev || !__in_dev_get_rtnl(dev)) continue; alive++; - nexthop_nh->nh_flags &= ~nh_flags; + nexthop_nh->fib_nh_flags &= ~nh_flags; call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD); } endfor_nexthops(fi) @@ -1723,13 +1740,13 @@ static bool fib_good_nh(const struct fib_nh *nh) { int state = NUD_REACHABLE; - if (nh->nh_scope == RT_SCOPE_LINK) { + if (nh->fib_nh_scope == RT_SCOPE_LINK) { struct neighbour *n; rcu_read_lock_bh(); - n = __ipv4_neigh_lookup_noref(nh->nh_dev, - (__force u32)nh->nh_gw); + n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev, + (__force u32)nh->fib_nh_gw4); if (n) state = n->nud_state; @@ -1755,7 +1772,7 @@ void fib_select_multipath(struct fib_result *res, int hash) } } - if (hash > atomic_read(&nh->nh_upper_bound)) + if (hash > atomic_read(&nh->fib_nh_upper_bound)) continue; res->nh_sel = nhsel; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 1704f432de1f..1e3b492690f9 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1471,18 +1471,16 @@ found: continue; for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { const struct fib_nh *nh = &fi->fib_nh[nhsel]; - struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev); - if (nh->nh_flags & RTNH_F_DEAD) + if (nh->fib_nh_flags & RTNH_F_DEAD) continue; - if (in_dev && - IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && - nh->nh_flags & RTNH_F_LINKDOWN && + if (ip_ignore_linkdown(nh->fib_nh_dev) && + nh->fib_nh_flags & RTNH_F_LINKDOWN && !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) continue; if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { if (flp->flowi4_oif && - flp->flowi4_oif != nh->nh_oif) + flp->flowi4_oif != nh->fib_nh_oif) continue; } @@ -2653,7 +2651,7 @@ static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT) flags = RTF_REJECT; - if (fi && fi->fib_nh->nh_gw) + if (fi && fi->fib_nh->fib_nh_gw4) flags |= RTF_GATEWAY; if (mask == htonl(0xFFFFFFFF)) flags |= RTF_HOST; @@ -2704,7 +2702,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v) "%d\t%08X\t%d\t%u\t%u", fi->fib_dev ? fi->fib_dev->name : "*", prefix, - fi->fib_nh->nh_gw, flags, 0, 0, + fi->fib_nh->fib_nh_gw4, flags, 0, 0, fi->fib_priority, mask, (fi->fib_advmss ? diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f2688fce39e1..7977514d90f5 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -644,7 +644,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, unsigned int i; int depth; - genid = fnhe_genid(dev_net(nh->nh_dev)); + genid = fnhe_genid(dev_net(nh->fib_nh_dev)); hval = fnhe_hashfun(daddr); spin_lock_bh(&fnhe_lock); @@ -1356,7 +1356,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) { struct fib_info *fi = res->fi; struct fib_nh *nh = &fi->fib_nh[res->nh_sel]; - struct net_device *dev = nh->nh_dev; + struct net_device *dev = nh->fib_nh_dev; u32 mtu = 0; if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu || @@ -1374,7 +1374,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) if (likely(!mtu)) mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU); - return mtu - lwtunnel_headroom(nh->nh_lwtstate, mtu); + return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu); } static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, @@ -1531,8 +1531,8 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, if (fi) { struct fib_nh *nh = &FIB_RES_NH(*res); - if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) { - rt->rt_gateway = nh->nh_gw; + if (nh->fib_nh_gw4 && nh->fib_nh_scope == RT_SCOPE_LINK) { + rt->rt_gateway = nh->fib_nh_gw4; rt->rt_uses_gateway = 1; } ip_dst_init_metrics(&rt->dst, fi->fib_metrics); @@ -1540,7 +1540,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; #endif - rt->dst.lwtstate = lwtstate_get(nh->nh_lwtstate); + rt->dst.lwtstate = lwtstate_get(nh->fib_nh_lws); if (unlikely(fnhe)) cached = rt_bind_exception(rt, fnhe, daddr, do_cache); else if (do_cache) @@ -2075,7 +2075,7 @@ local_input: if (do_cache) { struct fib_nh *nh = &FIB_RES_NH(*res); - rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate); + rth->dst.lwtstate = lwtstate_get(nh->fib_nh_lws); if (lwtunnel_input_redirect(rth->dst.lwtstate)) { WARN_ON(rth->dst.input == lwtunnel_input); rth->dst.lwtstate->orig_input = rth->dst.input; @@ -2264,8 +2264,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, } else { if (unlikely(fl4->flowi4_flags & FLOWI_FLAG_KNOWN_NH && - !(nh->nh_gw && - nh->nh_scope == RT_SCOPE_LINK))) { + !(nh->fib_nh_gw4 && + nh->fib_nh_scope == RT_SCOPE_LINK))) { do_cache = false; goto add; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4ae17a966ae3..2e8d1d2d8d3d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -173,7 +173,8 @@ static int addrconf_ifdown(struct net_device *dev, int how); static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, int plen, const struct net_device *dev, - u32 flags, u32 noflags); + u32 flags, u32 noflags, + bool no_gw); static void addrconf_dad_start(struct inet6_ifaddr *ifp); static void addrconf_dad_work(struct work_struct *w); @@ -1230,10 +1231,8 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_r { struct fib6_info *f6i; - f6i = addrconf_get_prefix_route(&ifp->addr, - ifp->prefix_len, - ifp->idev->dev, - 0, RTF_GATEWAY | RTF_DEFAULT); + f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len, + ifp->idev->dev, 0, RTF_DEFAULT, true); if (f6i) { if (del_rt) ip6_del_rt(dev_net(ifp->idev->dev), f6i); @@ -2402,7 +2401,8 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, u32 metric, static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, int plen, const struct net_device *dev, - u32 flags, u32 noflags) + u32 flags, u32 noflags, + bool no_gw) { struct fib6_node *fn; struct fib6_info *rt = NULL; @@ -2419,7 +2419,9 @@ static struct fib6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, goto out; for_each_fib6_node_rt_rcu(fn) { - if (rt->fib6_nh.nh_dev->ifindex != dev->ifindex) + if (rt->fib6_nh.fib_nh_dev->ifindex != dev->ifindex) + continue; + if (no_gw && rt->fib6_nh.fib_nh_has_gw) continue; if ((rt->fib6_flags & flags) != flags) continue; @@ -2717,7 +2719,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) pinfo->prefix_len, dev, RTF_ADDRCONF | RTF_PREFIX_RT, - RTF_GATEWAY | RTF_DEFAULT); + RTF_DEFAULT, true); if (rt) { /* Autoconf prefix route */ @@ -4588,10 +4590,8 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp, struct fib6_info *f6i; u32 prio; - f6i = addrconf_get_prefix_route(&ifp->addr, - ifp->prefix_len, - ifp->idev->dev, - 0, RTF_GATEWAY | RTF_DEFAULT); + f6i = addrconf_get_prefix_route(&ifp->addr, ifp->prefix_len, + ifp->idev->dev, 0, RTF_DEFAULT, true); if (!f6i) return -ENOENT; @@ -5972,7 +5972,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) struct fib6_info *rt; rt = addrconf_get_prefix_route(&ifp->peer_addr, 128, - ifp->idev->dev, 0, 0); + ifp->idev->dev, 0, 0, + false); if (rt) ip6_del_rt(net, rt); } diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 6613d8dbb0e5..8c00609a1513 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -199,10 +199,7 @@ void fib6_info_destroy_rcu(struct rcu_head *head) free_percpu(f6i->rt6i_pcpu); } - lwtstate_put(f6i->fib6_nh.nh_lwtstate); - - if (f6i->fib6_nh.nh_dev) - dev_put(f6i->fib6_nh.nh_dev); + fib6_nh_release(&f6i->fib6_nh); ip_fib_metrics_put(f6i->fib6_metrics); @@ -2297,6 +2294,7 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v) { struct fib6_info *rt = v; struct ipv6_route_iter *iter = seq->private; + unsigned int flags = rt->fib6_flags; const struct net_device *dev; seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen); @@ -2306,15 +2304,17 @@ static int ipv6_route_seq_show(struct seq_file *seq, void *v) #else seq_puts(seq, "00000000000000000000000000000000 00 "); #endif - if (rt->fib6_flags & RTF_GATEWAY) - seq_printf(seq, "%pi6", &rt->fib6_nh.nh_gw); - else + if (rt->fib6_nh.fib_nh_has_gw) { + flags |= RTF_GATEWAY; + seq_printf(seq, "%pi6", &rt->fib6_nh.fib_nh_gw6); + } else { seq_puts(seq, "00000000000000000000000000000000"); + } - dev = rt->fib6_nh.nh_dev; + dev = rt->fib6_nh.fib_nh_dev; seq_printf(seq, " %08x %08x %08x %08x %8s\n", rt->fib6_metric, atomic_read(&rt->fib6_ref), 0, - rt->fib6_flags, dev ? dev->name : ""); + flags, dev ? dev->name : ""); iter->w.leaf = NULL; return 0; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 659ecf4e4b3c..66c8b294e02b 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1276,8 +1276,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev); if (rt) { - neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw, - rt->fib6_nh.nh_dev, NULL, + neigh = ip6_neigh_lookup(&rt->fib6_nh.fib_nh_gw6, + rt->fib6_nh.fib_nh_dev, NULL, &ipv6_hdr(skb)->saddr); if (!neigh) { ND_PRINTK(0, err, @@ -1306,8 +1306,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) return; } - neigh = ip6_neigh_lookup(&rt->fib6_nh.nh_gw, - rt->fib6_nh.nh_dev, NULL, + neigh = ip6_neigh_lookup(&rt->fib6_nh.fib_nh_gw6, + rt->fib6_nh.fib_nh_dev, NULL, &ipv6_hdr(skb)->saddr); if (!neigh) { ND_PRINTK(0, err, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 61f231f58da5..e0ee30cbd079 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -441,14 +441,14 @@ struct fib6_info *fib6_multipath_select(const struct net *net, if (!fl6->mp_hash) fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL); - if (fl6->mp_hash <= atomic_read(&match->fib6_nh.nh_upper_bound)) + if (fl6->mp_hash <= atomic_read(&match->fib6_nh.fib_nh_upper_bound)) return match; list_for_each_entry_safe(sibling, next_sibling, &match->fib6_siblings, fib6_siblings) { int nh_upper_bound; - nh_upper_bound = atomic_read(&sibling->fib6_nh.nh_upper_bound); + nh_upper_bound = atomic_read(&sibling->fib6_nh.fib_nh_upper_bound); if (fl6->mp_hash > nh_upper_bound) continue; if (rt6_score_route(sibling, oif, strict) < 0) @@ -473,13 +473,13 @@ static inline struct fib6_info *rt6_device_match(struct net *net, struct fib6_info *sprt; if (!oif && ipv6_addr_any(saddr) && - !(rt->fib6_nh.nh_flags & RTNH_F_DEAD)) + !(rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD)) return rt; for (sprt = rt; sprt; sprt = rcu_dereference(sprt->fib6_next)) { - const struct net_device *dev = sprt->fib6_nh.nh_dev; + const struct net_device *dev = sprt->fib6_nh.fib_nh_dev; - if (sprt->fib6_nh.nh_flags & RTNH_F_DEAD) + if (sprt->fib6_nh.fib_nh_flags & RTNH_F_DEAD) continue; if (oif) { @@ -495,7 +495,7 @@ static inline struct fib6_info *rt6_device_match(struct net *net, if (oif && flags & RT6_LOOKUP_F_IFACE) return net->ipv6.fib6_null_entry; - return rt->fib6_nh.nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt; + return rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD ? net->ipv6.fib6_null_entry : rt; } #ifdef CONFIG_IPV6_ROUTER_PREF @@ -533,11 +533,11 @@ static void rt6_probe(struct fib6_info *rt) * Router Reachability Probe MUST be rate-limited * to no more than one per minute. */ - if (!rt || !(rt->fib6_flags & RTF_GATEWAY)) + if (!rt || !rt->fib6_nh.fib_nh_has_gw) return; - nh_gw = &rt->fib6_nh.nh_gw; - dev = rt->fib6_nh.nh_dev; + nh_gw = &rt->fib6_nh.fib_nh_gw6; + dev = rt->fib6_nh.fib_nh_dev; rcu_read_lock_bh(); idev = __in6_dev_get(dev); neigh = __ipv6_neigh_lookup_noref(dev, nh_gw); @@ -582,7 +582,7 @@ static inline void rt6_probe(struct fib6_info *rt) */ static inline int rt6_check_dev(struct fib6_info *rt, int oif) { - const struct net_device *dev = rt->fib6_nh.nh_dev; + const struct net_device *dev = rt->fib6_nh.fib_nh_dev; if (!oif || dev->ifindex == oif) return 2; @@ -595,12 +595,12 @@ static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt) struct neighbour *neigh; if (rt->fib6_flags & RTF_NONEXTHOP || - !(rt->fib6_flags & RTF_GATEWAY)) + !rt->fib6_nh.fib_nh_has_gw) return RT6_NUD_SUCCEED; rcu_read_lock_bh(); - neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.nh_dev, - &rt->fib6_nh.nh_gw); + neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh.fib_nh_dev, + &rt->fib6_nh.fib_nh_gw6); if (neigh) { read_lock(&neigh->lock); if (neigh->nud_state & NUD_VALID) @@ -639,21 +639,6 @@ static int rt6_score_route(struct fib6_info *rt, int oif, int strict) return m; } -/* called with rc_read_lock held */ -static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i) -{ - const struct net_device *dev = fib6_info_nh_dev(f6i); - bool rc = false; - - if (dev) { - const struct inet6_dev *idev = __in6_dev_get(dev); - - rc = !!idev->cnf.ignore_routes_with_linkdown; - } - - return rc; -} - static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict, int *mpri, struct fib6_info *match, bool *do_rr) @@ -661,11 +646,11 @@ static struct fib6_info *find_match(struct fib6_info *rt, int oif, int strict, int m; bool match_do_rr = false; - if (rt->fib6_nh.nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD) goto out; - if (fib6_ignore_linkdown(rt) && - rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN && + if (ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev) && + rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN && !(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE)) goto out; @@ -784,7 +769,7 @@ static struct fib6_info *rt6_select(struct net *net, struct fib6_node *fn, static bool rt6_is_gw_or_nonexthop(const struct fib6_info *rt) { - return (rt->fib6_flags & (RTF_NONEXTHOP | RTF_GATEWAY)); + return (rt->fib6_flags & RTF_NONEXTHOP) || rt->fib6_nh.fib_nh_has_gw; } #ifdef CONFIG_IPV6_ROUTE_INFO @@ -870,7 +855,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, /* called with rcu_lock held */ static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt) { - struct net_device *dev = rt->fib6_nh.nh_dev; + struct net_device *dev = rt->fib6_nh.fib_nh_dev; if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) { /* for copies of local routes, dst->dev needs to be the @@ -964,8 +949,8 @@ static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort) rt->dst.input = ip6_forward; } - if (ort->fib6_nh.nh_lwtstate) { - rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.nh_lwtstate); + if (ort->fib6_nh.fib_nh_lws) { + rt->dst.lwtstate = lwtstate_get(ort->fib6_nh.fib_nh_lws); lwtunnel_set_redirect(&rt->dst); } @@ -989,8 +974,11 @@ static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort) rt->rt6i_dst = ort->fib6_dst; rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL; - rt->rt6i_gateway = ort->fib6_nh.nh_gw; rt->rt6i_flags = ort->fib6_flags; + if (ort->fib6_nh.fib_nh_has_gw) { + rt->rt6i_gateway = ort->fib6_nh.fib_nh_gw6; + rt->rt6i_flags |= RTF_GATEWAY; + } rt6_set_from(rt, ort); #ifdef CONFIG_IPV6_SUBTREES rt->rt6i_src = ort->fib6_src; @@ -1035,7 +1023,7 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt) static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt) { unsigned short flags = fib6_info_dst_flags(rt); - struct net_device *dev = rt->fib6_nh.nh_dev; + struct net_device *dev = rt->fib6_nh.fib_nh_dev; struct rt6_info *nrt; if (!fib6_info_hold_safe(rt)) @@ -1419,7 +1407,7 @@ static unsigned int fib6_mtu(const struct fib6_info *rt) mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); - return mtu - lwtunnel_headroom(rt->fib6_nh.nh_lwtstate, mtu); + return mtu - lwtunnel_headroom(rt->fib6_nh.fib_nh_lws, mtu); } static int rt6_insert_exception(struct rt6_info *nrt, @@ -1872,7 +1860,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, rcu_read_unlock(); return rt; } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && - !(f6i->fib6_flags & RTF_GATEWAY))) { + !f6i->fib6_nh.fib_nh_has_gw)) { /* Create a RTF_CACHE clone which will not be * owned by the fib6 tree. It is for the special case where * the daddr in the skb during the neighbor look-up is different @@ -2436,22 +2424,22 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: for_each_fib6_node_rt_rcu(fn) { - if (rt->fib6_nh.nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD) continue; if (fib6_check_expired(rt)) continue; if (rt->fib6_flags & RTF_REJECT) break; - if (!(rt->fib6_flags & RTF_GATEWAY)) + if (!rt->fib6_nh.fib_nh_has_gw) continue; - if (fl6->flowi6_oif != rt->fib6_nh.nh_dev->ifindex) + if (fl6->flowi6_oif != rt->fib6_nh.fib_nh_dev->ifindex) continue; /* rt_cache's gateway might be different from its 'parent' * in the case of an ip redirect. * So we keep searching in the exception table if the gateway * is different. */ - if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.nh_gw)) { + if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh.fib_nh_gw6)) { rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); @@ -2898,17 +2886,143 @@ out: return err; } +static bool fib6_is_reject(u32 flags, struct net_device *dev, int addr_type) +{ + if ((flags & RTF_REJECT) || + (dev && (dev->flags & IFF_LOOPBACK) && + !(addr_type & IPV6_ADDR_LOOPBACK) && + !(flags & RTF_LOCAL))) + return true; + + return false; +} + +int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, + struct fib6_config *cfg, gfp_t gfp_flags, + struct netlink_ext_ack *extack) +{ + struct net_device *dev = NULL; + struct inet6_dev *idev = NULL; + int addr_type; + int err; + + fib6_nh->fib_nh_family = AF_INET6; + + err = -ENODEV; + if (cfg->fc_ifindex) { + dev = dev_get_by_index(net, cfg->fc_ifindex); + if (!dev) + goto out; + idev = in6_dev_get(dev); + if (!idev) + goto out; + } + + if (cfg->fc_flags & RTNH_F_ONLINK) { + if (!dev) { + NL_SET_ERR_MSG(extack, + "Nexthop device required for onlink"); + goto out; + } + + if (!(dev->flags & IFF_UP)) { + NL_SET_ERR_MSG(extack, "Nexthop device is not up"); + err = -ENETDOWN; + goto out; + } + + fib6_nh->fib_nh_flags |= RTNH_F_ONLINK; + } + + fib6_nh->fib_nh_weight = 1; + + /* We cannot add true routes via loopback here, + * they would result in kernel looping; promote them to reject routes + */ + addr_type = ipv6_addr_type(&cfg->fc_dst); + if (fib6_is_reject(cfg->fc_flags, dev, addr_type)) { + /* hold loopback dev/idev if we haven't done so. */ + if (dev != net->loopback_dev) { + if (dev) { + dev_put(dev); + in6_dev_put(idev); + } + dev = net->loopback_dev; + dev_hold(dev); + idev = in6_dev_get(dev); + if (!idev) { + err = -ENODEV; + goto out; + } + } + goto set_dev; + } + + if (cfg->fc_flags & RTF_GATEWAY) { + err = ip6_validate_gw(net, cfg, &dev, &idev, extack); + if (err) + goto out; + + fib6_nh->fib_nh_gw6 = cfg->fc_gateway; + fib6_nh->fib_nh_has_gw = 1; + } + + err = -ENODEV; + if (!dev) + goto out; + + if (idev->cnf.disable_ipv6) { + NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device"); + err = -EACCES; + goto out; + } + + if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) { + NL_SET_ERR_MSG(extack, "Nexthop device is not up"); + err = -ENETDOWN; + goto out; + } + + if (!(cfg->fc_flags & (RTF_LOCAL | RTF_ANYCAST)) && + !netif_carrier_ok(dev)) + fib6_nh->fib_nh_flags |= RTNH_F_LINKDOWN; + + err = fib_nh_common_init(&fib6_nh->nh_common, cfg->fc_encap, + cfg->fc_encap_type, cfg, gfp_flags, extack); + if (err) + goto out; +set_dev: + fib6_nh->fib_nh_dev = dev; + fib6_nh->fib_nh_oif = dev->ifindex; + err = 0; +out: + if (idev) + in6_dev_put(idev); + + if (err) { + lwtstate_put(fib6_nh->fib_nh_lws); + fib6_nh->fib_nh_lws = NULL; + if (dev) + dev_put(dev); + } + + return err; +} + +void fib6_nh_release(struct fib6_nh *fib6_nh) +{ + fib_nh_common_release(&fib6_nh->nh_common); +} + static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, gfp_t gfp_flags, struct netlink_ext_ack *extack) { struct net *net = cfg->fc_nlinfo.nl_net; struct fib6_info *rt = NULL; - struct net_device *dev = NULL; - struct inet6_dev *idev = NULL; struct fib6_table *table; - int addr_type; int err = -EINVAL; + int addr_type; /* RTF_PCPU is an internal flag; can not be set by userspace */ if (cfg->fc_flags & RTF_PCPU) { @@ -2942,30 +3056,6 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, goto out; } #endif - if (cfg->fc_ifindex) { - err = -ENODEV; - dev = dev_get_by_index(net, cfg->fc_ifindex); - if (!dev) - goto out; - idev = in6_dev_get(dev); - if (!idev) - goto out; - } - - if (cfg->fc_flags & RTNH_F_ONLINK) { - if (!dev) { - NL_SET_ERR_MSG(extack, - "Nexthop device required for onlink"); - err = -ENODEV; - goto out; - } - - if (!(dev->flags & IFF_UP)) { - NL_SET_ERR_MSG(extack, "Nexthop device is not up"); - err = -ENETDOWN; - goto out; - } - } err = -ENOBUFS; if (cfg->fc_nlinfo.nlh && @@ -3009,18 +3099,10 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, cfg->fc_protocol = RTPROT_BOOT; rt->fib6_protocol = cfg->fc_protocol; - addr_type = ipv6_addr_type(&cfg->fc_dst); - - if (cfg->fc_encap) { - struct lwtunnel_state *lwtstate; - - err = lwtunnel_build_state(cfg->fc_encap_type, - cfg->fc_encap, AF_INET6, cfg, - &lwtstate, extack); - if (err) - goto out; - rt->fib6_nh.nh_lwtstate = lwtstate_get(lwtstate); - } + rt->fib6_table = table; + rt->fib6_metric = cfg->fc_metric; + rt->fib6_type = cfg->fc_type; + rt->fib6_flags = cfg->fc_flags & ~RTF_GATEWAY; ipv6_addr_prefix(&rt->fib6_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->fib6_dst.plen = cfg->fc_dst_len; @@ -3031,62 +3113,20 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, ipv6_addr_prefix(&rt->fib6_src.addr, &cfg->fc_src, cfg->fc_src_len); rt->fib6_src.plen = cfg->fc_src_len; #endif - - rt->fib6_metric = cfg->fc_metric; - rt->fib6_nh.nh_weight = 1; - - rt->fib6_type = cfg->fc_type; + err = fib6_nh_init(net, &rt->fib6_nh, cfg, gfp_flags, extack); + if (err) + goto out; /* We cannot add true routes via loopback here, - they would result in kernel looping; promote them to reject routes + * they would result in kernel looping; promote them to reject routes */ - if ((cfg->fc_flags & RTF_REJECT) || - (dev && (dev->flags & IFF_LOOPBACK) && - !(addr_type & IPV6_ADDR_LOOPBACK) && - !(cfg->fc_flags & RTF_LOCAL))) { - /* hold loopback dev/idev if we haven't done so. */ - if (dev != net->loopback_dev) { - if (dev) { - dev_put(dev); - in6_dev_put(idev); - } - dev = net->loopback_dev; - dev_hold(dev); - idev = in6_dev_get(dev); - if (!idev) { - err = -ENODEV; - goto out; - } - } - rt->fib6_flags = RTF_REJECT|RTF_NONEXTHOP; - goto install_route; - } - - if (cfg->fc_flags & RTF_GATEWAY) { - err = ip6_validate_gw(net, cfg, &dev, &idev, extack); - if (err) - goto out; - - rt->fib6_nh.nh_gw = cfg->fc_gateway; - } - - err = -ENODEV; - if (!dev) - goto out; - - if (idev->cnf.disable_ipv6) { - NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device"); - err = -EACCES; - goto out; - } - - if (!(dev->flags & IFF_UP) && !cfg->fc_ignore_dev_down) { - NL_SET_ERR_MSG(extack, "Nexthop device is not up"); - err = -ENETDOWN; - goto out; - } + addr_type = ipv6_addr_type(&cfg->fc_dst); + if (fib6_is_reject(cfg->fc_flags, rt->fib6_nh.fib_nh_dev, addr_type)) + rt->fib6_flags = RTF_REJECT | RTF_NONEXTHOP; if (!ipv6_addr_any(&cfg->fc_prefsrc)) { + struct net_device *dev = fib6_info_nh_dev(rt); + if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) { NL_SET_ERR_MSG(extack, "Invalid source address"); err = -EINVAL; @@ -3097,26 +3137,8 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, } else rt->fib6_prefsrc.plen = 0; - rt->fib6_flags = cfg->fc_flags; - -install_route: - if (!(rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) && - !netif_carrier_ok(dev)) - rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN; - rt->fib6_nh.nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK); - rt->fib6_nh.nh_dev = dev; - rt->fib6_table = table; - - if (idev) - in6_dev_put(idev); - return rt; out: - if (dev) - dev_put(dev); - if (idev) - in6_dev_put(idev); - fib6_info_release(rt); return ERR_PTR(err); } @@ -3257,6 +3279,8 @@ static int ip6_route_del(struct fib6_config *cfg, if (fn) { for_each_fib6_node_rt_rcu(fn) { + struct fib6_nh *nh; + if (cfg->fc_flags & RTF_CACHE) { int rc; @@ -3271,12 +3295,14 @@ static int ip6_route_del(struct fib6_config *cfg, } continue; } + + nh = &rt->fib6_nh; if (cfg->fc_ifindex && - (!rt->fib6_nh.nh_dev || - rt->fib6_nh.nh_dev->ifindex != cfg->fc_ifindex)) + (!nh->fib_nh_dev || + nh->fib_nh_dev->ifindex != cfg->fc_ifindex)) continue; if (cfg->fc_flags & RTF_GATEWAY && - !ipv6_addr_equal(&cfg->fc_gateway, &rt->fib6_nh.nh_gw)) + !ipv6_addr_equal(&cfg->fc_gateway, &nh->fib_nh_gw6)) continue; if (cfg->fc_metric && cfg->fc_metric != rt->fib6_metric) continue; @@ -3447,11 +3473,12 @@ static struct fib6_info *rt6_get_route_info(struct net *net, goto out; for_each_fib6_node_rt_rcu(fn) { - if (rt->fib6_nh.nh_dev->ifindex != ifindex) + if (rt->fib6_nh.fib_nh_dev->ifindex != ifindex) continue; - if ((rt->fib6_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) + if (!(rt->fib6_flags & RTF_ROUTEINFO) || + !rt->fib6_nh.fib_nh_has_gw) continue; - if (!ipv6_addr_equal(&rt->fib6_nh.nh_gw, gwaddr)) + if (!ipv6_addr_equal(&rt->fib6_nh.fib_nh_gw6, gwaddr)) continue; if (!fib6_info_hold_safe(rt)) continue; @@ -3509,9 +3536,11 @@ struct fib6_info *rt6_get_dflt_router(struct net *net, rcu_read_lock(); for_each_fib6_node_rt_rcu(&table->tb6_root) { - if (dev == rt->fib6_nh.nh_dev && + struct fib6_nh *nh = &rt->fib6_nh; + + if (dev == nh->fib_nh_dev && ((rt->fib6_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && - ipv6_addr_equal(&rt->fib6_nh.nh_gw, addr)) + ipv6_addr_equal(&nh->fib_nh_gw6, addr)) break; } if (rt && !fib6_info_hold_safe(rt)) @@ -3748,7 +3777,7 @@ static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg) struct net *net = ((struct arg_dev_net_ip *)arg)->net; struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr; - if (((void *)rt->fib6_nh.nh_dev == dev || !dev) && + if (((void *)rt->fib6_nh.fib_nh_dev == dev || !dev) && rt != net->ipv6.fib6_null_entry && ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) { spin_lock_bh(&rt6_exception_lock); @@ -3770,7 +3799,7 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) fib6_clean_all(net, fib6_remove_prefsrc, &adni); } -#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY) +#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT) /* Remove routers and update dst entries when gateway turn into host. */ static int fib6_clean_tohost(struct fib6_info *rt, void *arg) @@ -3778,7 +3807,8 @@ static int fib6_clean_tohost(struct fib6_info *rt, void *arg) struct in6_addr *gateway = (struct in6_addr *)arg; if (((rt->fib6_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && - ipv6_addr_equal(gateway, &rt->fib6_nh.nh_gw)) { + rt->fib6_nh.fib_nh_has_gw && + ipv6_addr_equal(gateway, &rt->fib6_nh.fib_nh_gw6)) { return -1; } @@ -3826,9 +3856,9 @@ static struct fib6_info *rt6_multipath_first_sibling(const struct fib6_info *rt) static bool rt6_is_dead(const struct fib6_info *rt) { - if (rt->fib6_nh.nh_flags & RTNH_F_DEAD || - (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN && - fib6_ignore_linkdown(rt))) + if (rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD || + (rt->fib6_nh.fib_nh_flags & RTNH_F_LINKDOWN && + ip6_ignore_linkdown(rt->fib6_nh.fib_nh_dev))) return true; return false; @@ -3840,11 +3870,11 @@ static int rt6_multipath_total_weight(const struct fib6_info *rt) int total = 0; if (!rt6_is_dead(rt)) - total += rt->fib6_nh.nh_weight; + total += rt->fib6_nh.fib_nh_weight; list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { if (!rt6_is_dead(iter)) - total += iter->fib6_nh.nh_weight; + total += iter->fib6_nh.fib_nh_weight; } return total; @@ -3855,11 +3885,11 @@ static void rt6_upper_bound_set(struct fib6_info *rt, int *weight, int total) int upper_bound = -1; if (!rt6_is_dead(rt)) { - *weight += rt->fib6_nh.nh_weight; + *weight += rt->fib6_nh.fib_nh_weight; upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31, total) - 1; } - atomic_set(&rt->fib6_nh.nh_upper_bound, upper_bound); + atomic_set(&rt->fib6_nh.fib_nh_upper_bound, upper_bound); } static void rt6_multipath_upper_bound_set(struct fib6_info *rt, int total) @@ -3902,8 +3932,9 @@ static int fib6_ifup(struct fib6_info *rt, void *p_arg) const struct arg_netdev_event *arg = p_arg; struct net *net = dev_net(arg->dev); - if (rt != net->ipv6.fib6_null_entry && rt->fib6_nh.nh_dev == arg->dev) { - rt->fib6_nh.nh_flags &= ~arg->nh_flags; + if (rt != net->ipv6.fib6_null_entry && + rt->fib6_nh.fib_nh_dev == arg->dev) { + rt->fib6_nh.fib_nh_flags &= ~arg->nh_flags; fib6_update_sernum_upto_root(net, rt); rt6_multipath_rebalance(rt); } @@ -3931,10 +3962,10 @@ static bool rt6_multipath_uses_dev(const struct fib6_info *rt, { struct fib6_info *iter; - if (rt->fib6_nh.nh_dev == dev) + if (rt->fib6_nh.fib_nh_dev == dev) return true; list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) - if (iter->fib6_nh.nh_dev == dev) + if (iter->fib6_nh.fib_nh_dev == dev) return true; return false; @@ -3955,12 +3986,12 @@ static unsigned int rt6_multipath_dead_count(const struct fib6_info *rt, struct fib6_info *iter; unsigned int dead = 0; - if (rt->fib6_nh.nh_dev == down_dev || - rt->fib6_nh.nh_flags & RTNH_F_DEAD) + if (rt->fib6_nh.fib_nh_dev == down_dev || + rt->fib6_nh.fib_nh_flags & RTNH_F_DEAD) dead++; list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) - if (iter->fib6_nh.nh_dev == down_dev || - iter->fib6_nh.nh_flags & RTNH_F_DEAD) + if (iter->fib6_nh.fib_nh_dev == down_dev || + iter->fib6_nh.fib_nh_flags & RTNH_F_DEAD) dead++; return dead; @@ -3972,11 +4003,11 @@ static void rt6_multipath_nh_flags_set(struct fib6_info *rt, { struct fib6_info *iter; - if (rt->fib6_nh.nh_dev == dev) - rt->fib6_nh.nh_flags |= nh_flags; + if (rt->fib6_nh.fib_nh_dev == dev) + rt->fib6_nh.fib_nh_flags |= nh_flags; list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) - if (iter->fib6_nh.nh_dev == dev) - iter->fib6_nh.nh_flags |= nh_flags; + if (iter->fib6_nh.fib_nh_dev == dev) + iter->fib6_nh.fib_nh_flags |= nh_flags; } /* called with write lock held for table with rt */ @@ -3991,12 +4022,12 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg) switch (arg->event) { case NETDEV_UNREGISTER: - return rt->fib6_nh.nh_dev == dev ? -1 : 0; + return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0; case NETDEV_DOWN: if (rt->should_flush) return -1; if (!rt->fib6_nsiblings) - return rt->fib6_nh.nh_dev == dev ? -1 : 0; + return rt->fib6_nh.fib_nh_dev == dev ? -1 : 0; if (rt6_multipath_uses_dev(rt, dev)) { unsigned int count; @@ -4012,10 +4043,10 @@ static int fib6_ifdown(struct fib6_info *rt, void *p_arg) } return -2; case NETDEV_CHANGE: - if (rt->fib6_nh.nh_dev != dev || + if (rt->fib6_nh.fib_nh_dev != dev || rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) break; - rt->fib6_nh.nh_flags |= RTNH_F_LINKDOWN; + rt->fib6_nh.fib_nh_flags |= RTNH_F_LINKDOWN; rt6_multipath_rebalance(rt); break; } @@ -4071,7 +4102,7 @@ static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg) Since RFC 1981 doesn't include administrative MTU increase update PMTU increase is a MUST. (i.e. jumbo frame) */ - if (rt->fib6_nh.nh_dev == arg->dev && + if (rt->fib6_nh.fib_nh_dev == arg->dev && !fib6_metric_locked(rt, RTAX_MTU)) { u32 mtu = rt->fib6_pmtu; @@ -4362,7 +4393,7 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, goto cleanup; } - rt->fib6_nh.nh_weight = rtnh->rtnh_hops + 1; + rt->fib6_nh.fib_nh_weight = rtnh->rtnh_hops + 1; err = ip6_route_info_append(info->nl_net, &rt6_nh_list, rt, &r_cfg); @@ -4529,7 +4560,7 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt) nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */ + NLA_ALIGN(sizeof(struct rtnexthop)) + nla_total_size(16) /* RTA_GATEWAY */ - + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate); + + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws); nexthop_len *= rt->fib6_nsiblings; } @@ -4547,41 +4578,41 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt) + nla_total_size(sizeof(struct rta_cacheinfo)) + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ + nla_total_size(1) /* RTA_PREF */ - + lwtunnel_get_encap_size(rt->fib6_nh.nh_lwtstate) + + lwtunnel_get_encap_size(rt->fib6_nh.fib_nh_lws) + nexthop_len; } -static int rt6_nexthop_info(struct sk_buff *skb, struct fib6_info *rt, +static int rt6_nexthop_info(struct sk_buff *skb, const struct fib6_nh *fib6_nh, unsigned int *flags, bool skip_oif) { - if (rt->fib6_nh.nh_flags & RTNH_F_DEAD) + if (fib6_nh->fib_nh_flags & RTNH_F_DEAD) *flags |= RTNH_F_DEAD; - if (rt->fib6_nh.nh_flags & RTNH_F_LINKDOWN) { + if (fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN) { *flags |= RTNH_F_LINKDOWN; rcu_read_lock(); - if (fib6_ignore_linkdown(rt)) + if (ip6_ignore_linkdown(fib6_nh->fib_nh_dev)) *flags |= RTNH_F_DEAD; rcu_read_unlock(); } - if (rt->fib6_flags & RTF_GATEWAY) { - if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->fib6_nh.nh_gw) < 0) + if (fib6_nh->fib_nh_has_gw) { + if (nla_put_in6_addr(skb, RTA_GATEWAY, &fib6_nh->fib_nh_gw6) < 0) goto nla_put_failure; } - *flags |= (rt->fib6_nh.nh_flags & RTNH_F_ONLINK); - if (rt->fib6_nh.nh_flags & RTNH_F_OFFLOAD) + *flags |= (fib6_nh->fib_nh_flags & RTNH_F_ONLINK); + if (fib6_nh->fib_nh_flags & RTNH_F_OFFLOAD) *flags |= RTNH_F_OFFLOAD; /* not needed for multipath encoding b/c it has a rtnexthop struct */ - if (!skip_oif && rt->fib6_nh.nh_dev && - nla_put_u32(skb, RTA_OIF, rt->fib6_nh.nh_dev->ifindex)) + if (!skip_oif && fib6_nh->fib_nh_dev && + nla_put_u32(skb, RTA_OIF, fib6_nh->fib_nh_dev->ifindex)) goto nla_put_failure; - if (rt->fib6_nh.nh_lwtstate && - lwtunnel_fill_encap(skb, rt->fib6_nh.nh_lwtstate) < 0) + if (fib6_nh->fib_nh_lws && + lwtunnel_fill_encap(skb, fib6_nh->fib_nh_lws) < 0) goto nla_put_failure; return 0; @@ -4591,9 +4622,9 @@ nla_put_failure: } /* add multipath next hop */ -static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt) +static int rt6_add_nexthop(struct sk_buff *skb, const struct fib6_nh *fib6_nh) { - const struct net_device *dev = rt->fib6_nh.nh_dev; + const struct net_device *dev = fib6_nh->fib_nh_dev; struct rtnexthop *rtnh; unsigned int flags = 0; @@ -4601,10 +4632,10 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct fib6_info *rt) if (!rtnh) goto nla_put_failure; - rtnh->rtnh_hops = rt->fib6_nh.nh_weight - 1; + rtnh->rtnh_hops = fib6_nh->fib_nh_weight - 1; rtnh->rtnh_ifindex = dev ? dev->ifindex : 0; - if (rt6_nexthop_info(skb, rt, &flags, true) < 0) + if (rt6_nexthop_info(skb, fib6_nh, &flags, true) < 0) goto nla_put_failure; rtnh->rtnh_flags = flags; @@ -4734,18 +4765,19 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, if (!mp) goto nla_put_failure; - if (rt6_add_nexthop(skb, rt) < 0) + if (rt6_add_nexthop(skb, &rt->fib6_nh) < 0) goto nla_put_failure; list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) { - if (rt6_add_nexthop(skb, sibling) < 0) + if (rt6_add_nexthop(skb, &sibling->fib6_nh) < 0) goto nla_put_failure; } nla_nest_end(skb, mp); } else { - if (rt6_nexthop_info(skb, rt, &rtm->rtm_flags, false) < 0) + if (rt6_nexthop_info(skb, &rt->fib6_nh, &rtm->rtm_flags, + false) < 0) goto nla_put_failure; } @@ -4772,7 +4804,7 @@ nla_put_failure: static bool fib6_info_uses_dev(const struct fib6_info *f6i, const struct net_device *dev) { - if (f6i->fib6_nh.nh_dev == dev) + if (f6i->fib6_nh.fib_nh_dev == dev) return true; if (f6i->fib6_nsiblings) { @@ -4780,7 +4812,7 @@ static bool fib6_info_uses_dev(const struct fib6_info *f6i, list_for_each_entry_safe(sibling, next_sibling, &f6i->fib6_siblings, fib6_siblings) { - if (sibling->fib6_nh.nh_dev == dev) + if (sibling->fib6_nh.fib_nh_dev == dev) return true; } } @@ -5065,7 +5097,7 @@ static int ip6_route_dev_notify(struct notifier_block *this, return NOTIFY_OK; if (event == NETDEV_REGISTER) { - net->ipv6.fib6_null_entry->fib6_nh.nh_dev = dev; + net->ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = dev; net->ipv6.ip6_null_entry->dst.dev = dev; net->ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES @@ -5400,7 +5432,7 @@ void __init ip6_route_init_special_entries(void) /* Registering of the loopback is done before this portion of code, * the loopback reference in rt6_info will not be taken, do it * manually for init_net */ - init_net.ipv6.fib6_null_entry->fib6_nh.nh_dev = init_net.loopback_dev; + init_net.ipv6.fib6_null_entry->fib6_nh.fib_nh_dev = init_net.loopback_dev; init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES