diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 5264c1a49d86..d536a9340cd5 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1750,21 +1750,16 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
 	}
 
 	min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
-			+ VXLAN_HLEN + iphdr_len
-			+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
+			+ VXLAN_HLEN + iphdr_len;
 
 	/* Need space for new headers (invalidates iph ptr) */
 	err = skb_cow_head(skb, min_headroom);
 	if (unlikely(err))
-		goto out_free;
-
-	skb = vlan_hwaccel_push_inside(skb);
-	if (WARN_ON(!skb))
-		return -ENOMEM;
+		return err;
 
 	err = iptunnel_handle_offloads(skb, type);
 	if (err)
-		goto out_free;
+		return err;
 
 	vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
 	vxh->vx_flags = VXLAN_HF_VNI;
@@ -1788,19 +1783,16 @@ static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
 	if (vxflags & VXLAN_F_GPE) {
 		err = vxlan_build_gpe_hdr(vxh, vxflags, skb->protocol);
 		if (err < 0)
-			goto out_free;
+			return err;
 		inner_protocol = skb->protocol;
 	}
 
 	skb_set_inner_protocol(skb, inner_protocol);
 	return 0;
-
-out_free:
-	kfree_skb(skb);
-	return err;
 }
 
-static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
+static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan, struct net_device *dev,
+				      struct vxlan_sock *sock4,
 				      struct sk_buff *skb, int oif, u8 tos,
 				      __be32 daddr, __be32 *saddr,
 				      struct dst_cache *dst_cache,
@@ -1810,6 +1802,9 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
 	struct rtable *rt = NULL;
 	struct flowi4 fl4;
 
+	if (!sock4)
+		return ERR_PTR(-EIO);
+
 	if (tos && !info)
 		use_cache = false;
 	if (use_cache) {
@@ -1827,16 +1822,27 @@ static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
 	fl4.saddr = *saddr;
 
 	rt = ip_route_output_key(vxlan->net, &fl4);
-	if (!IS_ERR(rt)) {
+	if (likely(!IS_ERR(rt))) {
+		if (rt->dst.dev == dev) {
+			netdev_dbg(dev, "circular route to %pI4\n", &daddr);
+			ip_rt_put(rt);
+			return ERR_PTR(-ELOOP);
+		}
+
 		*saddr = fl4.saddr;
 		if (use_cache)
 			dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
+	} else {
+		netdev_dbg(dev, "no route to %pI4\n", &daddr);
+		return ERR_PTR(-ENETUNREACH);
 	}
 	return rt;
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
 static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
+					  struct net_device *dev,
+					  struct vxlan_sock *sock6,
 					  struct sk_buff *skb, int oif, u8 tos,
 					  __be32 label,
 					  const struct in6_addr *daddr,
@@ -1844,7 +1850,6 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
 					  struct dst_cache *dst_cache,
 					  const struct ip_tunnel_info *info)
 {
-	struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
 	bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
 	struct dst_entry *ndst;
 	struct flowi6 fl6;
@@ -1872,8 +1877,16 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
 
 	err = ipv6_stub->ipv6_dst_lookup(vxlan->net, sock6->sock->sk,
 					 &ndst, &fl6);
-	if (err < 0)
-		return ERR_PTR(err);
+	if (unlikely(err < 0)) {
+		netdev_dbg(dev, "no route to %pI6\n", daddr);
+		return ERR_PTR(-ENETUNREACH);
+	}
+
+	if (unlikely(ndst->dev == dev)) {
+		netdev_dbg(dev, "circular route to %pI6\n", daddr);
+		dst_release(ndst);
+		return ERR_PTR(-ELOOP);
+	}
 
 	*saddr = fl6.saddr;
 	if (use_cache)
@@ -1927,23 +1940,55 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
 	}
 }
 
+static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
+				 struct vxlan_dev *vxlan, union vxlan_addr *daddr,
+				 __be16 dst_port, __be32 vni, struct dst_entry *dst,
+				 u32 rt_flags)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	/* IPv6 rt-flags are checked against RTF_LOCAL, but the value of
+	 * RTF_LOCAL is equal to RTCF_LOCAL. So to keep code simple
+	 * we can use RTCF_LOCAL which works for ipv4 and ipv6 route entry.
+	 */
+	BUILD_BUG_ON(RTCF_LOCAL != RTF_LOCAL);
+#endif
+	/* Bypass encapsulation if the destination is local */
+	if (rt_flags & RTCF_LOCAL &&
+	    !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
+		struct vxlan_dev *dst_vxlan;
+
+		dst_release(dst);
+		dst_vxlan = vxlan_find_vni(vxlan->net, vni,
+					   daddr->sa.sa_family, dst_port,
+					   vxlan->flags);
+		if (!dst_vxlan) {
+			dev->stats.tx_errors++;
+			kfree_skb(skb);
+
+			return -ENOENT;
+		}
+		vxlan_encap_bypass(skb, vxlan, dst_vxlan);
+		return 1;
+	}
+
+	return 0;
+}
+
 static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 			   struct vxlan_rdst *rdst, bool did_rsc)
 {
 	struct dst_cache *dst_cache;
 	struct ip_tunnel_info *info;
 	struct vxlan_dev *vxlan = netdev_priv(dev);
-	struct sock *sk;
-	struct rtable *rt = NULL;
-	const struct iphdr *old_iph;
+	const struct iphdr *old_iph = ip_hdr(skb);
 	union vxlan_addr *dst;
 	union vxlan_addr remote_ip, local_ip;
 	union vxlan_addr *src;
 	struct vxlan_metadata _md;
 	struct vxlan_metadata *md = &_md;
 	__be16 src_port = 0, dst_port;
+	struct dst_entry *ndst = NULL;
 	__be32 vni, label;
-	__be16 df = 0;
 	__u8 tos, ttl;
 	int err;
 	u32 flags = vxlan->flags;
@@ -1953,19 +1998,40 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 	info = skb_tunnel_info(skb);
 
 	if (rdst) {
+		dst = &rdst->remote_ip;
+		if (vxlan_addr_any(dst)) {
+			if (did_rsc) {
+				/* short-circuited back to local bridge */
+				vxlan_encap_bypass(skb, vxlan, vxlan);
+				return;
+			}
+			goto drop;
+		}
+
 		dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
 		vni = rdst->remote_vni;
-		dst = &rdst->remote_ip;
 		src = &vxlan->cfg.saddr;
 		dst_cache = &rdst->dst_cache;
+		md->gbp = skb->mark;
+		ttl = vxlan->cfg.ttl;
+		if (!ttl && vxlan_addr_multicast(dst))
+			ttl = 1;
+
+		tos = vxlan->cfg.tos;
+		if (tos == 1)
+			tos = ip_tunnel_get_dsfield(old_iph, skb);
+
+		if (dst->sa.sa_family == AF_INET)
+			udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX);
+		else
+			udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
+		label = vxlan->cfg.label;
 	} else {
 		if (!info) {
 			WARN_ONCE(1, "%s: Missing encapsulation instructions\n",
 				  dev->name);
 			goto drop;
 		}
-		dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
-		vni = tunnel_id_to_key32(info->key.tun_id);
 		remote_ip.sa.sa_family = ip_tunnel_info_af(info);
 		if (remote_ip.sa.sa_family == AF_INET) {
 			remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
@@ -1975,182 +2041,108 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
 			local_ip.sin6.sin6_addr = info->key.u.ipv6.src;
 		}
 		dst = &remote_ip;
+		dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
+		vni = tunnel_id_to_key32(info->key.tun_id);
 		src = &local_ip;
 		dst_cache = &info->dst_cache;
-	}
-
-	if (vxlan_addr_any(dst)) {
-		if (did_rsc) {
-			/* short-circuited back to local bridge */
-			vxlan_encap_bypass(skb, vxlan, vxlan);
-			return;
-		}
-		goto drop;
-	}
-
-	old_iph = ip_hdr(skb);
-
-	ttl = vxlan->cfg.ttl;
-	if (!ttl && vxlan_addr_multicast(dst))
-		ttl = 1;
-
-	tos = vxlan->cfg.tos;
-	if (tos == 1)
-		tos = ip_tunnel_get_dsfield(old_iph, skb);
-
-	label = vxlan->cfg.label;
-	src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
-				     vxlan->cfg.port_max, true);
-
-	if (info) {
+		if (info->options_len)
+			md = ip_tunnel_info_opts(info);
 		ttl = info->key.ttl;
 		tos = info->key.tos;
 		label = info->key.label;
 		udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
-
-		if (info->options_len)
-			md = ip_tunnel_info_opts(info);
-	} else {
-		md->gbp = skb->mark;
 	}
+	src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
+				     vxlan->cfg.port_max, true);
 
 	if (dst->sa.sa_family == AF_INET) {
 		struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
+		struct rtable *rt;
+		__be16 df = 0;
 
-		if (!sock4)
-			goto drop;
-		sk = sock4->sock->sk;
-
-		rt = vxlan_get_route(vxlan, skb,
+		rt = vxlan_get_route(vxlan, dev, sock4, skb,
 				     rdst ? rdst->remote_ifindex : 0, tos,
 				     dst->sin.sin_addr.s_addr,
 				     &src->sin.sin_addr.s_addr,
 				     dst_cache, info);
-		if (IS_ERR(rt)) {
-			netdev_dbg(dev, "no route to %pI4\n",
-				   &dst->sin.sin_addr.s_addr);
-			dev->stats.tx_carrier_errors++;
+		if (IS_ERR(rt))
 			goto tx_error;
-		}
-
-		if (rt->dst.dev == dev) {
-			netdev_dbg(dev, "circular route to %pI4\n",
-				   &dst->sin.sin_addr.s_addr);
-			dev->stats.collisions++;
-			goto rt_tx_error;
-		}
 
 		/* Bypass encapsulation if the destination is local */
-		if (!info && rt->rt_flags & RTCF_LOCAL &&
-		    !(rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
-			struct vxlan_dev *dst_vxlan;
-
-			ip_rt_put(rt);
-			dst_vxlan = vxlan_find_vni(vxlan->net, vni,
-						   dst->sa.sa_family, dst_port,
-						   vxlan->flags);
-			if (!dst_vxlan)
-				goto tx_error;
-			vxlan_encap_bypass(skb, vxlan, dst_vxlan);
-			return;
+		if (!info) {
+			err = encap_bypass_if_local(skb, dev, vxlan, dst,
+						    dst_port, vni, &rt->dst,
+						    rt->rt_flags);
+			if (err)
+				return;
+		} else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT) {
+			df = htons(IP_DF);
 		}
 
-		if (!info)
-			udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX);
-		else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
-			df = htons(IP_DF);
-
+		ndst = &rt->dst;
 		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
 		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
-		err = vxlan_build_skb(skb, &rt->dst, sizeof(struct iphdr),
+		err = vxlan_build_skb(skb, ndst, sizeof(struct iphdr),
 				      vni, md, flags, udp_sum);
 		if (err < 0)
-			goto xmit_tx_error;
+			goto tx_error;
 
-		udp_tunnel_xmit_skb(rt, sk, skb, src->sin.sin_addr.s_addr,
+		udp_tunnel_xmit_skb(rt, sock4->sock->sk, skb, src->sin.sin_addr.s_addr,
 				    dst->sin.sin_addr.s_addr, tos, ttl, df,
 				    src_port, dst_port, xnet, !udp_sum);
 #if IS_ENABLED(CONFIG_IPV6)
 	} else {
 		struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
-		struct dst_entry *ndst;
-		u32 rt6i_flags;
 
-		if (!sock6)
-			goto drop;
-		sk = sock6->sock->sk;
-
-		ndst = vxlan6_get_route(vxlan, skb,
+		ndst = vxlan6_get_route(vxlan, dev, sock6, skb,
 					rdst ? rdst->remote_ifindex : 0, tos,
 					label, &dst->sin6.sin6_addr,
 					&src->sin6.sin6_addr,
 					dst_cache, info);
 		if (IS_ERR(ndst)) {
-			netdev_dbg(dev, "no route to %pI6\n",
-				   &dst->sin6.sin6_addr);
-			dev->stats.tx_carrier_errors++;
+			ndst = NULL;
 			goto tx_error;
 		}
 
-		if (ndst->dev == dev) {
-			netdev_dbg(dev, "circular route to %pI6\n",
-				   &dst->sin6.sin6_addr);
-			dst_release(ndst);
-			dev->stats.collisions++;
-			goto tx_error;
+		if (!info) {
+			u32 rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
+
+			err = encap_bypass_if_local(skb, dev, vxlan, dst,
						    dst_port, vni, ndst,
+						    rt6i_flags);
+			if (err)
+				return;
 		}
 
-		/* Bypass encapsulation if the destination is local */
-		rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
-		if (!info && rt6i_flags & RTF_LOCAL &&
-		    !(rt6i_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
-			struct vxlan_dev *dst_vxlan;
-
-			dst_release(ndst);
-			dst_vxlan = vxlan_find_vni(vxlan->net, vni,
-						   dst->sa.sa_family, dst_port,
-						   vxlan->flags);
-			if (!dst_vxlan)
-				goto tx_error;
-			vxlan_encap_bypass(skb, vxlan, dst_vxlan);
-			return;
-		}
-
-		if (!info)
-			udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
-
 		tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
 		ttl = ttl ? : ip6_dst_hoplimit(ndst);
 		skb_scrub_packet(skb, xnet);
 		err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr),
 				      vni, md, flags, udp_sum);
-		if (err < 0) {
-			dst_release(ndst);
-			dev->stats.tx_errors++;
-			return;
-		}
-		udp_tunnel6_xmit_skb(ndst, sk, skb, dev,
+		if (err < 0)
+			goto tx_error;
+
+		udp_tunnel6_xmit_skb(ndst, sock6->sock->sk, skb, dev,
 				     &src->sin6.sin6_addr,
 				     &dst->sin6.sin6_addr, tos, ttl,
 				     label, src_port, dst_port, !udp_sum);
 #endif
 	}
-
 	return;
 
 drop:
 	dev->stats.tx_dropped++;
-	goto tx_free;
-
-xmit_tx_error:
-	/* skb is already freed. */
-	skb = NULL;
-rt_tx_error:
-	ip_rt_put(rt);
-tx_error:
-	dev->stats.tx_errors++;
-tx_free:
 	dev_kfree_skb(skb);
+	return;
+
+tx_error:
+	if (err == -ELOOP)
+		dev->stats.collisions++;
+	else if (err == -ENETUNREACH)
+		dev->stats.tx_carrier_errors++;
+	dst_release(ndst);
+	dev->stats.tx_errors++;
+	kfree_skb(skb);
 }
 
 /* Transmit local packets over Vxlan
@@ -2429,9 +2421,7 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 		struct vxlan_sock *sock4 = rcu_dereference(vxlan->vn4_sock);
 		struct rtable *rt;
 
-		if (!sock4)
-			return -EINVAL;
-		rt = vxlan_get_route(vxlan, skb, 0, info->key.tos,
+		rt = vxlan_get_route(vxlan, dev, sock4, skb, 0, info->key.tos,
 				     info->key.u.ipv4.dst,
 				     &info->key.u.ipv4.src, NULL, info);
 		if (IS_ERR(rt))
@@ -2439,9 +2429,10 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
 		ip_rt_put(rt);
 	} else {
 #if IS_ENABLED(CONFIG_IPV6)
+		struct vxlan_sock *sock6 = rcu_dereference(vxlan->vn6_sock);
 		struct dst_entry *ndst;
 
-		ndst = vxlan6_get_route(vxlan, skb, 0, info->key.tos,
+		ndst = vxlan6_get_route(vxlan, dev, sock6, skb, 0, info->key.tos,
 					info->key.label, &info->key.u.ipv6.dst,
 					&info->key.u.ipv6.src, NULL, info);
 		if (IS_ERR(ndst))
@@ -2529,10 +2520,8 @@ static void vxlan_setup(struct net_device *dev)
 	dev->features   |= NETIF_F_GSO_SOFTWARE;
 
 	dev->vlan_features = dev->features;
-	dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
 	dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
 	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
-	dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
 	netif_keep_dst(dev);
 	dev->priv_flags |= IFF_NO_QUEUE;
 
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 3319d97d789d..8d5fcd6284ce 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -399,22 +399,6 @@ static inline struct sk_buff *__vlan_hwaccel_push_inside(struct sk_buff *skb)
 	skb->vlan_tci = 0;
 	return skb;
 }
-/*
- * vlan_hwaccel_push_inside - pushes vlan tag to the payload
- * @skb: skbuff to tag
- *
- * Checks is tag is present in @skb->vlan_tci and if it is, it pushes the
- * VLAN tag from @skb->vlan_tci inside to the payload.
- *
- * Following the skb_unshare() example, in case of error, the calling function
- * doesn't have to worry about freeing the original skb.
- */
-static inline struct sk_buff *vlan_hwaccel_push_inside(struct sk_buff *skb)
-{
-	if (skb_vlan_tag_present(skb))
-		skb = __vlan_hwaccel_push_inside(skb);
-	return skb;
-}
 
 /**
  * __vlan_hwaccel_put_tag - hardware accelerated VLAN inserting
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 308adc4154f4..49a59202f85e 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -281,16 +281,6 @@ struct vxlan_dev {
 struct net_device *vxlan_dev_create(struct net *net, const char *name,
 				    u8 name_assign_type, struct vxlan_config *conf);
 
-static inline __be16 vxlan_dev_dst_port(struct vxlan_dev *vxlan,
-					unsigned short family)
-{
-#if IS_ENABLED(CONFIG_IPV6)
-	if (family == AF_INET6)
-		return inet_sk(vxlan->vn6_sock->sock->sk)->inet_sport;
-#endif
-	return inet_sk(vxlan->vn4_sock->sock->sk)->inet_sport;
-}
-
 static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
 						     netdev_features_t features)
 {