From f35581d64e55fc65753a62957b3b98127d560d07 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 31 Jan 2017 22:59:51 -0800 Subject: [PATCH 1/5] ip_tunnels: new IP_TUNNEL_INFO_BRIDGE flag for ip_tunnel_info mode New ip_tunnel_info flag to represent bridged tunnel metadata. Used by bridge driver later in the series to pass per vlan dst metadata to bridge ports. Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 3d4ca4df1209..95056796657c 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -58,6 +58,7 @@ struct ip_tunnel_key { /* Flags for ip_tunnel_info mode. */ #define IP_TUNNEL_INFO_TX 0x01 /* represents tx tunnel parameters */ #define IP_TUNNEL_INFO_IPV6 0x02 /* key contains IPv6 addresses */ +#define IP_TUNNEL_INFO_BRIDGE 0x04 /* represents a bridged tunnel id */ /* Maximum tunnel options length. */ #define IP_TUNNEL_OPTS_MAX \ From 3ad7a4b141ebd6091494913672d7166d5c2764e4 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 31 Jan 2017 22:59:52 -0800 Subject: [PATCH 2/5] vxlan: support fdb and learning in COLLECT_METADATA mode Vxlan COLLECT_METADATA mode today solves the per-vni netdev scalability problem in l3 networks. It expects all forwarding information to be present in dst_metadata. This patch series enhances collect metadata mode to include the case where only vni is present in dst_metadata, and the vxlan driver can then use the rest of the forwarding information datbase to make forwarding decisions. There is no change to default COLLECT_METADATA behaviour. These changes only apply to COLLECT_METADATA when used with the bridging use-case with a special dst_metadata tunnel info flag (eg: where vxlan device is part of a bridge). For all this to work, the vxlan driver will need to now support a single fdb table hashed by mac + vni. This series essentially makes this happen. use-case and workflow: vxlan collect metadata device participates in bridging vlan to vn-segments. Bridge driver above the vxlan device, sends the vni corresponding to the vlan in the dst_metadata. vxlan driver will lookup forwarding database with (mac + vni) for the required remote destination information to forward the packet. Changes introduced by this patch: - allow learning and forwarding database state in vxlan netdev in COLLECT_METADATA mode. Current behaviour is not changed by default. tunnel info flag IP_TUNNEL_INFO_BRIDGE is used to support the new bridge friendly mode. - A single fdb table hashed by (mac, vni) to allow fdb entries with multiple vnis in the same fdb table - rx path already has the vni - tx path expects a vni in the packet with dst_metadata - prior to this series, fdb remote_dsts carried remote vni and the vxlan device carrying the fdb table represented the source vni. With the vxlan device now representing multiple vnis, this patch adds a src vni attribute to the fdb entry. The remote vni already uses NDA_VNI attribute. This patch introduces NDA_SRC_VNI netlink attribute to represent the src vni in a multi vni fdb table. iproute2 example (patched and pruned iproute2 output to just show relevant fdb entries): example shows same host mac learnt on two vni's. before (netdev per vni): $bridge fdb show | grep "00:02:00:00:00:03" 00:02:00:00:00:03 dev vxlan1001 dst 12.0.0.8 self 00:02:00:00:00:03 dev vxlan1000 dst 12.0.0.8 self after this patch with collect metadata in bridged mode (single netdev): $bridge fdb show | grep "00:02:00:00:00:03" 00:02:00:00:00:03 dev vxlan0 src_vni 1001 dst 12.0.0.8 self 00:02:00:00:00:03 dev vxlan0 src_vni 1000 dst 12.0.0.8 self Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 196 +++++++++++++++++++++------------ include/uapi/linux/neighbour.h | 1 + 2 files changed, 126 insertions(+), 71 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 2e48ce22eabf..2374a75dcb55 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -75,6 +75,7 @@ struct vxlan_fdb { struct list_head remotes; u8 eth_addr[ETH_ALEN]; u16 state; /* see ndm_state */ + __be32 vni; u8 flags; /* see ndm_flags */ }; @@ -302,6 +303,10 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, if (rdst->remote_vni != vxlan->default_dst.remote_vni && nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni))) goto nla_put_failure; + if ((vxlan->flags & VXLAN_F_COLLECT_METADATA) && fdb->vni && + nla_put_u32(skb, NDA_SRC_VNI, + be32_to_cpu(fdb->vni))) + goto nla_put_failure; if (rdst->remote_ifindex && nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex)) goto nla_put_failure; @@ -400,34 +405,51 @@ static u32 eth_hash(const unsigned char *addr) return hash_64(value, FDB_HASH_BITS); } +static u32 eth_vni_hash(const unsigned char *addr, __be32 vni) +{ + /* use 1 byte of OUI and 3 bytes of NIC */ + u32 key = get_unaligned((u32 *)(addr + 2)); + + return jhash_2words(key, vni, vxlan_salt) & (FDB_HASH_SIZE - 1); +} + /* Hash chain to use given mac address */ static inline struct hlist_head *vxlan_fdb_head(struct vxlan_dev *vxlan, - const u8 *mac) + const u8 *mac, __be32 vni) { - return &vxlan->fdb_head[eth_hash(mac)]; + if (vxlan->flags & VXLAN_F_COLLECT_METADATA) + return &vxlan->fdb_head[eth_vni_hash(mac, vni)]; + else + return &vxlan->fdb_head[eth_hash(mac)]; } /* Look up Ethernet address in forwarding table */ static struct vxlan_fdb *__vxlan_find_mac(struct vxlan_dev *vxlan, - const u8 *mac) + const u8 *mac, __be32 vni) { - struct hlist_head *head = vxlan_fdb_head(vxlan, mac); + struct hlist_head *head = vxlan_fdb_head(vxlan, mac, vni); struct vxlan_fdb *f; hlist_for_each_entry_rcu(f, head, hlist) { - if (ether_addr_equal(mac, f->eth_addr)) - return f; + if (ether_addr_equal(mac, f->eth_addr)) { + if (vxlan->flags & VXLAN_F_COLLECT_METADATA) { + if (vni == f->vni) + return f; + } else { + return f; + } + } } return NULL; } static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, - const u8 *mac) + const u8 *mac, __be32 vni) { struct vxlan_fdb *f; - f = __vxlan_find_mac(vxlan, mac); + f = __vxlan_find_mac(vxlan, mac, vni); if (f) f->used = jiffies; @@ -605,15 +627,15 @@ static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff) static int vxlan_fdb_create(struct vxlan_dev *vxlan, const u8 *mac, union vxlan_addr *ip, __u16 state, __u16 flags, - __be16 port, __be32 vni, __u32 ifindex, - __u8 ndm_flags) + __be16 port, __be32 src_vni, __be32 vni, + __u32 ifindex, __u8 ndm_flags) { struct vxlan_rdst *rd = NULL; struct vxlan_fdb *f; int notify = 0; int rc; - f = __vxlan_find_mac(vxlan, mac); + f = __vxlan_find_mac(vxlan, mac, src_vni); if (f) { if (flags & NLM_F_EXCL) { netdev_dbg(vxlan->dev, @@ -670,6 +692,7 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, f->state = state; f->flags = ndm_flags; f->updated = f->used = jiffies; + f->vni = src_vni; INIT_LIST_HEAD(&f->remotes); memcpy(f->eth_addr, mac, ETH_ALEN); @@ -681,7 +704,7 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan, ++vxlan->addrcnt; hlist_add_head_rcu(&f->hlist, - vxlan_fdb_head(vxlan, mac)); + vxlan_fdb_head(vxlan, mac, src_vni)); } if (notify) { @@ -718,8 +741,8 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f) } static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan, - union vxlan_addr *ip, __be16 *port, __be32 *vni, - u32 *ifindex) + union vxlan_addr *ip, __be16 *port, __be32 *src_vni, + __be32 *vni, u32 *ifindex) { struct net *net = dev_net(vxlan->dev); int err; @@ -757,6 +780,14 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan, *vni = vxlan->default_dst.remote_vni; } + if (tb[NDA_SRC_VNI]) { + if (nla_len(tb[NDA_SRC_VNI]) != sizeof(u32)) + return -EINVAL; + *src_vni = cpu_to_be32(nla_get_u32(tb[NDA_SRC_VNI])); + } else { + *src_vni = vxlan->default_dst.remote_vni; + } + if (tb[NDA_IFINDEX]) { struct net_device *tdev; @@ -782,7 +813,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], /* struct net *net = dev_net(vxlan->dev); */ union vxlan_addr ip; __be16 port; - __be32 vni; + __be32 src_vni, vni; u32 ifindex; int err; @@ -795,7 +826,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], if (tb[NDA_DST] == NULL) return -EINVAL; - err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex); + err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex); if (err) return err; @@ -804,36 +835,24 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], spin_lock_bh(&vxlan->hash_lock); err = vxlan_fdb_create(vxlan, addr, &ip, ndm->ndm_state, flags, - port, vni, ifindex, ndm->ndm_flags); + port, src_vni, vni, ifindex, ndm->ndm_flags); spin_unlock_bh(&vxlan->hash_lock); return err; } -/* Delete entry (via netlink) */ -static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 vid) +static int __vxlan_fdb_delete(struct vxlan_dev *vxlan, + const unsigned char *addr, union vxlan_addr ip, + __be16 port, __be32 src_vni, u32 vni, u32 ifindex, + u16 vid) { - struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_fdb *f; struct vxlan_rdst *rd = NULL; - union vxlan_addr ip; - __be16 port; - __be32 vni; - u32 ifindex; - int err; + int err = -ENOENT; - err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex); - if (err) - return err; - - err = -ENOENT; - - spin_lock_bh(&vxlan->hash_lock); - f = vxlan_find_mac(vxlan, addr); + f = vxlan_find_mac(vxlan, addr, src_vni); if (!f) - goto out; + return err; if (!vxlan_addr_any(&ip)) { rd = vxlan_fdb_find_rdst(f, &ip, port, vni, ifindex); @@ -841,8 +860,6 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], goto out; } - err = 0; - /* remove a destination if it's not the only one on the list, * otherwise destroy the fdb entry */ @@ -856,6 +873,28 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], vxlan_fdb_destroy(vxlan, f); out: + return 0; +} + +/* Delete entry (via netlink) */ +static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + union vxlan_addr ip; + __be32 src_vni, vni; + __be16 port; + u32 ifindex; + int err; + + err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex); + if (err) + return err; + + spin_lock_bh(&vxlan->hash_lock); + err = __vxlan_fdb_delete(vxlan, addr, ip, port, src_vni, vni, ifindex, + vid); spin_unlock_bh(&vxlan->hash_lock); return err; @@ -901,12 +940,13 @@ out: * Return true if packet is bogus and should be dropped. */ static bool vxlan_snoop(struct net_device *dev, - union vxlan_addr *src_ip, const u8 *src_mac) + union vxlan_addr *src_ip, const u8 *src_mac, + __be32 vni) { struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_fdb *f; - f = vxlan_find_mac(vxlan, src_mac); + f = vxlan_find_mac(vxlan, src_mac, vni); if (likely(f)) { struct vxlan_rdst *rdst = first_remote_rcu(f); @@ -935,6 +975,7 @@ static bool vxlan_snoop(struct net_device *dev, NUD_REACHABLE, NLM_F_EXCL|NLM_F_CREATE, vxlan->cfg.dst_port, + vni, vxlan->default_dst.remote_vni, 0, NTF_SELF); spin_unlock(&vxlan->hash_lock); @@ -1202,7 +1243,7 @@ static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed, static bool vxlan_set_mac(struct vxlan_dev *vxlan, struct vxlan_sock *vs, - struct sk_buff *skb) + struct sk_buff *skb, __be32 vni) { union vxlan_addr saddr; @@ -1226,7 +1267,7 @@ static bool vxlan_set_mac(struct vxlan_dev *vxlan, } if ((vxlan->flags & VXLAN_F_LEARN) && - vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source)) + vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source, vni)) return false; return true; @@ -1268,6 +1309,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) __be16 protocol = htons(ETH_P_TEB); bool raw_proto = false; void *oiph; + __be32 vni = 0; /* Need UDP and VXLAN header to be present */ if (!pskb_may_pull(skb, VXLAN_HLEN)) @@ -1289,7 +1331,12 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) if (!vs) goto drop; - vxlan = vxlan_vs_find_vni(vs, vxlan_vni(vxlan_hdr(skb)->vx_vni)); + vni = vxlan_vni(vxlan_hdr(skb)->vx_vni); + + if ((vs->flags & VXLAN_F_COLLECT_METADATA) && !vni) + goto drop; + + vxlan = vxlan_vs_find_vni(vs, vni); if (!vxlan) goto drop; @@ -1307,7 +1354,6 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) goto drop; if (vxlan_collect_metadata(vs)) { - __be32 vni = vxlan_vni(vxlan_hdr(skb)->vx_vni); struct metadata_dst *tun_dst; tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY, @@ -1345,7 +1391,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) } if (!raw_proto) { - if (!vxlan_set_mac(vxlan, vs, skb)) + if (!vxlan_set_mac(vxlan, vs, skb, vni)) goto drop; } else { skb_reset_mac_header(skb); @@ -1377,7 +1423,7 @@ drop: return 0; } -static int arp_reduce(struct net_device *dev, struct sk_buff *skb) +static int arp_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) { struct vxlan_dev *vxlan = netdev_priv(dev); struct arphdr *parp; @@ -1424,7 +1470,7 @@ static int arp_reduce(struct net_device *dev, struct sk_buff *skb) goto out; } - f = vxlan_find_mac(vxlan, n->ha); + f = vxlan_find_mac(vxlan, n->ha, vni); if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) { /* bridge-local neighbor */ neigh_release(n); @@ -1548,7 +1594,7 @@ static struct sk_buff *vxlan_na_create(struct sk_buff *request, return reply; } -static int neigh_reduce(struct net_device *dev, struct sk_buff *skb) +static int neigh_reduce(struct net_device *dev, struct sk_buff *skb, __be32 vni) { struct vxlan_dev *vxlan = netdev_priv(dev); struct nd_msg *msg; @@ -1585,7 +1631,7 @@ static int neigh_reduce(struct net_device *dev, struct sk_buff *skb) goto out; } - f = vxlan_find_mac(vxlan, n->ha); + f = vxlan_find_mac(vxlan, n->ha, vni); if (f && vxlan_addr_any(&(first_remote_rcu(f)->remote_ip))) { /* bridge-local neighbor */ neigh_release(n); @@ -1906,7 +1952,7 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan, /* Bypass encapsulation if the destination is local */ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, - struct vxlan_dev *dst_vxlan) + struct vxlan_dev *dst_vxlan, __be32 vni) { struct pcpu_sw_netstats *tx_stats, *rx_stats; union vxlan_addr loopback; @@ -1932,7 +1978,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, } if (dst_vxlan->flags & VXLAN_F_LEARN) - vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source); + vxlan_snoop(skb->dev, &loopback, eth_hdr(skb)->h_source, vni); u64_stats_update_begin(&tx_stats->syncp); tx_stats->tx_packets++; @@ -1976,7 +2022,7 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev, return -ENOENT; } - vxlan_encap_bypass(skb, vxlan, dst_vxlan); + vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni); return 1; } @@ -1984,7 +2030,8 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev, } static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, - struct vxlan_rdst *rdst, bool did_rsc) + __be32 default_vni, struct vxlan_rdst *rdst, + bool did_rsc) { struct dst_cache *dst_cache; struct ip_tunnel_info *info; @@ -2011,14 +2058,14 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (vxlan_addr_any(dst)) { if (did_rsc) { /* short-circuited back to local bridge */ - vxlan_encap_bypass(skb, vxlan, vxlan); + vxlan_encap_bypass(skb, vxlan, vxlan, default_vni); return; } goto drop; } dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port; - vni = rdst->remote_vni; + vni = (rdst->remote_vni) ? : default_vni; src = &vxlan->cfg.saddr; dst_cache = &rdst->dst_cache; md->gbp = skb->mark; @@ -2173,23 +2220,29 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) bool did_rsc = false; struct vxlan_rdst *rdst, *fdst = NULL; struct vxlan_fdb *f; + __be32 vni = 0; info = skb_tunnel_info(skb); skb_reset_mac_header(skb); if (vxlan->flags & VXLAN_F_COLLECT_METADATA) { - if (info && info->mode & IP_TUNNEL_INFO_TX) - vxlan_xmit_one(skb, dev, NULL, false); - else - kfree_skb(skb); - return NETDEV_TX_OK; + if (info && info->mode & IP_TUNNEL_INFO_BRIDGE && + info->mode & IP_TUNNEL_INFO_TX) { + vni = tunnel_id_to_key32(info->key.tun_id); + } else { + if (info && info->mode & IP_TUNNEL_INFO_TX) + vxlan_xmit_one(skb, dev, vni, NULL, false); + else + kfree_skb(skb); + return NETDEV_TX_OK; + } } if (vxlan->flags & VXLAN_F_PROXY) { eth = eth_hdr(skb); if (ntohs(eth->h_proto) == ETH_P_ARP) - return arp_reduce(dev, skb); + return arp_reduce(dev, skb, vni); #if IS_ENABLED(CONFIG_IPV6) else if (ntohs(eth->h_proto) == ETH_P_IPV6 && pskb_may_pull(skb, sizeof(struct ipv6hdr) @@ -2200,13 +2253,13 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) msg = (struct nd_msg *)skb_transport_header(skb); if (msg->icmph.icmp6_code == 0 && msg->icmph.icmp6_type == NDISC_NEIGHBOUR_SOLICITATION) - return neigh_reduce(dev, skb); + return neigh_reduce(dev, skb, vni); } #endif } eth = eth_hdr(skb); - f = vxlan_find_mac(vxlan, eth->h_dest); + f = vxlan_find_mac(vxlan, eth->h_dest, vni); did_rsc = false; if (f && (f->flags & NTF_ROUTER) && (vxlan->flags & VXLAN_F_RSC) && @@ -2214,11 +2267,11 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) ntohs(eth->h_proto) == ETH_P_IPV6)) { did_rsc = route_shortcircuit(dev, skb); if (did_rsc) - f = vxlan_find_mac(vxlan, eth->h_dest); + f = vxlan_find_mac(vxlan, eth->h_dest, vni); } if (f == NULL) { - f = vxlan_find_mac(vxlan, all_zeros_mac); + f = vxlan_find_mac(vxlan, all_zeros_mac, vni); if (f == NULL) { if ((vxlan->flags & VXLAN_F_L2MISS) && !is_multicast_ether_addr(eth->h_dest)) @@ -2239,11 +2292,11 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) } skb1 = skb_clone(skb, GFP_ATOMIC); if (skb1) - vxlan_xmit_one(skb1, dev, rdst, did_rsc); + vxlan_xmit_one(skb1, dev, vni, rdst, did_rsc); } if (fdst) - vxlan_xmit_one(skb, dev, fdst, did_rsc); + vxlan_xmit_one(skb, dev, vni, fdst, did_rsc); else kfree_skb(skb); return NETDEV_TX_OK; @@ -2307,12 +2360,12 @@ static int vxlan_init(struct net_device *dev) return 0; } -static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan) +static void vxlan_fdb_delete_default(struct vxlan_dev *vxlan, __be32 vni) { struct vxlan_fdb *f; spin_lock_bh(&vxlan->hash_lock); - f = __vxlan_find_mac(vxlan, all_zeros_mac); + f = __vxlan_find_mac(vxlan, all_zeros_mac, vni); if (f) vxlan_fdb_destroy(vxlan, f); spin_unlock_bh(&vxlan->hash_lock); @@ -2322,7 +2375,7 @@ static void vxlan_uninit(struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); - vxlan_fdb_delete_default(vxlan); + vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni); free_percpu(dev->tstats); } @@ -2923,6 +2976,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, NLM_F_EXCL|NLM_F_CREATE, vxlan->cfg.dst_port, vxlan->default_dst.remote_vni, + vxlan->default_dst.remote_vni, vxlan->default_dst.remote_ifindex, NTF_SELF); if (err) @@ -2931,7 +2985,7 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, err = register_netdevice(dev); if (err) { - vxlan_fdb_delete_default(vxlan); + vxlan_fdb_delete_default(vxlan, vxlan->cfg.vni); return err; } diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index bd99a8d80f36..f3d16dbe09d6 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -26,6 +26,7 @@ enum { NDA_IFINDEX, NDA_MASTER, NDA_LINK_NETNSID, + NDA_SRC_VNI, __NDA_MAX }; From b3c7ef0adadc5768e0baa786213c6bd1ce521a77 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 31 Jan 2017 22:59:53 -0800 Subject: [PATCH 3/5] bridge: uapi: add per vlan tunnel info New nested netlink attribute to associate tunnel info per vlan. This is used by bridge driver to send tunnel metadata to bridge ports in vlan tunnel mode. This patch also adds new per port flag IFLA_BRPORT_VLAN_TUNNEL to enable vlan tunnel mode. off by default. One example use for this is a vxlan bridging gateway or vtep which maps vlans to vn-segments (or vnis). User can configure per-vlan tunnel information which the bridge driver can use to bridge vlan into the corresponding vn-segment. Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 1 + include/uapi/linux/if_bridge.h | 11 +++++++++++ include/uapi/linux/if_link.h | 1 + 3 files changed, 13 insertions(+) diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index debc9d5904e5..c5847dc75a93 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -47,6 +47,7 @@ struct br_ip_list { #define BR_PROXYARP_WIFI BIT(10) #define BR_MCAST_FLOOD BIT(11) #define BR_MULTICAST_TO_UNICAST BIT(12) +#define BR_VLAN_TUNNEL BIT(13) #define BR_DEFAULT_AGEING_TIME (300 * HZ) diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index ab92bca6d448..a9e6244ce438 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -118,6 +118,7 @@ enum { IFLA_BRIDGE_FLAGS, IFLA_BRIDGE_MODE, IFLA_BRIDGE_VLAN_INFO, + IFLA_BRIDGE_VLAN_TUNNEL_INFO, __IFLA_BRIDGE_MAX, }; #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) @@ -134,6 +135,16 @@ struct bridge_vlan_info { __u16 vid; }; +enum { + IFLA_BRIDGE_VLAN_TUNNEL_UNSPEC, + IFLA_BRIDGE_VLAN_TUNNEL_ID, + IFLA_BRIDGE_VLAN_TUNNEL_VID, + IFLA_BRIDGE_VLAN_TUNNEL_FLAGS, + __IFLA_BRIDGE_VLAN_TUNNEL_MAX, +}; + +#define IFLA_BRIDGE_VLAN_TUNNEL_MAX (__IFLA_BRIDGE_VLAN_TUNNEL_MAX - 1) + struct bridge_vlan_xstats { __u64 rx_bytes; __u64 rx_packets; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index b9aa5641ebe5..320fc1e747ee 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -322,6 +322,7 @@ enum { IFLA_BRPORT_PAD, IFLA_BRPORT_MCAST_FLOOD, IFLA_BRPORT_MCAST_TO_UCAST, + IFLA_BRPORT_VLAN_TUNNEL, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) From efa5356b0d9753b9d7e63e41459eba106cce30f3 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 31 Jan 2017 22:59:54 -0800 Subject: [PATCH 4/5] bridge: per vlan dst_metadata netlink support This patch adds support to attach per vlan tunnel info dst metadata. This enables bridge driver to map vlan to tunnel_info at ingress and egress. It uses the kernel dst_metadata infrastructure. The initial use case is vlan to vni bridging, but the api is generic to extend to any tunnel_info in the future: - Uapi to configure/unconfigure/dump per vlan tunnel data - netlink functions to configure vlan and tunnel_info mapping - Introduces bridge port flag BR_LWT_VLAN to enable attach/detach dst_metadata to bridged packets on ports. off by default. - changes to existing code is mainly refactor some existing vlan handling netlink code + hooks for new vlan tunnel code - I have kept the vlan tunnel code isolated in separate files. - most of the netlink vlan tunnel code is handling of vlan-tunid ranges (follows the vlan range handling code). To conserve space vlan-tunid by default are always dumped in ranges if applicable. Use case: example use for this is a vxlan bridging gateway or vtep which maps vlans to vn-segments (or vnis). iproute2 example (patched and pruned iproute2 output to just show relevant fdb entries): example shows same host mac learnt on two vni's and vlan 100 maps to vni 1000, vlan 101 maps to vni 1001 before (netdev per vni): $bridge fdb show | grep "00:02:00:00:00:03" 00:02:00:00:00:03 dev vxlan1001 vlan 101 master bridge 00:02:00:00:00:03 dev vxlan1001 dst 12.0.0.8 self 00:02:00:00:00:03 dev vxlan1000 vlan 100 master bridge 00:02:00:00:00:03 dev vxlan1000 dst 12.0.0.8 self after this patch with collect metdata in bridged mode (single netdev): $bridge fdb show | grep "00:02:00:00:00:03" 00:02:00:00:00:03 dev vxlan0 vlan 101 master bridge 00:02:00:00:00:03 dev vxlan0 src_vni 1001 dst 12.0.0.8 self 00:02:00:00:00:03 dev vxlan0 vlan 100 master bridge 00:02:00:00:00:03 dev vxlan0 src_vni 1000 dst 12.0.0.8 self CC: Nikolay Aleksandrov Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/bridge/Makefile | 5 +- net/bridge/br_netlink.c | 146 ++++++++++------ net/bridge/br_netlink_tunnel.c | 296 +++++++++++++++++++++++++++++++++ net/bridge/br_private.h | 10 ++ net/bridge/br_private_tunnel.h | 72 ++++++++ net/bridge/br_vlan.c | 17 +- net/bridge/br_vlan_tunnel.c | 149 +++++++++++++++++ 7 files changed, 644 insertions(+), 51 deletions(-) create mode 100644 net/bridge/br_netlink_tunnel.c create mode 100644 net/bridge/br_private_tunnel.h create mode 100644 net/bridge/br_vlan_tunnel.c diff --git a/net/bridge/Makefile b/net/bridge/Makefile index 0aefc011b668..40b1ede527ca 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -6,7 +6,8 @@ obj-$(CONFIG_BRIDGE) += bridge.o bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \ br_ioctl.o br_stp.o br_stp_bpdu.o \ - br_stp_if.o br_stp_timer.o br_netlink.o + br_stp_if.o br_stp_timer.o br_netlink.o \ + br_netlink_tunnel.o bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o @@ -18,7 +19,7 @@ obj-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o -bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o +bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o br_vlan_tunnel.o bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 1ca25498fe4d..fc5d885dbb22 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -20,6 +20,7 @@ #include "br_private.h" #include "br_private_stp.h" +#include "br_private_tunnel.h" static int __get_num_vlan_infos(struct net_bridge_vlan_group *vg, u32 filter_mask) @@ -95,9 +96,10 @@ static size_t br_get_link_af_size_filtered(const struct net_device *dev, u32 filter_mask) { struct net_bridge_vlan_group *vg = NULL; - struct net_bridge_port *p; + struct net_bridge_port *p = NULL; struct net_bridge *br; int num_vlan_infos; + size_t vinfo_sz = 0; rcu_read_lock(); if (br_port_exists(dev)) { @@ -110,8 +112,13 @@ static size_t br_get_link_af_size_filtered(const struct net_device *dev, num_vlan_infos = br_get_num_vlan_infos(vg, filter_mask); rcu_read_unlock(); + if (p && (p->flags & BR_VLAN_TUNNEL)) + vinfo_sz += br_get_vlan_tunnel_info_size(vg); + /* Each VLAN is returned in bridge_vlan_info along with flags */ - return num_vlan_infos * nla_total_size(sizeof(struct bridge_vlan_info)); + vinfo_sz += num_vlan_infos * nla_total_size(sizeof(struct bridge_vlan_info)); + + return vinfo_sz; } static inline size_t br_port_info_size(void) @@ -128,6 +135,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_UNICAST_FLOOD */ + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */ + nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */ + + nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */ + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */ + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */ + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */ @@ -194,7 +202,9 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u16(skb, IFLA_BRPORT_NO, p->port_no) || nla_put_u8(skb, IFLA_BRPORT_TOPOLOGY_CHANGE_ACK, p->topology_change_ack) || - nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending)) + nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending) || + nla_put_u8(skb, IFLA_BRPORT_VLAN_TUNNEL, !!(p->flags & + BR_VLAN_TUNNEL))) return -EMSGSIZE; timerval = br_timer_value(&p->message_age_timer); @@ -417,6 +427,9 @@ static int br_fill_ifinfo(struct sk_buff *skb, err = br_fill_ifvlaninfo_compressed(skb, vg); else err = br_fill_ifvlaninfo(skb, vg); + + if (port && (port->flags & BR_VLAN_TUNNEL)) + err = br_fill_vlan_tunnel_info(skb, vg); rcu_read_unlock(); if (err) goto nla_put_failure; @@ -517,60 +530,91 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, return err; } +static int br_process_vlan_info(struct net_bridge *br, + struct net_bridge_port *p, int cmd, + struct bridge_vlan_info *vinfo_curr, + struct bridge_vlan_info **vinfo_last) +{ + if (!vinfo_curr->vid || vinfo_curr->vid >= VLAN_VID_MASK) + return -EINVAL; + + if (vinfo_curr->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { + /* check if we are already processing a range */ + if (*vinfo_last) + return -EINVAL; + *vinfo_last = vinfo_curr; + /* don't allow range of pvids */ + if ((*vinfo_last)->flags & BRIDGE_VLAN_INFO_PVID) + return -EINVAL; + return 0; + } + + if (*vinfo_last) { + struct bridge_vlan_info tmp_vinfo; + int v, err; + + if (!(vinfo_curr->flags & BRIDGE_VLAN_INFO_RANGE_END)) + return -EINVAL; + + if (vinfo_curr->vid <= (*vinfo_last)->vid) + return -EINVAL; + + memcpy(&tmp_vinfo, *vinfo_last, + sizeof(struct bridge_vlan_info)); + for (v = (*vinfo_last)->vid; v <= vinfo_curr->vid; v++) { + tmp_vinfo.vid = v; + err = br_vlan_info(br, p, cmd, &tmp_vinfo); + if (err) + break; + } + *vinfo_last = NULL; + + return 0; + } + + return br_vlan_info(br, p, cmd, vinfo_curr); +} + static int br_afspec(struct net_bridge *br, struct net_bridge_port *p, struct nlattr *af_spec, int cmd) { - struct bridge_vlan_info *vinfo_start = NULL; - struct bridge_vlan_info *vinfo = NULL; + struct bridge_vlan_info *vinfo_curr = NULL; + struct bridge_vlan_info *vinfo_last = NULL; struct nlattr *attr; - int err = 0; - int rem; + struct vtunnel_info tinfo_last = {}; + struct vtunnel_info tinfo_curr = {}; + int err = 0, rem; nla_for_each_nested(attr, af_spec, rem) { - if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO) - continue; - if (nla_len(attr) != sizeof(struct bridge_vlan_info)) - return -EINVAL; - vinfo = nla_data(attr); - if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) - return -EINVAL; - if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { - if (vinfo_start) + err = 0; + switch (nla_type(attr)) { + case IFLA_BRIDGE_VLAN_TUNNEL_INFO: + if (!(p->flags & BR_VLAN_TUNNEL)) return -EINVAL; - vinfo_start = vinfo; - /* don't allow range of pvids */ - if (vinfo_start->flags & BRIDGE_VLAN_INFO_PVID) - return -EINVAL; - continue; - } - - if (vinfo_start) { - struct bridge_vlan_info tmp_vinfo; - int v; - - if (!(vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END)) - return -EINVAL; - - if (vinfo->vid <= vinfo_start->vid) - return -EINVAL; - - memcpy(&tmp_vinfo, vinfo_start, - sizeof(struct bridge_vlan_info)); - - for (v = vinfo_start->vid; v <= vinfo->vid; v++) { - tmp_vinfo.vid = v; - err = br_vlan_info(br, p, cmd, &tmp_vinfo); - if (err) - break; - } - vinfo_start = NULL; - } else { - err = br_vlan_info(br, p, cmd, vinfo); - } - if (err) + err = br_parse_vlan_tunnel_info(attr, &tinfo_curr); + if (err) + return err; + err = br_process_vlan_tunnel_info(br, p, cmd, + &tinfo_curr, + &tinfo_last); + if (err) + return err; break; + case IFLA_BRIDGE_VLAN_INFO: + if (nla_len(attr) != sizeof(struct bridge_vlan_info)) + return -EINVAL; + vinfo_curr = nla_data(attr); + err = br_process_vlan_info(br, p, cmd, vinfo_curr, + &vinfo_last); + if (err) + return err; + break; + } + + if (err) + return err; } return err; @@ -630,8 +674,9 @@ static void br_set_port_flag(struct net_bridge_port *p, struct nlattr *tb[], /* Process bridge protocol info on port */ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) { - int err; unsigned long old_flags = p->flags; + bool br_vlan_tunnel_old = false; + int err; br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); @@ -644,6 +689,11 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP); br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI); + br_vlan_tunnel_old = (p->flags & BR_VLAN_TUNNEL) ? true : false; + br_set_port_flag(p, tb, IFLA_BRPORT_VLAN_TUNNEL, BR_VLAN_TUNNEL); + if (br_vlan_tunnel_old && !(p->flags & BR_VLAN_TUNNEL)) + nbp_vlan_tunnel_info_flush(p); + if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); if (err) diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c new file mode 100644 index 000000000000..99c68012c9d4 --- /dev/null +++ b/net/bridge/br_netlink_tunnel.c @@ -0,0 +1,296 @@ +/* + * Bridge per vlan tunnel port dst_metadata netlink control interface + * + * Authors: + * Roopa Prabhu + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "br_private.h" +#include "br_private_tunnel.h" + +static size_t __get_vlan_tinfo_size(void) +{ + return nla_total_size(0) + /* nest IFLA_BRIDGE_VLAN_TUNNEL_INFO */ + nla_total_size(sizeof(u32)) + /* IFLA_BRIDGE_VLAN_TUNNEL_ID */ + nla_total_size(sizeof(u16)) + /* IFLA_BRIDGE_VLAN_TUNNEL_VID */ + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_VLAN_TUNNEL_FLAGS */ +} + +static bool vlan_tunnel_id_isrange(struct net_bridge_vlan *v, + struct net_bridge_vlan *v_end) +{ + __be32 tunid_curr = tunnel_id_to_key32(v->tinfo.tunnel_id); + __be32 tunid_end = tunnel_id_to_key32(v_end->tinfo.tunnel_id); + + return (be32_to_cpu(tunid_curr) - be32_to_cpu(tunid_end)) == 1; +} + +static int __get_num_vlan_tunnel_infos(struct net_bridge_vlan_group *vg) +{ + struct net_bridge_vlan *v, *v_start = NULL, *v_end = NULL; + int num_tinfos = 0; + + /* Count number of vlan infos */ + list_for_each_entry_rcu(v, &vg->vlan_list, vlist) { + /* only a context, bridge vlan not activated */ + if (!br_vlan_should_use(v) || !v->tinfo.tunnel_id) + continue; + + if (!v_start) { + goto initvars; + } else if ((v->vid - v_end->vid) == 1 && + vlan_tunnel_id_isrange(v_end, v) == 1) { + v_end = v; + continue; + } else { + if ((v_end->vid - v->vid) > 0 && + vlan_tunnel_id_isrange(v_end, v) > 0) + num_tinfos += 2; + else + num_tinfos += 1; + } +initvars: + v_start = v; + v_end = v; + } + + if (v_start) { + if ((v_end->vid - v->vid) > 0 && + vlan_tunnel_id_isrange(v_end, v) > 0) + num_tinfos += 2; + else + num_tinfos += 1; + } + + return num_tinfos; +} + +int br_get_vlan_tunnel_info_size(struct net_bridge_vlan_group *vg) +{ + int num_tinfos; + + if (!vg) + return 0; + + rcu_read_lock(); + num_tinfos = __get_num_vlan_tunnel_infos(vg); + rcu_read_unlock(); + + return num_tinfos * __get_vlan_tinfo_size(); +} + +static int br_fill_vlan_tinfo(struct sk_buff *skb, u16 vid, + __be64 tunnel_id, u16 flags) +{ + __be32 tid = tunnel_id_to_key32(tunnel_id); + struct nlattr *tmap; + + tmap = nla_nest_start(skb, IFLA_BRIDGE_VLAN_TUNNEL_INFO); + if (!tmap) + return -EMSGSIZE; + if (nla_put_u32(skb, IFLA_BRIDGE_VLAN_TUNNEL_ID, + be32_to_cpu(tid))) + goto nla_put_failure; + if (nla_put_u16(skb, IFLA_BRIDGE_VLAN_TUNNEL_VID, + vid)) + goto nla_put_failure; + if (nla_put_u16(skb, IFLA_BRIDGE_VLAN_TUNNEL_FLAGS, + flags)) + goto nla_put_failure; + nla_nest_end(skb, tmap); + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, tmap); + + return -EMSGSIZE; +} + +static int br_fill_vlan_tinfo_range(struct sk_buff *skb, + struct net_bridge_vlan *vtbegin, + struct net_bridge_vlan *vtend) +{ + int err; + + if (vtbegin && vtend && (vtend->vid - vtbegin->vid) > 0) { + /* add range to skb */ + err = br_fill_vlan_tinfo(skb, vtbegin->vid, + vtbegin->tinfo.tunnel_id, + BRIDGE_VLAN_INFO_RANGE_BEGIN); + if (err) + return err; + + err = br_fill_vlan_tinfo(skb, vtend->vid, + vtend->tinfo.tunnel_id, + BRIDGE_VLAN_INFO_RANGE_END); + if (err) + return err; + } else { + err = br_fill_vlan_tinfo(skb, vtbegin->vid, + vtbegin->tinfo.tunnel_id, + 0); + if (err) + return err; + } + + return 0; +} + +int br_fill_vlan_tunnel_info(struct sk_buff *skb, + struct net_bridge_vlan_group *vg) +{ + struct net_bridge_vlan *vtbegin = NULL; + struct net_bridge_vlan *vtend = NULL; + struct net_bridge_vlan *v; + int err; + + /* Count number of vlan infos */ + list_for_each_entry_rcu(v, &vg->vlan_list, vlist) { + /* only a context, bridge vlan not activated */ + if (!br_vlan_should_use(v)) + continue; + + if (!v->tinfo.tunnel_dst) + continue; + + if (!vtbegin) { + goto initvars; + } else if ((v->vid - vtend->vid) == 1 && + vlan_tunnel_id_isrange(v, vtend)) { + vtend = v; + continue; + } else { + err = br_fill_vlan_tinfo_range(skb, vtbegin, vtend); + if (err) + return err; + } +initvars: + vtbegin = v; + vtend = v; + } + + if (vtbegin) { + err = br_fill_vlan_tinfo_range(skb, vtbegin, vtend); + if (err) + return err; + } + + return 0; +} + +static const struct nla_policy vlan_tunnel_policy[IFLA_BRIDGE_VLAN_TUNNEL_MAX + 1] = { + [IFLA_BRIDGE_VLAN_TUNNEL_ID] = { .type = NLA_U32 }, + [IFLA_BRIDGE_VLAN_TUNNEL_VID] = { .type = NLA_U16 }, + [IFLA_BRIDGE_VLAN_TUNNEL_FLAGS] = { .type = NLA_U16 }, +}; + +static int br_vlan_tunnel_info(struct net_bridge_port *p, int cmd, + u16 vid, u32 tun_id) +{ + int err = 0; + + if (!p) + return -EINVAL; + + switch (cmd) { + case RTM_SETLINK: + err = nbp_vlan_tunnel_info_add(p, vid, tun_id); + break; + case RTM_DELLINK: + nbp_vlan_tunnel_info_delete(p, vid); + break; + } + + return err; +} + +int br_parse_vlan_tunnel_info(struct nlattr *attr, + struct vtunnel_info *tinfo) +{ + struct nlattr *tb[IFLA_BRIDGE_VLAN_TUNNEL_MAX + 1]; + u32 tun_id; + u16 vid, flags = 0; + int err; + + memset(tinfo, 0, sizeof(*tinfo)); + + if (!tb[IFLA_BRIDGE_VLAN_TUNNEL_ID] || + !tb[IFLA_BRIDGE_VLAN_TUNNEL_VID]) + return -EINVAL; + + err = nla_parse_nested(tb, IFLA_BRIDGE_VLAN_TUNNEL_MAX, + attr, vlan_tunnel_policy); + if (err < 0) + return err; + + tun_id = nla_get_u32(tb[IFLA_BRIDGE_VLAN_TUNNEL_ID]); + vid = nla_get_u16(tb[IFLA_BRIDGE_VLAN_TUNNEL_VID]); + if (vid >= VLAN_VID_MASK) + return -ERANGE; + + if (tb[IFLA_BRIDGE_VLAN_TUNNEL_FLAGS]) + flags = nla_get_u16(tb[IFLA_BRIDGE_VLAN_TUNNEL_FLAGS]); + + tinfo->tunid = tun_id; + tinfo->vid = vid; + tinfo->flags = flags; + + return 0; +} + +int br_process_vlan_tunnel_info(struct net_bridge *br, + struct net_bridge_port *p, int cmd, + struct vtunnel_info *tinfo_curr, + struct vtunnel_info *tinfo_last) +{ + int err; + + if (tinfo_curr->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { + if (tinfo_last->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) + return -EINVAL; + memcpy(tinfo_last, tinfo_curr, sizeof(struct vtunnel_info)); + } else if (tinfo_curr->flags & BRIDGE_VLAN_INFO_RANGE_END) { + int t, v; + + if (!(tinfo_last->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN)) + return -EINVAL; + if ((tinfo_curr->vid - tinfo_last->vid) != + (tinfo_curr->tunid - tinfo_last->tunid)) + return -EINVAL; + t = tinfo_last->tunid; + for (v = tinfo_last->vid; v <= tinfo_curr->vid; v++) { + err = br_vlan_tunnel_info(p, cmd, v, t); + if (err) + return err; + t++; + } + memset(tinfo_last, 0, sizeof(struct vtunnel_info)); + memset(tinfo_curr, 0, sizeof(struct vtunnel_info)); + } else { + if (tinfo_last->flags) + return -EINVAL; + err = br_vlan_tunnel_info(p, cmd, tinfo_curr->vid, + tinfo_curr->tunid); + if (err) + return err; + memset(tinfo_last, 0, sizeof(struct vtunnel_info)); + memset(tinfo_curr, 0, sizeof(struct vtunnel_info)); + } + + return 0; +} diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 0b82a227fc34..61de90f28afa 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -91,6 +91,11 @@ struct br_vlan_stats { struct u64_stats_sync syncp; }; +struct br_tunnel_info { + __be64 tunnel_id; + struct metadata_dst *tunnel_dst; +}; + /** * struct net_bridge_vlan - per-vlan entry * @@ -113,6 +118,7 @@ struct br_vlan_stats { */ struct net_bridge_vlan { struct rhash_head vnode; + struct rhash_head tnode; u16 vid; u16 flags; struct br_vlan_stats __percpu *stats; @@ -124,6 +130,9 @@ struct net_bridge_vlan { atomic_t refcnt; struct net_bridge_vlan *brvlan; }; + + struct br_tunnel_info tinfo; + struct list_head vlist; struct rcu_head rcu; @@ -145,6 +154,7 @@ struct net_bridge_vlan { */ struct net_bridge_vlan_group { struct rhashtable vlan_hash; + struct rhashtable tunnel_hash; struct list_head vlan_list; u16 num_vlans; u16 pvid; diff --git a/net/bridge/br_private_tunnel.h b/net/bridge/br_private_tunnel.h new file mode 100644 index 000000000000..1c8d0d5302cb --- /dev/null +++ b/net/bridge/br_private_tunnel.h @@ -0,0 +1,72 @@ +/* + * Bridge per vlan tunnels + * + * Authors: + * Roopa Prabhu + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _BR_PRIVATE_TUNNEL_H +#define _BR_PRIVATE_TUNNEL_H + +struct vtunnel_info { + u32 tunid; + u16 vid; + u16 flags; +}; + +/* br_netlink_tunnel.c */ +int br_parse_vlan_tunnel_info(struct nlattr *attr, + struct vtunnel_info *tinfo); +int br_process_vlan_tunnel_info(struct net_bridge *br, + struct net_bridge_port *p, + int cmd, + struct vtunnel_info *tinfo_curr, + struct vtunnel_info *tinfo_last); +int br_get_vlan_tunnel_info_size(struct net_bridge_vlan_group *vg); +int br_fill_vlan_tunnel_info(struct sk_buff *skb, + struct net_bridge_vlan_group *vg); + +#ifdef CONFIG_BRIDGE_VLAN_FILTERING +/* br_vlan_tunnel.c */ +int vlan_tunnel_init(struct net_bridge_vlan_group *vg); +void vlan_tunnel_deinit(struct net_bridge_vlan_group *vg); +int nbp_vlan_tunnel_info_delete(struct net_bridge_port *port, u16 vid); +int nbp_vlan_tunnel_info_add(struct net_bridge_port *port, u16 vid, u32 tun_id); +void nbp_vlan_tunnel_info_flush(struct net_bridge_port *port); +void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg, + struct net_bridge_vlan *vlan); +#else +static inline int vlan_tunnel_init(struct net_bridge_vlan_group *vg) +{ + return 0; +} + +static inline int nbp_vlan_tunnel_info_delete(struct net_bridge_port *port, + u16 vid) +{ + return 0; +} + +static inline int nbp_vlan_tunnel_info_add(struct net_bridge_port *port, + u16 vid, u32 tun_id) +{ + return 0; +} + +static inline void nbp_vlan_tunnel_info_flush(struct net_bridge_port *port) +{ +} + +static inline void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg, + struct net_bridge_vlan *vlan) +{ +} + +#endif + +#endif diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index b6de4f457161..64002e3941ca 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -5,6 +5,7 @@ #include #include "br_private.h" +#include "br_private_tunnel.h" static inline int br_vlan_cmp(struct rhashtable_compare_arg *arg, const void *ptr) @@ -310,6 +311,7 @@ static int __vlan_del(struct net_bridge_vlan *v) } if (masterv != v) { + vlan_tunnel_info_del(vg, v); rhashtable_remove_fast(&vg->vlan_hash, &v->vnode, br_vlan_rht_params); __vlan_del_list(v); @@ -325,6 +327,7 @@ static void __vlan_group_free(struct net_bridge_vlan_group *vg) { WARN_ON(!list_empty(&vg->vlan_list)); rhashtable_destroy(&vg->vlan_hash); + vlan_tunnel_deinit(vg); kfree(vg); } @@ -613,6 +616,8 @@ int br_vlan_delete(struct net_bridge *br, u16 vid) br_fdb_find_delete_local(br, NULL, br->dev->dev_addr, vid); br_fdb_delete_by_port(br, NULL, vid, 0); + vlan_tunnel_info_del(vg, v); + return __vlan_del(v); } @@ -918,6 +923,9 @@ int br_vlan_init(struct net_bridge *br) ret = rhashtable_init(&vg->vlan_hash, &br_vlan_rht_params); if (ret) goto err_rhtbl; + ret = vlan_tunnel_init(vg); + if (ret) + goto err_tunnel_init; INIT_LIST_HEAD(&vg->vlan_list); br->vlan_proto = htons(ETH_P_8021Q); br->default_pvid = 1; @@ -932,6 +940,8 @@ out: return ret; err_vlan_add: + vlan_tunnel_deinit(vg); +err_tunnel_init: rhashtable_destroy(&vg->vlan_hash); err_rhtbl: kfree(vg); @@ -961,6 +971,9 @@ int nbp_vlan_init(struct net_bridge_port *p) ret = rhashtable_init(&vg->vlan_hash, &br_vlan_rht_params); if (ret) goto err_rhtbl; + ret = vlan_tunnel_init(vg); + if (ret) + goto err_tunnel_init; INIT_LIST_HEAD(&vg->vlan_list); rcu_assign_pointer(p->vlgrp, vg); if (p->br->default_pvid) { @@ -976,8 +989,10 @@ out: err_vlan_add: RCU_INIT_POINTER(p->vlgrp, NULL); synchronize_rcu(); - rhashtable_destroy(&vg->vlan_hash); + vlan_tunnel_deinit(vg); err_vlan_enabled: +err_tunnel_init: + rhashtable_destroy(&vg->vlan_hash); err_rhtbl: kfree(vg); diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c new file mode 100644 index 000000000000..b3fd29d20a3c --- /dev/null +++ b/net/bridge/br_vlan_tunnel.c @@ -0,0 +1,149 @@ +/* + * Bridge per vlan tunnel port dst_metadata handling code + * + * Authors: + * Roopa Prabhu + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +#include "br_private.h" +#include "br_private_tunnel.h" + +static inline int br_vlan_tunid_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct net_bridge_vlan *vle = ptr; + __be64 tunid = *(__be64 *)arg->key; + + return vle->tinfo.tunnel_id != tunid; +} + +static const struct rhashtable_params br_vlan_tunnel_rht_params = { + .head_offset = offsetof(struct net_bridge_vlan, tnode), + .key_offset = offsetof(struct net_bridge_vlan, tinfo.tunnel_id), + .key_len = sizeof(__be64), + .nelem_hint = 3, + .locks_mul = 1, + .obj_cmpfn = br_vlan_tunid_cmp, + .automatic_shrinking = true, +}; + +void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg, + struct net_bridge_vlan *vlan) +{ + if (!vlan->tinfo.tunnel_dst) + return; + rhashtable_remove_fast(&vg->tunnel_hash, &vlan->tnode, + br_vlan_tunnel_rht_params); + vlan->tinfo.tunnel_id = 0; + dst_release(&vlan->tinfo.tunnel_dst->dst); + vlan->tinfo.tunnel_dst = NULL; +} + +static int __vlan_tunnel_info_add(struct net_bridge_vlan_group *vg, + struct net_bridge_vlan *vlan, u32 tun_id) +{ + struct metadata_dst *metadata = NULL; + __be64 key = key32_to_tunnel_id(cpu_to_be32(tun_id)); + int err; + + if (vlan->tinfo.tunnel_dst) + return -EEXIST; + + metadata = __ip_tun_set_dst(0, 0, 0, 0, 0, TUNNEL_KEY, + key, 0); + if (!metadata) + return -EINVAL; + + metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX | IP_TUNNEL_INFO_BRIDGE; + vlan->tinfo.tunnel_dst = metadata; + vlan->tinfo.tunnel_id = key; + + err = rhashtable_lookup_insert_fast(&vg->tunnel_hash, &vlan->tnode, + br_vlan_tunnel_rht_params); + if (err) + goto out; + + return 0; +out: + dst_release(&vlan->tinfo.tunnel_dst->dst); + + return err; +} + +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ +int nbp_vlan_tunnel_info_add(struct net_bridge_port *port, u16 vid, u32 tun_id) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *vlan; + + ASSERT_RTNL(); + + vg = nbp_vlan_group(port); + vlan = br_vlan_find(vg, vid); + if (!vlan) + return -EINVAL; + + return __vlan_tunnel_info_add(vg, vlan, tun_id); +} + +/* Must be protected by RTNL. + * Must be called with vid in range from 1 to 4094 inclusive. + */ +int nbp_vlan_tunnel_info_delete(struct net_bridge_port *port, u16 vid) +{ + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + + ASSERT_RTNL(); + + vg = nbp_vlan_group(port); + v = br_vlan_find(vg, vid); + if (!v) + return -ENOENT; + + vlan_tunnel_info_del(vg, v); + + return 0; +} + +static void __vlan_tunnel_info_flush(struct net_bridge_vlan_group *vg) +{ + struct net_bridge_vlan *vlan, *tmp; + + list_for_each_entry_safe(vlan, tmp, &vg->vlan_list, vlist) + vlan_tunnel_info_del(vg, vlan); +} + +void nbp_vlan_tunnel_info_flush(struct net_bridge_port *port) +{ + struct net_bridge_vlan_group *vg; + + ASSERT_RTNL(); + + vg = nbp_vlan_group(port); + __vlan_tunnel_info_flush(vg); +} + +int vlan_tunnel_init(struct net_bridge_vlan_group *vg) +{ + return rhashtable_init(&vg->tunnel_hash, &br_vlan_tunnel_rht_params); +} + +void vlan_tunnel_deinit(struct net_bridge_vlan_group *vg) +{ + rhashtable_destroy(&vg->tunnel_hash); +} From 11538d039ac6efcf4f1a6c536e1b87cd3668a9fd Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Tue, 31 Jan 2017 22:59:55 -0800 Subject: [PATCH 5/5] bridge: vlan dst_metadata hooks in ingress and egress paths - ingress hook: - if port is a tunnel port, use tunnel info in attached dst_metadata to map it to a local vlan - egress hook: - if port is a tunnel port, use tunnel info attached to vlan to set dst_metadata on the skb CC: Nikolay Aleksandrov Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- net/bridge/br_forward.c | 2 +- net/bridge/br_input.c | 8 ++++- net/bridge/br_private.h | 2 ++ net/bridge/br_private_tunnel.h | 11 +++++++ net/bridge/br_vlan.c | 7 +++++ net/bridge/br_vlan_tunnel.c | 54 ++++++++++++++++++++++++++++++++++ 6 files changed, 82 insertions(+), 2 deletions(-) diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 5a1f8ef49899..6bfac29318f2 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -80,7 +80,7 @@ static void __br_forward(const struct net_bridge_port *to, int br_hook; vg = nbp_vlan_group_rcu(to); - skb = br_handle_vlan(to->br, vg, skb); + skb = br_handle_vlan(to->br, to, vg, skb); if (!skb) return; diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 855b72fbe1da..fba38d8a1a08 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -21,6 +21,7 @@ #include #include #include "br_private.h" +#include "br_private_tunnel.h" /* Hook for brouter */ br_should_route_hook_t __rcu *br_should_route_hook __read_mostly; @@ -57,7 +58,7 @@ static int br_pass_frame_up(struct sk_buff *skb) indev = skb->dev; skb->dev = brdev; - skb = br_handle_vlan(br, vg, skb); + skb = br_handle_vlan(br, NULL, vg, skb); if (!skb) return NET_RX_DROP; /* update the multicast stats if the packet is IGMP/MLD */ @@ -261,6 +262,11 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) return RX_HANDLER_CONSUMED; p = br_port_get_rcu(skb->dev); + if (p->flags & BR_VLAN_TUNNEL) { + if (br_handle_ingress_vlan_tunnel(skb, p, + nbp_vlan_group_rcu(p))) + goto drop; + } if (unlikely(is_link_local_ether_addr(dest))) { u16 fwd_mask = p->br->group_fwd_mask_required; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 61de90f28afa..40177df45ba6 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -775,6 +775,7 @@ bool br_allowed_egress(struct net_bridge_vlan_group *vg, const struct sk_buff *skb); bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid); struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_bridge_port *port, struct net_bridge_vlan_group *vg, struct sk_buff *skb); int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); @@ -874,6 +875,7 @@ static inline bool br_should_learn(struct net_bridge_port *p, } static inline struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_bridge_port *port, struct net_bridge_vlan_group *vg, struct sk_buff *skb) { diff --git a/net/bridge/br_private_tunnel.h b/net/bridge/br_private_tunnel.h index 1c8d0d5302cb..4a447a378ab3 100644 --- a/net/bridge/br_private_tunnel.h +++ b/net/bridge/br_private_tunnel.h @@ -40,6 +40,11 @@ int nbp_vlan_tunnel_info_add(struct net_bridge_port *port, u16 vid, u32 tun_id); void nbp_vlan_tunnel_info_flush(struct net_bridge_port *port); void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg, struct net_bridge_vlan *vlan); +int br_handle_ingress_vlan_tunnel(struct sk_buff *skb, + struct net_bridge_port *p, + struct net_bridge_vlan_group *vg); +int br_handle_egress_vlan_tunnel(struct sk_buff *skb, + struct net_bridge_vlan *vlan); #else static inline int vlan_tunnel_init(struct net_bridge_vlan_group *vg) { @@ -67,6 +72,12 @@ static inline void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg, { } +static inline int br_handle_ingress_vlan_tunnel(struct sk_buff *skb, + struct net_bridge_port *p, + struct net_bridge_vlan_group *vg) +{ + return 0; +} #endif #endif diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 64002e3941ca..62e68c0dc687 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -341,6 +341,7 @@ static void __vlan_flush(struct net_bridge_vlan_group *vg) } struct sk_buff *br_handle_vlan(struct net_bridge *br, + const struct net_bridge_port *p, struct net_bridge_vlan_group *vg, struct sk_buff *skb) { @@ -381,6 +382,12 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, if (v->flags & BRIDGE_VLAN_INFO_UNTAGGED) skb->vlan_tci = 0; + + if (p && (p->flags & BR_VLAN_TUNNEL) && + br_handle_egress_vlan_tunnel(skb, v)) { + kfree_skb(skb); + return NULL; + } out: return skb; } diff --git a/net/bridge/br_vlan_tunnel.c b/net/bridge/br_vlan_tunnel.c index b3fd29d20a3c..b2b79a070162 100644 --- a/net/bridge/br_vlan_tunnel.c +++ b/net/bridge/br_vlan_tunnel.c @@ -39,6 +39,13 @@ static const struct rhashtable_params br_vlan_tunnel_rht_params = { .automatic_shrinking = true, }; +static struct net_bridge_vlan *br_vlan_tunnel_lookup(struct rhashtable *tbl, + u64 tunnel_id) +{ + return rhashtable_lookup_fast(tbl, &tunnel_id, + br_vlan_tunnel_rht_params); +} + void vlan_tunnel_info_del(struct net_bridge_vlan_group *vg, struct net_bridge_vlan *vlan) { @@ -147,3 +154,50 @@ void vlan_tunnel_deinit(struct net_bridge_vlan_group *vg) { rhashtable_destroy(&vg->tunnel_hash); } + +int br_handle_ingress_vlan_tunnel(struct sk_buff *skb, + struct net_bridge_port *p, + struct net_bridge_vlan_group *vg) +{ + struct ip_tunnel_info *tinfo = skb_tunnel_info(skb); + struct net_bridge_vlan *vlan; + + if (!vg || !tinfo) + return 0; + + /* if already tagged, ignore */ + if (skb_vlan_tagged(skb)) + return 0; + + /* lookup vid, given tunnel id */ + vlan = br_vlan_tunnel_lookup(&vg->tunnel_hash, tinfo->key.tun_id); + if (!vlan) + return 0; + + skb_dst_drop(skb); + + __vlan_hwaccel_put_tag(skb, p->br->vlan_proto, vlan->vid); + + return 0; +} + +int br_handle_egress_vlan_tunnel(struct sk_buff *skb, + struct net_bridge_vlan *vlan) +{ + int err; + + if (!vlan || !vlan->tinfo.tunnel_id) + return 0; + + if (unlikely(!skb_vlan_tag_present(skb))) + return 0; + + skb_dst_drop(skb); + err = skb_vlan_pop(skb); + if (err) + return err; + + skb_dst_set(skb, dst_clone(&vlan->tinfo.tunnel_dst->dst)); + + return 0; +}