2022-03-01 05:04:30 +00:00
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Vxlan private header file
*
*/
# ifndef _VXLAN_PRIVATE_H
# define _VXLAN_PRIVATE_H
2022-03-01 05:04:36 +00:00
# include <linux/rhashtable.h>
2022-03-01 05:04:30 +00:00
/* Key passed to net_generic() to fetch this module's per-namespace struct vxlan_net. */
extern unsigned int vxlan_net_id ;
/* NOTE(review): presumably an all-zero MAC address, two bytes longer than ETH_ALEN — confirm at the definition. */
extern const u8 all_zeros_mac [ ETH_ALEN + 2 ] ;
2022-03-01 05:04:36 +00:00
/* rhashtable parameters used when looking up a VNI in a VNI group's vni_hash table. */
extern const struct rhashtable_params vxlan_vni_rht_params ;
2022-03-01 05:04:30 +00:00
/* Number of hash bits used to pick a bucket in the per-namespace socket table. */
# define PORT_HASH_BITS 8
/* Number of buckets in vxlan_net.sock_list (2^PORT_HASH_BITS). */
# define PORT_HASH_SIZE (1 << PORT_HASH_BITS)
/*
 * Module-private state kept once per network namespace.
 * Retrieved with net_generic(net, vxlan_net_id).
 */
struct vxlan_net {
	/* NOTE(review): presumably the VXLAN devices in this netns — confirm */
	struct list_head	vxlan_list;
	/* socket hash table; the bucket for a port is chosen by vs_head() */
	struct hlist_head	sock_list[PORT_HASH_SIZE];
	/* NOTE(review): presumably serialises access to sock_list — confirm */
	spinlock_t		sock_lock;
	struct notifier_block	nexthop_notifier_block;
};
/*
 * One forwarding database (FDB) entry.
 *
 * An entry either carries a list of remote destinations (@remotes) or
 * refers to a nexthop object (@nh); first_remote_rcu() and
 * first_remote_rtnl() return NULL when @nh is set.
 */
struct vxlan_fdb {
	struct hlist_node	hlist;		/* linkage in the list of entries */
	struct rcu_head		rcu;
	unsigned long		updated;	/* timestamp, in jiffies */
	unsigned long		used;
	struct list_head	remotes;	/* struct vxlan_rdst, linked via ->list */
	u8			eth_addr[ETH_ALEN];
	u16			state;		/* see ndm_state */
	__be32			vni;
	u16			flags;		/* see ndm_flags and the flag defined below */
	struct list_head	nh_list;
	struct nexthop __rcu	*nh;
	struct vxlan_dev __rcu	*vdev;
};

/*
 * Driver-private value for vxlan_fdb.flags.
 * NOTE(review): presumably marks entries installed by user space — confirm.
 */
#define NTF_VXLAN_ADDED_BY_USER 0x100
/*
 * Return the bucket of @vs's VNI hash table that @vni hashes into.
 * The bucket index is hash_32() of the raw VNI, reduced to VNI_HASH_BITS bits.
 */
static inline struct hlist_head *vni_head(struct vxlan_sock *vs, __be32 vni)
{
	u32 bucket = hash_32((__force u32)vni, VNI_HASH_BITS);

	return &vs->vni_list[bucket];
}
/*
 * Return the bucket of the per-namespace socket hash table that @port
 * (network byte order) hashes into. The port is converted to host order
 * before being hashed down to PORT_HASH_BITS bits.
 */
static inline struct hlist_head *vs_head(struct net *net, __be16 port)
{
	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
	u32 bucket = hash_32(ntohs(port), PORT_HASH_BITS);

	return &vn->sock_list[bucket];
}
/* First remote destination for a forwarding entry.
* Guaranteed to be non - NULL because remotes are never deleted .
*/
static inline struct vxlan_rdst * first_remote_rcu ( struct vxlan_fdb * fdb )
{
if ( rcu_access_pointer ( fdb - > nh ) )
return NULL ;
return list_entry_rcu ( fdb - > remotes . next , struct vxlan_rdst , list ) ;
}
static inline struct vxlan_rdst * first_remote_rtnl ( struct vxlan_fdb * fdb )
{
if ( rcu_access_pointer ( fdb - > nh ) )
return NULL ;
return list_first_entry ( & fdb - > remotes , struct vxlan_rdst , list ) ;
}
# if IS_ENABLED(CONFIG_IPV6)
static inline
bool vxlan_addr_equal ( const union vxlan_addr * a , const union vxlan_addr * b )
{
if ( a - > sa . sa_family ! = b - > sa . sa_family )
return false ;
if ( a - > sa . sa_family = = AF_INET6 )
return ipv6_addr_equal ( & a - > sin6 . sin6_addr , & b - > sin6 . sin6_addr ) ;
else
return a - > sin . sin_addr . s_addr = = b - > sin . sin_addr . s_addr ;
}
2023-03-15 15:11:49 +02:00
static inline int vxlan_nla_get_addr ( union vxlan_addr * ip ,
const struct nlattr * nla )
{
if ( nla_len ( nla ) > = sizeof ( struct in6_addr ) ) {
ip - > sin6 . sin6_addr = nla_get_in6_addr ( nla ) ;
ip - > sa . sa_family = AF_INET6 ;
return 0 ;
} else if ( nla_len ( nla ) > = sizeof ( __be32 ) ) {
ip - > sin . sin_addr . s_addr = nla_get_in_addr ( nla ) ;
ip - > sa . sa_family = AF_INET ;
return 0 ;
} else {
return - EAFNOSUPPORT ;
}
}
static inline int vxlan_nla_put_addr ( struct sk_buff * skb , int attr ,
const union vxlan_addr * ip )
{
if ( ip - > sa . sa_family = = AF_INET6 )
return nla_put_in6_addr ( skb , attr , & ip - > sin6 . sin6_addr ) ;
else
return nla_put_in_addr ( skb , attr , ip - > sin . sin_addr . s_addr ) ;
}
vxlan: mdb: Add MDB control path support
Implement MDB control path support, enabling the creation, deletion,
replacement and dumping of MDB entries in a similar fashion to the
bridge driver. Unlike the bridge driver, each entry stores a list of
remote VTEPs to which matched packets need to be replicated, rather than
a list of bridge ports.
The motivating use case is the installation of MDB entries by a user
space control plane in response to received EVPN routes. As such, only
allow permanent MDB entries to be installed and do not implement
snooping functionality, avoiding a lot of unnecessary complexity.
Since entries can only be modified by user space under RTNL, use RTNL as
the write lock. Use RCU to ensure that MDB entries and remotes are not
freed while being accessed from the data path during transmission.
In terms of uAPI, reuse the existing MDB netlink interface, but add a
few new attributes to request and response messages:
* IP address of the destination VXLAN tunnel endpoint where the
multicast receivers reside.
* UDP destination port number to use to connect to the remote VXLAN
tunnel endpoint.
* VXLAN VNI Network Identifier to use to connect to the remote VXLAN
tunnel endpoint. Required when Ingress Replication (IR) is used and
the remote VTEP is not a member of originating broadcast domain
(VLAN/VNI) [1].
* Source VNI Network Identifier the MDB entry belongs to. Used only when
the VXLAN device is in external mode.
* Interface index of the outgoing interface to reach the remote VXLAN
tunnel endpoint. This is required when the underlay destination IP is
multicast (P2MP), as the multicast routing tables are not consulted.
All the new attributes are added under the 'MDBA_SET_ENTRY_ATTRS' nest
which is strictly validated by the bridge driver, thereby automatically
rejecting the new attributes.
[1] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast#section-3.2.2
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-03-15 15:11:51 +02:00
static inline bool vxlan_addr_is_multicast ( const union vxlan_addr * ip )
{
if ( ip - > sa . sa_family = = AF_INET6 )
return ipv6_addr_is_multicast ( & ip - > sin6 . sin6_addr ) ;
else
return ipv4_is_multicast ( ip - > sin . sin_addr . s_addr ) ;
}
2022-03-01 05:04:30 +00:00
# else /* !CONFIG_IPV6 */
static inline
bool vxlan_addr_equal ( const union vxlan_addr * a , const union vxlan_addr * b )
{
return a - > sin . sin_addr . s_addr = = b - > sin . sin_addr . s_addr ;
}
2023-03-15 15:11:49 +02:00
static inline int vxlan_nla_get_addr ( union vxlan_addr * ip ,
const struct nlattr * nla )
{
if ( nla_len ( nla ) > = sizeof ( struct in6_addr ) ) {
return - EAFNOSUPPORT ;
} else if ( nla_len ( nla ) > = sizeof ( __be32 ) ) {
ip - > sin . sin_addr . s_addr = nla_get_in_addr ( nla ) ;
ip - > sa . sa_family = AF_INET ;
return 0 ;
} else {
return - EAFNOSUPPORT ;
}
}
static inline int vxlan_nla_put_addr ( struct sk_buff * skb , int attr ,
const union vxlan_addr * ip )
{
return nla_put_in_addr ( skb , attr , ip - > sin . sin_addr . s_addr ) ;
}
vxlan: mdb: Add MDB control path support
Implement MDB control path support, enabling the creation, deletion,
replacement and dumping of MDB entries in a similar fashion to the
bridge driver. Unlike the bridge driver, each entry stores a list of
remote VTEPs to which matched packets need to be replicated, rather than
a list of bridge ports.
The motivating use case is the installation of MDB entries by a user
space control plane in response to received EVPN routes. As such, only
allow permanent MDB entries to be installed and do not implement
snooping functionality, avoiding a lot of unnecessary complexity.
Since entries can only be modified by user space under RTNL, use RTNL as
the write lock. Use RCU to ensure that MDB entries and remotes are not
freed while being accessed from the data path during transmission.
In terms of uAPI, reuse the existing MDB netlink interface, but add a
few new attributes to request and response messages:
* IP address of the destination VXLAN tunnel endpoint where the
multicast receivers reside.
* UDP destination port number to use to connect to the remote VXLAN
tunnel endpoint.
* VXLAN VNI Network Identifier to use to connect to the remote VXLAN
tunnel endpoint. Required when Ingress Replication (IR) is used and
the remote VTEP is not a member of originating broadcast domain
(VLAN/VNI) [1].
* Source VNI Network Identifier the MDB entry belongs to. Used only when
the VXLAN device is in external mode.
* Interface index of the outgoing interface to reach the remote VXLAN
tunnel endpoint. This is required when the underlay destination IP is
multicast (P2MP), as the multicast routing tables are not consulted.
All the new attributes are added under the 'MDBA_SET_ENTRY_ATTRS' nest
which is strictly validated by the bridge driver, thereby automatically
rejecting the new attributes.
[1] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast#section-3.2.2
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-03-15 15:11:51 +02:00
static inline bool vxlan_addr_is_multicast ( const union vxlan_addr * ip )
{
return ipv4_is_multicast ( ip - > sin . sin_addr . s_addr ) ;
}
2022-03-01 05:04:30 +00:00
# endif
vxlan: mdb: Add MDB control path support
Implement MDB control path support, enabling the creation, deletion,
replacement and dumping of MDB entries in a similar fashion to the
bridge driver. Unlike the bridge driver, each entry stores a list of
remote VTEPs to which matched packets need to be replicated, rather than
a list of bridge ports.
The motivating use case is the installation of MDB entries by a user
space control plane in response to received EVPN routes. As such, only
allow permanent MDB entries to be installed and do not implement
snooping functionality, avoiding a lot of unnecessary complexity.
Since entries can only be modified by user space under RTNL, use RTNL as
the write lock. Use RCU to ensure that MDB entries and remotes are not
freed while being accessed from the data path during transmission.
In terms of uAPI, reuse the existing MDB netlink interface, but add a
few new attributes to request and response messages:
* IP address of the destination VXLAN tunnel endpoint where the
multicast receivers reside.
* UDP destination port number to use to connect to the remote VXLAN
tunnel endpoint.
* VXLAN VNI Network Identifier to use to connect to the remote VXLAN
tunnel endpoint. Required when Ingress Replication (IR) is used and
the remote VTEP is not a member of originating broadcast domain
(VLAN/VNI) [1].
* Source VNI Network Identifier the MDB entry belongs to. Used only when
the VXLAN device is in external mode.
* Interface index of the outgoing interface to reach the remote VXLAN
tunnel endpoint. This is required when the underlay destination IP is
multicast (P2MP), as the multicast routing tables are not consulted.
All the new attributes are added under the 'MDBA_SET_ENTRY_ATTRS' nest
which is strictly validated by the bridge driver, thereby automatically
rejecting the new attributes.
[1] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast#section-3.2.2
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-03-15 15:11:51 +02:00
static inline size_t vxlan_addr_size ( const union vxlan_addr * ip )
{
if ( ip - > sa . sa_family = = AF_INET6 )
return sizeof ( struct in6_addr ) ;
else
return sizeof ( __be32 ) ;
}
2022-03-01 05:04:36 +00:00
/*
 * Look up the VNI node for @vni in the VNI group attached to @vxlan.
 *
 * Returns NULL when the device has no VNI group; otherwise returns the
 * result of the hash table lookup in the group's vni_hash.
 * NOTE(review): rcu_dereference_rtnl() suggests the caller must be in an
 * RCU read-side section or hold RTNL — confirm.
 */
static inline struct vxlan_vni_node *
vxlan_vnifilter_lookup(struct vxlan_dev *vxlan, __be32 vni)
{
	struct vxlan_vni_group *vg = rcu_dereference_rtnl(vxlan->vnigrp);

	return vg ? rhashtable_lookup_fast(&vg->vni_hash, &vni,
					   vxlan_vni_rht_params) : NULL;
}
2022-03-01 05:04:31 +00:00
/* vxlan_core.c */
/*
 * Create an FDB entry. NOTE(review): from the signature, the new entry is
 * presumably handed back through @fdb and errors reported via @extack —
 * confirm against the definition.
 */
int vxlan_fdb_create ( struct vxlan_dev * vxlan ,
const u8 * mac , union vxlan_addr * ip ,
__u16 state , __be16 port , __be32 src_vni ,
__be32 vni , __u32 ifindex , __u16 ndm_flags ,
u32 nhid , struct vxlan_fdb * * fdb ,
struct netlink_ext_ack * extack ) ;
/*
 * Delete an FDB entry or one of its remotes. Note that @ip is passed by
 * value here, unlike in the create/update calls.
 * NOTE(review): @swdev_notify presumably controls switchdev notification — confirm.
 */
int __vxlan_fdb_delete ( struct vxlan_dev * vxlan ,
const unsigned char * addr , union vxlan_addr ip ,
__be16 port , __be32 src_vni , __be32 vni ,
u32 ifindex , bool swdev_notify ) ;
/* Hash of a MAC address combined with a VNI. */
u32 eth_vni_hash ( const unsigned char * addr , __be32 vni ) ;
/* NOTE(review): presumably the FDB hash bucket index for (@mac, @vni) on @vxlan — confirm. */
u32 fdb_head_index ( struct vxlan_dev * vxlan , const u8 * mac , __be32 vni ) ;
/*
 * Update (or, depending on @flags, presumably create) an FDB entry.
 * NOTE(review): exact create/replace semantics are not visible here — confirm.
 */
int vxlan_fdb_update ( struct vxlan_dev * vxlan ,
const u8 * mac , union vxlan_addr * ip ,
__u16 state , __u16 flags ,
__be16 port , __be32 src_vni , __be32 vni ,
__u32 ifindex , __u16 ndm_flags , u32 nhid ,
bool swdev_notify , struct netlink_ext_ack * extack ) ;
2023-03-15 15:11:50 +02:00
/*
 * Transmit @skb from VXLAN device @dev towards remote destination @rdst.
 * NOTE(review): the roles of @default_vni and @did_rsc are not visible in
 * this header — confirm against the definition.
 */
void vxlan_xmit_one ( struct sk_buff * skb , struct net_device * dev ,
__be32 default_vni , struct vxlan_rdst * rdst , bool did_rsc ) ;
2022-03-01 05:04:36 +00:00
/*
 * NOTE(review): presumably checks whether @vni under configuration @conf
 * conflicts with another device in @src_net — confirm.
 */
int vxlan_vni_in_use ( struct net * src_net , struct vxlan_dev * vxlan ,
struct vxlan_config * conf , __be32 vni ) ;
/* vxlan_vnifilter.c */
/* Per-device VNI group set-up and tear-down. */
int vxlan_vnigroup_init ( struct vxlan_dev * vxlan ) ;
void vxlan_vnigroup_uninit ( struct vxlan_dev * vxlan ) ;
/* Module-wide VNI filter set-up and tear-down (no per-device argument). */
void vxlan_vnifilter_init ( void ) ;
void vxlan_vnifilter_uninit ( void ) ;
2022-03-01 05:04:38 +00:00
/*
 * NOTE(review): presumably accounts @len bytes against the counter selected
 * by @type for @vni / @vninode — confirm against vxlan_vnifilter.c.
 */
void vxlan_vnifilter_count ( struct vxlan_dev * vxlan , __be32 vni ,
struct vxlan_vni_node * vninode ,
int type , unsigned int len ) ;
2022-03-01 05:04:36 +00:00
/*
 * NOTE(review): presumably attach/detach the device's VNI group to/from
 * socket @vs (@ipv6 selecting the socket family) — confirm.
 */
void vxlan_vs_add_vnigrp ( struct vxlan_dev * vxlan ,
struct vxlan_sock * vs ,
bool ipv6 ) ;
void vxlan_vs_del_vnigrp ( struct vxlan_dev * vxlan ) ;
/*
 * NOTE(review): presumably moves the device's VNI list from
 * @old_remote_ip to @new_remote_ip, reporting errors via @extack — confirm.
 */
int vxlan_vnilist_update_group ( struct vxlan_dev * vxlan ,
union vxlan_addr * old_remote_ip ,
union vxlan_addr * new_remote_ip ,
struct netlink_ext_ack * extack ) ;
2022-03-01 05:04:31 +00:00
2022-03-01 05:04:35 +00:00
/* vxlan_multicast.c */
2022-03-01 05:04:36 +00:00
/* Join / leave the multicast group(s) associated with @vxlan. */
int vxlan_multicast_join ( struct vxlan_dev * vxlan ) ;
int vxlan_multicast_leave ( struct vxlan_dev * vxlan ) ;
/*
 * NOTE(review): presumably reports whether group @rip on interface
 * @rifindex is still used by a device other than @dev in @vn — confirm.
 */
bool vxlan_group_used ( struct vxlan_net * vn , struct vxlan_dev * dev ,
__be32 vni , union vxlan_addr * rip , int rifindex ) ;
2022-03-01 05:04:35 +00:00
/*
 * Join / leave multicast group @rip on behalf of @vxlan.
 * NOTE(review): @rifindex is presumably the interface index to join on — confirm.
 */
int vxlan_igmp_join ( struct vxlan_dev * vxlan , union vxlan_addr * rip ,
int rifindex ) ;
int vxlan_igmp_leave ( struct vxlan_dev * vxlan , union vxlan_addr * rip ,
int rifindex ) ;
vxlan: mdb: Add MDB control path support
Implement MDB control path support, enabling the creation, deletion,
replacement and dumping of MDB entries in a similar fashion to the
bridge driver. Unlike the bridge driver, each entry stores a list of
remote VTEPs to which matched packets need to be replicated, rather than
a list of bridge ports.
The motivating use case is the installation of MDB entries by a user
space control plane in response to received EVPN routes. As such, only
allow permanent MDB entries to be installed and do not implement
snooping functionality, avoiding a lot of unnecessary complexity.
Since entries can only be modified by user space under RTNL, use RTNL as
the write lock. Use RCU to ensure that MDB entries and remotes are not
freed while being accessed from the data path during transmission.
In terms of uAPI, reuse the existing MDB netlink interface, but add a
few new attributes to request and response messages:
* IP address of the destination VXLAN tunnel endpoint where the
multicast receivers reside.
* UDP destination port number to use to connect to the remote VXLAN
tunnel endpoint.
* VXLAN VNI Network Identifier to use to connect to the remote VXLAN
tunnel endpoint. Required when Ingress Replication (IR) is used and
the remote VTEP is not a member of originating broadcast domain
(VLAN/VNI) [1].
* Source VNI Network Identifier the MDB entry belongs to. Used only when
the VXLAN device is in external mode.
* Interface index of the outgoing interface to reach the remote VXLAN
tunnel endpoint. This is required when the underlay destination IP is
multicast (P2MP), as the multicast routing tables are not consulted.
All the new attributes are added under the 'MDBA_SET_ENTRY_ATTRS' nest
which is strictly validated by the bridge driver, thereby automatically
rejecting the new attributes.
[1] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast#section-3.2.2
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-03-15 15:11:51 +02:00
/* vxlan_mdb.c */
/* MDB control path: dump the MDB entries of @dev into @skb. */
int vxlan_mdb_dump ( struct net_device * dev , struct sk_buff * skb ,
struct netlink_callback * cb ) ;
/*
 * MDB control path: add an MDB entry described by netlink attributes @tb.
 * NOTE(review): replacement is presumably selected through @nlmsg_flags — confirm.
 */
int vxlan_mdb_add ( struct net_device * dev , struct nlattr * tb [ ] , u16 nlmsg_flags ,
struct netlink_ext_ack * extack ) ;
/* MDB control path: delete the MDB entry described by netlink attributes @tb. */
int vxlan_mdb_del ( struct net_device * dev , struct nlattr * tb [ ] ,
struct netlink_ext_ack * extack ) ;
2023-12-17 10:32:41 +02:00
/* NOTE(review): presumably deletes all MDB entries matching the attributes in @tb — confirm. */
int vxlan_mdb_del_bulk ( struct net_device * dev , struct nlattr * tb [ ] ,
struct netlink_ext_ack * extack ) ;
2023-10-25 15:30:17 +03:00
/*
 * NOTE(review): presumably looks up a single MDB entry described by @tb and
 * replies to netlink port @portid with sequence number @seq — confirm.
 */
int vxlan_mdb_get ( struct net_device * dev , struct nlattr * tb [ ] , u32 portid ,
u32 seq , struct netlink_ext_ack * extack ) ;
vxlan: Add MDB data path support
Integrate MDB support into the Tx path of the VXLAN driver, allowing it
to selectively forward IP multicast traffic according to the matched MDB
entry.
If MDB entries are configured (i.e., 'VXLAN_F_MDB' is set) and the
packet is an IP multicast packet, perform up to three different lookups
according to the following priority:
1. For an (S, G) entry, using {Source VNI, Source IP, Destination IP}.
2. For a (*, G) entry, using {Source VNI, Destination IP}.
3. For the catchall MDB entry (0.0.0.0 or ::), using the source VNI.
The catchall MDB entry is similar to the catchall FDB entry
(00:00:00:00:00:00) that is currently used to transmit BUM (broadcast,
unknown unicast and multicast) traffic. However, unlike the catchall FDB
entry, this entry is only used to transmit unregistered IP multicast
traffic that is not link-local. Therefore, when configured, the catchall
FDB entry will only transmit BULL (broadcast, unknown unicast,
link-local multicast) traffic.
The catchall MDB entry is useful in deployments where inter-subnet
multicast forwarding is used and not all the VTEPs in a tenant domain
are members in all the broadcast domains. In such deployments it is
advantageous to transmit BULL (broadcast, unknown unicast and link-local
multicast) and unregistered IP multicast traffic on different tunnels.
If the same tunnel was used, a VTEP only interested in IP multicast
traffic would also pull all the BULL traffic and drop it as it is not a
member in the originating broadcast domain [1].
If the packet did not match an MDB entry (or if the packet is not an IP
multicast packet), return it to the Tx path, allowing it to be forwarded
according to the FDB.
If the packet did match an MDB entry, forward it to the associated
remote VTEPs. However, if the entry is a (*, G) entry and the associated
remote is in INCLUDE mode, then skip over it as the source IP is not in
its source list (otherwise the packet would have matched on an (S, G)
entry). Similarly, if the associated remote is marked as BLOCKED (can
only be set on (S, G) entries), then skip over it as well as the remote
is in EXCLUDE mode and the source IP is in its source list.
[1] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast#section-2.6
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-03-15 15:11:53 +02:00
/*
 * MDB data path: find the MDB entry that @skb matches for source VNI
 * @src_vni. NOTE(review): presumably returns NULL when nothing matches,
 * leaving the packet to be forwarded according to the FDB — confirm.
 */
struct vxlan_mdb_entry * vxlan_mdb_entry_skb_get ( struct vxlan_dev * vxlan ,
struct sk_buff * skb ,
__be32 src_vni ) ;
/*
 * MDB data path: transmit @skb according to the matched @mdb_entry.
 * NOTE(review): presumably replicates to the entry's remote VTEPs — confirm.
 */
netdev_tx_t vxlan_mdb_xmit ( struct vxlan_dev * vxlan ,
const struct vxlan_mdb_entry * mdb_entry ,
struct sk_buff * skb ) ;
vxlan: mdb: Add MDB control path support
Implement MDB control path support, enabling the creation, deletion,
replacement and dumping of MDB entries in a similar fashion to the
bridge driver. Unlike the bridge driver, each entry stores a list of
remote VTEPs to which matched packets need to be replicated, rather than
a list of bridge ports.
The motivating use case is the installation of MDB entries by a user
space control plane in response to received EVPN routes. As such, only
allow permanent MDB entries to be installed and do not implement
snooping functionality, avoiding a lot of unnecessary complexity.
Since entries can only be modified by user space under RTNL, use RTNL as
the write lock. Use RCU to ensure that MDB entries and remotes are not
freed while being accessed from the data path during transmission.
In terms of uAPI, reuse the existing MDB netlink interface, but add a
few new attributes to request and response messages:
* IP address of the destination VXLAN tunnel endpoint where the
multicast receivers reside.
* UDP destination port number to use to connect to the remote VXLAN
tunnel endpoint.
* VXLAN VNI Network Identifier to use to connect to the remote VXLAN
tunnel endpoint. Required when Ingress Replication (IR) is used and
the remote VTEP is not a member of originating broadcast domain
(VLAN/VNI) [1].
* Source VNI Network Identifier the MDB entry belongs to. Used only when
the VXLAN device is in external mode.
* Interface index of the outgoing interface to reach the remote VXLAN
tunnel endpoint. This is required when the underlay destination IP is
multicast (P2MP), as the multicast routing tables are not consulted.
All the new attributes are added under the 'MDBA_SET_ENTRY_ATTRS' nest
which is strictly validated by the bridge driver, thereby automatically
rejecting the new attributes.
[1] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast#section-3.2.2
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2023-03-15 15:11:51 +02:00
/* Per-device MDB set-up; returns an int status (NOTE(review): presumably 0 or a negative errno — confirm). */
int vxlan_mdb_init ( struct vxlan_dev * vxlan ) ;
/* Per-device MDB tear-down. */
void vxlan_mdb_fini ( struct vxlan_dev * vxlan ) ;
2022-03-01 05:04:30 +00:00
# endif