2019-05-27 08:55:01 +02:00
/* SPDX-License-Identifier: GPL-2.0-or-later */
2005-04-16 15:20:36 -07:00
/*
* Linux ethernet bridge
*
* Authors :
* Lennert Buytenhek <buytenh@gnu.org>
*/
# ifndef _BR_PRIVATE_H
# define _BR_PRIVATE_H
# include <linux/netdevice.h>
# include <linux/if_bridge.h>
2010-06-10 16:12:50 +00:00
# include <linux/netpoll.h>
2010-06-23 13:00:48 -07:00
# include <linux/u64_stats_sync.h>
2008-07-30 16:27:55 -07:00
# include <net/route.h>
2015-05-30 15:30:16 +02:00
# include <net/ip6_fib.h>
2013-02-13 12:00:09 +00:00
# include <linux/if_vlan.h>
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
# include <linux/rhashtable.h>
2017-07-04 15:53:05 +03:00
# include <linux/refcount.h>
2005-04-16 15:20:36 -07:00
/*
 * Compile-time sizing and version constants.
 *
 * The *_SIZE / *_MAX values are derived from the matching *_BITS value so
 * the two can never drift apart.
 */
#define BR_HASH_BITS 8
#define BR_HASH_SIZE (1 << BR_HASH_BITS)	/* 256 */

/* One HZ worth of ticks; HZ itself is defined outside this header. */
#define BR_HOLD_TIME (1*HZ)

#define BR_PORT_BITS 10
#define BR_MAX_PORTS (1<<BR_PORT_BITS)	/* 1024 */

/* NOTE(review): presumably the default multicast hash_max - confirm at the user. */
#define BR_MULTICAST_DEFAULT_HASH_MAX 4096

/*
 * NOTE(review): bounded by BITS_PER_LONG, which suggests hardware domains
 * are tracked as bits of one unsigned long - confirm at the user.
 */
#define BR_HWDOM_MAX BITS_PER_LONG

#define BR_VERSION "2.3"
2011-10-03 18:14:46 +00:00
/*
 * Control of forwarding link local multicast.
 * The default value is 0, i.e. no bit set.
 */
# define BR_GROUPFWD_DEFAULT 0
2015-05-04 22:47:13 +02:00
/* Don't allow forwarding of control protocols like STP, MAC PAUSE and LACP */
2017-09-27 16:12:44 +03:00
/*
 * Group-forward flag bits, one per control protocol named in the
 * enumerator: STP (bit 0), MAC PAUSE (bit 1) and LACP (bit 2).
 */
enum {
BR_GROUPFWD_STP = BIT ( 0 ) ,
BR_GROUPFWD_MACPAUSE = BIT ( 1 ) ,
BR_GROUPFWD_LACP = BIT ( 2 ) ,
} ;
/* Union of the three protocol bits defined in the enum above. */
# define BR_GROUPFWD_RESTRICTED (BR_GROUPFWD_STP | BR_GROUPFWD_MACPAUSE | \
BR_GROUPFWD_LACP )
2014-06-10 20:59:24 +09:00
/*
 * The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F].
 * 0xB801 has exactly bits 0, 11, 12, 13 and 15 set, i.e. one bit for each of
 * the final address octets 00, 0B, 0C, 0D and 0F listed above.
 */
# define BR_GROUPFWD_8021AD 0xB801u
2011-10-03 18:14:46 +00:00
2007-03-21 14:22:44 -07:00
/*
 * Path to usermode spanning tree program.
 * This is an absolute filesystem path, so it must not contain any embedded
 * whitespace between its components.
 */
#define BR_STP_PROG "/sbin/bridge-stp"
2005-12-21 19:01:30 -08:00
2020-06-23 23:47:17 +03:00
/*
 * Mask combining FDB_NOTIFY_BIT and FDB_NOTIFY_INACTIVE_BIT.
 * NOTE(review): per the name these are presumably the FDB notify bits a
 * caller is allowed to set - confirm at the users of this mask.
 */
# define BR_FDB_NOTIFY_SETTABLE_BITS (FDB_NOTIFY_BIT | FDB_NOTIFY_INACTIVE_BIT)
2005-04-16 15:20:36 -07:00
/*
 * Shorthand type names: bridge_id and mac_addr alias the structs of the
 * same name, and port_id is a plain __u16.
 */
typedef struct bridge_id bridge_id ;
typedef struct mac_addr mac_addr ;
typedef __u16 port_id ;
2018-09-26 17:00:59 +03:00
struct bridge_id {
2005-04-16 15:20:36 -07:00
unsigned char prio [ 2 ] ;
2014-02-23 00:05:25 -08:00
unsigned char addr [ ETH_ALEN ] ;
2005-04-16 15:20:36 -07:00
} ;
2018-09-26 17:00:59 +03:00
struct mac_addr {
2014-02-23 00:05:25 -08:00
unsigned char addr [ ETH_ALEN ] ;
2005-04-16 15:20:36 -07:00
} ;
2013-08-30 17:28:17 +02:00
# ifdef CONFIG_BRIDGE_IGMP_SNOOPING
/* our own querier */
2014-06-07 18:26:26 +02:00
struct bridge_mcast_own_query {
2013-08-30 17:28:17 +02:00
struct timer_list timer ;
u32 startup_sent ;
} ;
/* other querier */
2014-06-07 18:26:26 +02:00
struct bridge_mcast_other_query {
2013-08-30 17:28:17 +02:00
struct timer_list timer ;
unsigned long delay_time ;
} ;
2014-06-07 18:26:27 +02:00
/* selected querier */
struct bridge_mcast_querier {
struct br_ip addr ;
2014-06-07 18:26:29 +02:00
struct net_bridge_port __rcu * port ;
2014-06-07 18:26:27 +02:00
} ;
2016-06-28 16:57:06 +02:00
/*
 * IGMP/MLD statistics: the counters themselves (mstats) paired with a
 * u64_stats_sync object (syncp).
 * NOTE(review): syncp presumably protects readers of mstats - confirm at
 * the update/read sites.
 */
struct bridge_mcast_stats {
struct br_mcast_stats mstats ;
struct u64_stats_sync syncp ;
} ;
2013-08-30 17:28:17 +02:00
# endif
2021-07-19 20:06:23 +03:00
/**
 * struct net_bridge_mcast_port - port multicast context
 * @port: bridge port this context refers to
 * @vlan: vlan entry this context refers to
 * @ip4_own_query: our own querier state for IPv4
 * @ip4_mc_router_timer: IPv4 multicast router timer
 * @ip4_rlist: hlist linkage (NOTE(review): presumably into an IPv4
 *             multicast router list - confirm)
 * @ip6_own_query: our own querier state for IPv6 (CONFIG_IPV6 only)
 * @ip6_mc_router_timer: IPv6 multicast router timer (CONFIG_IPV6 only)
 * @ip6_rlist: IPv6 counterpart of @ip4_rlist (CONFIG_IPV6 only)
 * @multicast_router: multicast router setting for this context
 *
 * net_bridge_mcast_port must be always defined due to forwarding stubs, so
 * the struct exists (with no members) even when CONFIG_BRIDGE_IGMP_SNOOPING
 * is disabled.
 */
struct net_bridge_mcast_port {
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
	struct net_bridge_port		*port;
	struct net_bridge_vlan		*vlan;

	struct bridge_mcast_own_query	ip4_own_query;
	struct timer_list		ip4_mc_router_timer;
	struct hlist_node		ip4_rlist;
#if IS_ENABLED(CONFIG_IPV6)
	struct bridge_mcast_own_query	ip6_own_query;
	struct timer_list		ip6_mc_router_timer;
	struct hlist_node		ip6_rlist;
#endif /* IS_ENABLED(CONFIG_IPV6) */
	unsigned char			multicast_router;
#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */
};
2021-07-19 20:06:24 +03:00
/**
 * struct net_bridge_mcast - bridge multicast context
 * @br: bridge this context refers to
 * @vlan: vlan entry this context refers to
 * @multicast_last_member_count: last member count setting
 * @multicast_startup_query_count: startup query count setting
 * @multicast_querier: querier setting
 * @multicast_igmp_version: IGMP version setting
 * @multicast_router: multicast router setting
 * @multicast_mld_version: MLD version setting (CONFIG_IPV6 only)
 * @multicast_last_member_interval: last member interval
 * @multicast_membership_interval: membership interval
 * @multicast_querier_interval: querier interval
 * @multicast_query_interval: query interval
 * @multicast_query_response_interval: query response interval
 * @multicast_startup_query_interval: startup query interval
 * @ip4_mc_router_list: head of the IPv4 multicast router hlist
 * @ip4_mc_router_timer: IPv4 multicast router timer
 * @ip4_other_query: other querier state for IPv4
 * @ip4_own_query: our own querier state for IPv4
 * @ip4_querier: selected querier for IPv4
 * @ip6_mc_router_list: head of the IPv6 multicast router hlist
 * @ip6_mc_router_timer: IPv6 multicast router timer
 * @ip6_other_query: other querier state for IPv6
 * @ip6_own_query: our own querier state for IPv6
 * @ip6_querier: selected querier for IPv6
 *
 * NOTE(review): the *_interval members are unsigned long; they are
 * presumably expressed in jiffies - confirm where they are set.
 *
 * net_bridge_mcast must be always defined due to forwarding stubs, so the
 * struct exists (with no members) even when CONFIG_BRIDGE_IGMP_SNOOPING is
 * disabled. All ip6_* members and @multicast_mld_version are only present
 * when CONFIG_IPV6 is enabled.
 */
struct net_bridge_mcast {
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
	struct net_bridge		*br;
	struct net_bridge_vlan		*vlan;

	u32				multicast_last_member_count;
	u32				multicast_startup_query_count;

	u8				multicast_querier;
	u8				multicast_igmp_version;
	u8				multicast_router;
#if IS_ENABLED(CONFIG_IPV6)
	u8				multicast_mld_version;
#endif
	unsigned long			multicast_last_member_interval;
	unsigned long			multicast_membership_interval;
	unsigned long			multicast_querier_interval;
	unsigned long			multicast_query_interval;
	unsigned long			multicast_query_response_interval;
	unsigned long			multicast_startup_query_interval;
	struct hlist_head		ip4_mc_router_list;
	struct timer_list		ip4_mc_router_timer;
	struct bridge_mcast_other_query	ip4_other_query;
	struct bridge_mcast_own_query	ip4_own_query;
	struct bridge_mcast_querier	ip4_querier;
#if IS_ENABLED(CONFIG_IPV6)
	struct hlist_head		ip6_mc_router_list;
	struct timer_list		ip6_mc_router_timer;
	struct bridge_mcast_other_query	ip6_other_query;
	struct bridge_mcast_own_query	ip6_own_query;
	struct bridge_mcast_querier	ip6_querier;
#endif /* IS_ENABLED(CONFIG_IPV6) */
#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */
};
2017-01-31 22:59:54 -08:00
struct br_tunnel_info {
2021-06-10 15:04:10 +03:00
__be64 tunnel_id ;
struct metadata_dst __rcu * tunnel_dst ;
2017-01-31 22:59:54 -08:00
} ;
2018-11-16 18:50:01 +02:00
/* private vlan flags */
enum {
BR_VLFLAG_PER_PORT_STATS = BIT ( 0 ) ,
2019-01-08 16:48:11 +00:00
BR_VLFLAG_ADDED_BY_SWITCHDEV = BIT ( 1 ) ,
2021-07-19 20:06:27 +03:00
BR_VLFLAG_MCAST_ENABLED = BIT ( 2 ) ,
BR_VLFLAG_GLOBAL_MCAST_ENABLED = BIT ( 3 ) ,
2018-11-16 18:50:01 +02:00
} ;
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
/**
* struct net_bridge_vlan - per-vlan entry
*
* @vnode: rhashtable member
* @vid: VLAN id
* @flags: bridge vlan flags
2018-11-16 18:50:01 +02:00
* @priv_flags: private (in-kernel) bridge vlan flags
2020-01-24 13:40:22 +02:00
* @state: STP state (e.g. blocking, learning, forwarding)
2016-04-30 10:25:28 +02:00
* @stats: per-cpu VLAN statistics
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
* @br: if MASTER flag set, this points to a bridge struct
* @port: if MASTER flag unset, this points to a port struct
* @refcnt: if MASTER flag set, this is bumped for each port referencing it
* @brvlan: if MASTER flag unset, this points to the global per-VLAN context
*          for this VLAN entry
2021-07-19 20:06:26 +03:00
* @br_mcast_ctx: if MASTER flag set, this is the global vlan multicast context
* @port_mcast_ctx: if MASTER flag unset, this is the per-port/vlan multicast
*                  context
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
* @vlist: sorted list of VLAN entries
* @rcu: used for entry destruction
*
* This structure is shared between the global per-VLAN entries contained in
* the bridge rhashtable and the local per-port per-VLAN entries contained in
* the port's rhashtable. The union entries should be interpreted depending on
* the entry flags that are set.
*/
struct net_bridge_vlan {
struct rhash_head vnode ;
2017-01-31 22:59:54 -08:00
struct rhash_head tnode ;
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
u16 vid ;
u16 flags ;
2018-11-16 18:50:01 +02:00
u16 priv_flags ;
2020-01-24 13:40:22 +02:00
u8 state ;
2020-11-17 21:25:42 +01:00
struct pcpu_sw_netstats __percpu * stats ;
2013-02-13 12:00:09 +00:00
union {
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
struct net_bridge * br ;
struct net_bridge_port * port ;
} ;
union {
2017-07-04 15:53:05 +03:00
refcount_t refcnt ;
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
struct net_bridge_vlan * brvlan ;
} ;
2017-01-31 22:59:54 -08:00
struct br_tunnel_info tinfo ;
2021-07-19 20:06:26 +03:00
union {
struct net_bridge_mcast br_mcast_ctx ;
struct net_bridge_mcast_port port_mcast_ctx ;
} ;
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
struct list_head vlist ;
2013-02-13 12:00:09 +00:00
struct rcu_head rcu ;
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so as to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
} ;
/**
* struct net_bridge_vlan_group
*
* @vlan_hash: VLAN entry rhashtable
* @vlan_list: sorted VLAN entry list
* @num_vlans: number of total VLAN entries
2015-09-30 20:16:53 +02:00
* @pvid: PVID VLAN id
2020-01-24 13:40:22 +02:00
* @pvid_state: PVID's STP state (e.g. forwarding, learning, blocking)
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so as to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
*
* IMPORTANT: Be careful when checking if there're VLAN entries using list
* primitives because the bridge can have entries in its list which
* are just for global context but not for filtering, i.e. they have
* the master flag set but not the brentry flag. If you have to check
* if there're "real" entries in the bridge please test @num_vlans
*/
struct net_bridge_vlan_group {
struct rhashtable vlan_hash ;
2017-01-31 22:59:54 -08:00
struct rhashtable tunnel_hash ;
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so as to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
struct list_head vlan_list ;
2013-02-13 12:00:13 +00:00
u16 num_vlans ;
2015-09-30 20:16:53 +02:00
u16 pvid ;
2020-01-24 13:40:22 +02:00
u8 pvid_state ;
2013-02-13 12:00:09 +00:00
} ;
2019-10-29 13:45:53 +02:00
/* bridge fdb flags
 *
 * Anonymous enum, so the enumerators take consecutive values starting at 0
 * (BR_FDB_LOCAL == 0 ... BR_FDB_NOTIFY_INACTIVE == 7).
 * NOTE(review): presumably these are bit numbers within
 * net_bridge_fdb_entry.flags (an unsigned long) rather than bit masks -
 * confirm against the users.
 */
enum {
BR_FDB_LOCAL ,
2019-10-29 13:45:54 +02:00
BR_FDB_STATIC ,
2019-10-29 13:45:55 +02:00
BR_FDB_STICKY ,
2019-10-29 13:45:56 +02:00
BR_FDB_ADDED_BY_USER ,
2019-10-29 13:45:57 +02:00
BR_FDB_ADDED_BY_EXT_LEARN ,
2019-10-29 13:45:58 +02:00
BR_FDB_OFFLOADED ,
2020-06-23 23:47:17 +03:00
BR_FDB_NOTIFY ,
BR_FDB_NOTIFY_INACTIVE
2019-10-29 13:45:53 +02:00
} ;
2017-12-12 16:02:50 +02:00
/* Identity of a forwarding database entry: a MAC address plus a VLAN id.
 * Embedded as net_bridge_fdb_entry.key.
 * NOTE(review): presumably the lookup key of net_bridge.fdb_hash_tbl -
 * confirm against the rhashtable parameters.
 */
struct net_bridge_fdb_key {
mac_addr addr ;
u16 vlan_id ;
} ;
2017-02-04 18:05:08 +01:00
struct net_bridge_fdb_entry {
2017-12-12 16:02:50 +02:00
struct rhash_head rhnode ;
2005-04-16 15:20:36 -07:00
struct net_bridge_port * dst ;
2017-12-12 16:02:50 +02:00
struct net_bridge_fdb_key key ;
struct hlist_node fdb_node ;
2019-10-29 13:45:53 +02:00
unsigned long flags ;
2017-02-04 18:05:08 +01:00
/* write-heavy members should not affect lookups */
unsigned long updated ____cacheline_aligned_in_smp ;
unsigned long used ;
bridge: fdb: rearrange net_bridge_fdb_entry
While looking into fixing the local entries scalability issue I noticed
that the structure is badly arranged because vlan_id would fall in a
second cache line while keeping rcu which is used only when deleting
in the first, so re-arrange the structure and push rcu to the end so we
can get 16 bytes which can be used for other fields (by pushing rcu
fully in the second 64 byte chunk). With this change all the core
necessary information when doing fdb lookups will be available in a
single cache line.
pahole before (note vlan_id):
struct net_bridge_fdb_entry {
struct hlist_node hlist; /* 0 16 */
struct net_bridge_port * dst; /* 16 8 */
struct callback_head rcu; /* 24 16 */
long unsigned int updated; /* 40 8 */
long unsigned int used; /* 48 8 */
mac_addr addr; /* 56 6 */
unsigned char is_local:1; /* 62: 7 1 */
unsigned char is_static:1; /* 62: 6 1 */
unsigned char added_by_user:1; /* 62: 5 1 */
unsigned char added_by_external_learn:1; /* 62: 4 1 */
/* XXX 4 bits hole, try to pack */
/* XXX 1 byte hole, try to pack */
/* --- cacheline 1 boundary (64 bytes) --- */
__u16 vlan_id; /* 64 2 */
/* size: 72, cachelines: 2, members: 11 */
/* sum members: 65, holes: 1, sum holes: 1 */
/* bit holes: 1, sum bit holes: 4 bits */
/* padding: 6 */
/* last cacheline: 8 bytes */
}
pahole after (note vlan_id):
struct net_bridge_fdb_entry {
struct hlist_node hlist; /* 0 16 */
struct net_bridge_port * dst; /* 16 8 */
long unsigned int updated; /* 24 8 */
long unsigned int used; /* 32 8 */
mac_addr addr; /* 40 6 */
__u16 vlan_id; /* 46 2 */
unsigned char is_local:1; /* 48: 7 1 */
unsigned char is_static:1; /* 48: 6 1 */
unsigned char added_by_user:1; /* 48: 5 1 */
unsigned char added_by_external_learn:1; /* 48: 4 1 */
/* XXX 4 bits hole, try to pack */
/* XXX 7 bytes hole, try to pack */
struct callback_head rcu; /* 56 16 */
/* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */
/* size: 72, cachelines: 2, members: 11 */
/* sum members: 65, holes: 1, sum holes: 7 */
/* bit holes: 1, sum bit holes: 4 bits */
/* last cacheline: 8 bytes */
}
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-08-27 14:19:20 -07:00
struct rcu_head rcu ;
2005-04-16 15:20:36 -07:00
} ;
2016-02-03 09:57:05 +01:00
/* Port group flag bits (MDB_PG_FLAGS_*), one bit each.
 * NOTE(review): presumably stored in net_bridge_port_group.flags - confirm.
 */
# define MDB_PG_FLAGS_PERMANENT BIT(0)
# define MDB_PG_FLAGS_OFFLOAD BIT(1)
2019-07-30 15:20:41 +03:00
# define MDB_PG_FLAGS_FAST_LEAVE BIT(2)
net: bridge: mcast: handle port group filter modes
We need to handle group filter mode transitions and initial state.
To change a port group's INCLUDE -> EXCLUDE mode (or when we have added
a new port group in EXCLUDE mode) we need to add that port to all of
*,G ports' S,G entries for proper replication. When the EXCLUDE state is
changed from IGMPv3 report, br_multicast_fwd_filter_exclude() must be
called after the source list processing because the assumption is that
all of the group's S,G entries will be created before transitioning to
EXCLUDE mode, i.e. most importantly its blocked entries will already be
added so it will not get automatically added to them.
The transition EXCLUDE -> INCLUDE happens only when a port group timer
expires, it requires us to remove that port from all of *,G ports' S,G
entries where it was automatically added previously.
Finally when we are adding a new S,G entry we must add all of *,G's
EXCLUDE ports to it.
In order to distinguish automatically added *,G EXCLUDE ports we have a
new port group flag - MDB_PG_FLAGS_STAR_EXCL.
Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-22 10:30:24 +03:00
# define MDB_PG_FLAGS_STAR_EXCL BIT(3)
2020-09-22 10:30:25 +03:00
# define MDB_PG_FLAGS_BLOCKED BIT(4)
2016-02-03 09:57:05 +01:00
2020-09-07 12:56:07 +03:00
/* NOTE(review): PG_SRC_ENT_LIMIT is presumably the cap on
 * net_bridge_port_group.src_ents (source entries per port group) - confirm.
 */
# define PG_SRC_ENT_LIMIT 32
/* BR_SGRP_F_*: single-bit flags.
 * NOTE(review): presumably stored in net_bridge_group_src.flags - confirm.
 */
# define BR_SGRP_F_DELETE BIT(0)
# define BR_SGRP_F_SEND BIT(1)
2020-09-22 10:30:23 +03:00
# define BR_SGRP_F_INSTALLED BIT(2)
2020-09-07 12:56:07 +03:00
2020-09-07 12:56:19 +03:00
/* Garbage-collection hook embedded (as the mcast_gc member) in
 * net_bridge_group_src, net_bridge_port_group and net_bridge_mdb_entry.
 *
 * gc_node: hlist linkage. NOTE(review): presumably onto
 *          net_bridge.mcast_gc_list - confirm.
 * destroy: callback that receives a pointer to this embedded object.
 */
struct net_bridge_mcast_gc {
struct hlist_node gc_node ;
void ( * destroy ) ( struct net_bridge_mcast_gc * gc ) ;
} ;
2020-09-07 12:56:07 +03:00
/* One source address entry belonging to a multicast port group.
 *
 * Holds the source address (addr), back pointers to the owning port group
 * (pg) and bridge (br), a timer, and the mcast_gc / rcu members.
 * NOTE(review): node is presumably linked into
 * net_bridge_port_group.src_list, and flags presumably carries BR_SGRP_F_*
 * bits - confirm both against the multicast code.
 */
struct net_bridge_group_src {
struct hlist_node node ;
struct br_ip addr ;
struct net_bridge_port_group * pg ;
u8 flags ;
2020-09-07 12:56:09 +03:00
u8 src_query_rexmit_cnt ;
2020-09-07 12:56:07 +03:00
struct timer_list timer ;
struct net_bridge * br ;
2020-09-07 12:56:19 +03:00
struct net_bridge_mcast_gc mcast_gc ;
2020-09-07 12:56:07 +03:00
struct rcu_head rcu ;
} ;
2020-09-22 10:30:22 +03:00
/* Identity of a port group: a bridge port plus a group address.
 * Embedded as net_bridge_port_group.key.
 * NOTE(review): presumably the lookup key of net_bridge.sg_port_tbl -
 * confirm against the rhashtable parameters.
 */
struct net_bridge_port_group_sg_key {
2010-02-27 19:41:45 +00:00
struct net_bridge_port * port ;
2010-04-18 12:42:07 +09:00
struct br_ip addr ;
2020-09-22 10:30:22 +03:00
} ;
/* Per-port membership of a multicast group.
 *
 * next is an __rcu-annotated pointer to another port group;
 * net_bridge_mdb_entry.ports has the same type, so these presumably form a
 * singly linked chain per mdb entry (NOTE(review): confirm).
 * key identifies the entry by (port, group address).
 *
 * NOTE(review), all to be confirmed against the multicast code:
 *  - flags presumably carries MDB_PG_FLAGS_* bits;
 *  - src_list / src_ents are presumably the list of net_bridge_group_src
 *    entries and their count;
 *  - mglist is presumably linked into net_bridge_port.mglist;
 *  - rhnode is presumably the linkage for net_bridge.sg_port_tbl.
 */
struct net_bridge_port_group {
struct net_bridge_port_group __rcu * next ;
struct net_bridge_port_group_sg_key key ;
2020-06-25 14:26:03 +02:00
unsigned char eth_addr [ ETH_ALEN ] __aligned ( 2 ) ;
2016-02-03 09:57:05 +01:00
unsigned char flags ;
2020-09-07 12:56:07 +03:00
unsigned char filter_mode ;
2020-09-07 12:56:10 +03:00
unsigned char grp_query_rexmit_cnt ;
2020-09-22 10:30:21 +03:00
unsigned char rt_protocol ;
2020-09-07 12:56:05 +03:00
2020-09-07 12:56:07 +03:00
struct hlist_head src_list ;
unsigned int src_ents ;
2020-09-07 12:56:05 +03:00
struct timer_list timer ;
2020-09-07 12:56:10 +03:00
struct timer_list rexmit_timer ;
2020-09-07 12:56:05 +03:00
struct hlist_node mglist ;
2021-01-20 16:51:54 +02:00
struct rb_root eht_set_tree ;
struct rb_root eht_host_tree ;
2020-09-07 12:56:05 +03:00
2020-09-22 10:30:22 +03:00
struct rhash_head rhnode ;
2020-09-07 12:56:19 +03:00
struct net_bridge_mcast_gc mcast_gc ;
2020-09-07 12:56:05 +03:00
struct rcu_head rcu ;
2010-02-27 19:41:45 +00:00
} ;
2018-09-26 17:00:59 +03:00
/* Multicast database entry for one group address (addr) on a bridge (br).
 *
 * ports is an __rcu-annotated pointer to a net_bridge_port_group; that
 * struct has a next pointer of the same type, so ports is presumably the
 * head of the per-group port list (NOTE(review): confirm).
 *
 * NOTE(review), to be confirmed against the multicast code:
 *  - rhnode is presumably the linkage for net_bridge.mdb_hash_tbl;
 *  - mdb_node is presumably linked into net_bridge.mdb_list.
 */
struct net_bridge_mdb_entry {
2018-12-05 15:14:24 +02:00
struct rhash_head rhnode ;
2010-02-27 19:41:45 +00:00
struct net_bridge * br ;
2010-11-15 06:38:10 +00:00
struct net_bridge_port_group __rcu * ports ;
2010-04-18 12:42:07 +09:00
struct br_ip addr ;
2017-11-09 23:10:57 +01:00
bool host_joined ;
2020-09-07 12:56:05 +03:00
struct timer_list timer ;
2018-12-05 15:14:24 +02:00
struct hlist_node mdb_node ;
2020-09-07 12:56:05 +03:00
2020-09-07 12:56:19 +03:00
struct net_bridge_mcast_gc mcast_gc ;
2020-09-07 12:56:05 +03:00
struct rcu_head rcu ;
2010-02-27 19:41:45 +00:00
} ;
2017-02-04 18:05:06 +01:00
struct net_bridge_port {
2005-04-16 15:20:36 -07:00
struct net_bridge * br ;
struct net_device * dev ;
struct list_head list ;
2017-02-04 18:05:06 +01:00
unsigned long flags ;
# ifdef CONFIG_BRIDGE_VLAN_FILTERING
struct net_bridge_vlan_group __rcu * vlgrp ;
# endif
2018-07-23 11:16:59 +03:00
struct net_bridge_port __rcu * backup_port ;
2017-02-04 18:05:06 +01:00
2005-04-16 15:20:36 -07:00
/* STP */
u8 priority ;
u8 state ;
u16 port_no ;
unsigned char topology_change_ack ;
unsigned char config_pending ;
port_id port_id ;
port_id designated_port ;
bridge_id designated_root ;
bridge_id designated_bridge ;
u32 path_cost ;
u32 designated_cost ;
2011-07-22 07:47:06 +00:00
unsigned long designated_age ;
2005-04-16 15:20:36 -07:00
struct timer_list forward_delay_timer ;
struct timer_list hold_timer ;
struct timer_list message_age_timer ;
struct kobject kobj ;
struct rcu_head rcu ;
2009-08-13 06:55:16 +00:00
2021-07-19 20:06:23 +03:00
struct net_bridge_mcast_port multicast_ctx ;
2010-02-27 19:41:45 +00:00
# ifdef CONFIG_BRIDGE_IGMP_SNOOPING
2021-07-19 20:06:23 +03:00
struct bridge_mcast_stats __percpu * mcast_stats ;
2021-01-26 11:35:32 +02:00
u32 multicast_eht_hosts_limit ;
u32 multicast_eht_hosts_cnt ;
2010-02-27 19:41:45 +00:00
struct hlist_head mglist ;
# endif
2010-05-10 09:31:11 +00:00
# ifdef CONFIG_SYSFS
char sysfs_name [ IFNAMSIZ ] ;
# endif
2010-06-10 16:12:50 +00:00
# ifdef CONFIG_NET_POLL_CONTROLLER
struct netpoll * np ;
# endif
2016-08-25 18:42:37 +02:00
# ifdef CONFIG_NET_SWITCHDEV
net: bridge: disambiguate offload_fwd_mark
Before this change, four related - but distinct - concepts were named
offload_fwd_mark:
- skb->offload_fwd_mark: Set by the switchdev driver if the underlying
hardware has already forwarded this frame to the other ports in the
same hardware domain.
- nbp->offload_fwd_mark: An identifier used to group ports that share
the same hardware forwarding domain.
- br->offload_fwd_mark: Counter used to make sure that unique IDs are
used in cases where a bridge contains ports from multiple hardware
domains.
- skb->cb->offload_fwd_mark: The hardware domain on which the frame
ingressed and was forwarded.
Introduce the term "hardware forwarding domain" ("hwdom") in the
bridge to denote a set of ports with the following property:
If an skb with skb->offload_fwd_mark set, is received on a port
belonging to hwdom N, that frame has already been forwarded to all
other ports in hwdom N.
By decoupling the name from "offload_fwd_mark", we can extend the
term's definition in the future - e.g. to add constraints that
describe expected egress behavior - without overloading the meaning of
"offload_fwd_mark".
- nbp->offload_fwd_mark thus becomes nbp->hwdom.
- br->offload_fwd_mark becomes br->last_hwdom.
- skb->cb->offload_fwd_mark becomes skb->cb->src_hwdom. The slight
change in naming here mandates a slight change in behavior of the
nbp_switchdev_frame_mark() function. Previously, it only set this
value in skb->cb for packets with skb->offload_fwd_mark true (ones
which were forwarded in hardware). Whereas now we always track the
incoming hwdom for all packets coming from a switchdev (even for the
packets which weren't forwarded in hardware, such as STP BPDUs, IGMP
reports etc). As all uses of skb->cb->offload_fwd_mark were already
gated behind checks of skb->offload_fwd_mark, this will not introduce
any functional change, but it paves the way for future changes where
the ingressing hwdom must be known for frames coming from a switchdev
regardless of whether they were forwarded in hardware or not
(basically, if the skb comes from a switchdev, skb->cb->src_hwdom now
always tracks which one).
A typical example where this is relevant: the switchdev has a fixed
configuration to trap STP BPDUs, but STP is not running on the bridge
and the group_fwd_mask allows them to be forwarded. Say we have this
setup:
br0
/ | \
/ | \
swp0 swp1 swp2
A BPDU comes in on swp0 and is trapped to the CPU; the driver does not
set skb->offload_fwd_mark. The bridge determines that the frame should
be forwarded to swp{1,2}. It is imperative that forward offloading is
_not_ allowed in this case, as the source hwdom is already "poisoned".
Recording the source hwdom allows this case to be handled properly.
v2->v3: added code comments
v3->v6: none
Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-21 19:23:59 +03:00
/* Identifier used to group ports that share the same switchdev
* hardware domain .
*/
int hwdom ;
net: bridge: switchdev: let drivers inform which bridge ports are offloaded
On reception of an skb, the bridge checks if it was marked as 'already
forwarded in hardware' (checks if skb->offload_fwd_mark == 1), and if it
is, it assigns the source hardware domain of that skb based on the
hardware domain of the ingress port. Then during forwarding, it enforces
that the egress port must have a different hardware domain than the
ingress one (this is done in nbp_switchdev_allowed_egress).
Non-switchdev drivers don't report any physical switch id (neither
through devlink nor .ndo_get_port_parent_id), therefore the bridge
assigns them a hardware domain of 0, and packets coming from them will
always have skb->offload_fwd_mark = 0. So there aren't any restrictions.
Problems appear due to the fact that DSA would like to perform software
fallback for bonding and team interfaces that the physical switch cannot
offload.
+-- br0 ---+
/ / | \
/ / | \
/ | | bond0
/ | | / \
swp0 swp1 swp2 swp3 swp4
There, it is desirable that the presence of swp3 and swp4 under a
non-offloaded LAG does not preclude us from doing hardware bridging
between swp0, swp1 and swp2. The bandwidth of the CPU is oftentimes high
enough that software bridging between {swp0,swp1,swp2} and bond0 is not
impractical.
But this creates an impossible paradox given the current way in which
port hardware domains are assigned. When the driver receives a packet
from swp0 (say, due to flooding), it must set skb->offload_fwd_mark to
something.
- If we set it to 0, then the bridge will forward it towards swp1, swp2
and bond0. But the switch has already forwarded it towards swp1 and
swp2 (not to bond0, remember, that isn't offloaded, so as far as the
switch is concerned, ports swp3 and swp4 are not looking up the FDB,
and the entire bond0 is a destination that is strictly behind the
CPU). But we don't want duplicated traffic towards swp1 and swp2, so
it's not ok to set skb->offload_fwd_mark = 0.
- If we set it to 1, then the bridge will not forward the skb towards
the ports with the same switchdev mark, i.e. not to swp1, swp2 and
bond0. Towards swp1 and swp2 that's ok, but towards bond0? It should
have forwarded the skb there.
So the real issue is that bond0 will be assigned the same hardware
domain as {swp0,swp1,swp2}, because the function that assigns hardware
domains to bridge ports, nbp_switchdev_add(), recurses through bond0's
lower interfaces until it finds something that implements devlink (calls
dev_get_port_parent_id with bool recurse = true). This is a problem
because the fact that bond0 can be offloaded by swp3 and swp4 in our
example is merely an assumption.
A solution is to give the bridge explicit hints as to what hardware
domain it should use for each port.
Currently, the bridging offload is very 'silent': a driver registers a
netdevice notifier, which is put on the netns's notifier chain, and
which sniffs around for NETDEV_CHANGEUPPER events where the upper is a
bridge, and the lower is an interface it knows about (one registered by
this driver, normally). Then, from within that notifier, it does a bunch
of stuff behind the bridge's back, without the bridge necessarily
knowing that there's somebody offloading that port. It looks like this:
ip link set swp0 master br0
|
v
br_add_if() calls netdev_master_upper_dev_link()
|
v
call_netdevice_notifiers
|
v
dsa_slave_netdevice_event
|
v
oh, hey! it's for me!
|
v
.port_bridge_join
What we do to solve the conundrum is to be less silent, and change the
switchdev drivers to present themselves to the bridge. Something like this:
ip link set swp0 master br0
|
v
br_add_if() calls netdev_master_upper_dev_link()
|
v bridge: Aye! I'll use this
call_netdevice_notifiers ^ ppid as the
| | hardware domain for
v | this port, and zero
dsa_slave_netdevice_event | if I got nothing.
| |
v |
oh, hey! it's for me! |
| |
v |
.port_bridge_join |
| |
+------------------------+
switchdev_bridge_port_offload(swp0, swp0)
Then stacked interfaces (like bond0 on top of swp3/swp4) would be
treated differently in DSA, depending on whether we can or cannot
offload them.
The offload case:
ip link set bond0 master br0
|
v
br_add_if() calls netdev_master_upper_dev_link()
|
v bridge: Aye! I'll use this
call_netdevice_notifiers ^ ppid as the
| | switchdev mark for
v | bond0.
dsa_slave_netdevice_event | Coincidentally (or not),
| | bond0 and swp0, swp1, swp2
v | all have the same switchdev
hmm, it's not quite for me, | mark now, since the ASIC
but my driver has already | is able to forward towards
called .port_lag_join | all these ports in hw.
for it, because I have |
a port with dp->lag_dev == bond0. |
| |
v |
.port_bridge_join |
for swp3 and swp4 |
| |
+------------------------+
switchdev_bridge_port_offload(bond0, swp3)
switchdev_bridge_port_offload(bond0, swp4)
And the non-offload case:
ip link set bond0 master br0
|
v
br_add_if() calls netdev_master_upper_dev_link()
|
v bridge waiting:
call_netdevice_notifiers ^ huh, switchdev_bridge_port_offload
| | wasn't called, okay, I'll use a
v | hwdom of zero for this one.
dsa_slave_netdevice_event : Then packets received on swp0 will
| : not be software-forwarded towards
v : swp1, but they will towards bond0.
it's not for me, but
bond0 is an upper of swp3
and swp4, but their dp->lag_dev
is NULL because they couldn't
offload it.
Basically we can draw the conclusion that the lowers of a bridge port
can come and go, so depending on the configuration of lowers for a
bridge port, it can dynamically toggle between offloaded and unoffloaded.
Therefore, we need an equivalent switchdev_bridge_port_unoffload too.
This patch changes the way any switchdev driver interacts with the
bridge. From now on, everybody needs to call switchdev_bridge_port_offload
and switchdev_bridge_port_unoffload, otherwise the bridge will treat the
port as non-offloaded and allow software flooding to other ports from
the same ASIC.
Note that these functions lay the ground for a more complex handshake
between switchdev drivers and the bridge in the future.
For drivers that will request a replay of the switchdev objects when
they offload and unoffload a bridge port (DSA, dpaa2-switch, ocelot), we
place the call to switchdev_bridge_port_unoffload() strategically inside
the NETDEV_PRECHANGEUPPER notifier's code path, and not inside
NETDEV_CHANGEUPPER. This is because the switchdev object replay helpers
need the netdev adjacency lists to be valid, and that is only true in
NETDEV_PRECHANGEUPPER.
Cc: Vadym Kochan <vkochan@marvell.com>
Cc: Taras Chornyi <tchornyi@marvell.com>
Cc: Ioana Ciornei <ioana.ciornei@nxp.com>
Cc: Lars Povlsen <lars.povlsen@microchip.com>
Cc: Steen Hegelund <Steen.Hegelund@microchip.com>
Cc: UNGLinuxDriver@microchip.com
Cc: Claudiu Manoil <claudiu.manoil@nxp.com>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Tested-by: Ioana Ciornei <ioana.ciornei@nxp.com> # dpaa2-switch: regression
Acked-by: Ioana Ciornei <ioana.ciornei@nxp.com> # dpaa2-switch
Tested-by: Horatiu Vultur <horatiu.vultur@microchip.com> # ocelot-switch
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-21 19:24:01 +03:00
int offload_count ;
struct netdev_phys_item_id ppid ;
2016-08-25 18:42:37 +02:00
# endif
2017-09-27 16:12:44 +03:00
u16 group_fwd_mask ;
2018-07-23 11:16:59 +03:00
u16 backup_redirected_cnt ;
2019-12-11 20:07:10 -05:00
struct bridge_stp_xstats stp_xstats ;
2005-04-16 15:20:36 -07:00
} ;
2018-07-20 21:56:54 +00:00
/* Map a pointer to the kobj member embedded in a struct net_bridge_port back
 * to the containing port (container_of on the kobj field).
 */
# define kobj_to_brport(obj) container_of(obj, struct net_bridge_port, kobj)
2014-05-16 09:59:16 -04:00
/* Non-zero when any BR_AUTO_MASK bit is set in p->flags; the result is the
 * masked value, not a normalised 0/1.
 */
# define br_auto_port(p) ((p)->flags & BR_AUTO_MASK)
2014-05-16 09:59:18 -04:00
/* Non-zero when the BR_PROMISC bit(s) are set in p->flags; the result is the
 * masked value, not a normalised 0/1.
 */
# define br_promisc_port(p) ((p)->flags & BR_PROMISC)
2014-05-16 09:59:16 -04:00
2010-11-15 06:38:13 +00:00
/* Return the bridge port stored in dev->rx_handler_data, read through
 * rcu_dereference().
 *
 * Unlike br_port_get_rtnl() and br_port_get_rtnl_rcu(), there is no
 * netif_is_bridge_port() check here and NULL is never substituted.
 * NOTE(review): callers presumably already know that dev is a bridge port
 * and are inside an RCU read-side critical section - confirm at call sites.
 */
static inline struct net_bridge_port * br_port_get_rcu ( const struct net_device * dev )
{
2013-09-14 22:42:28 +08:00
return rcu_dereference ( dev - > rx_handler_data ) ;
2010-11-15 06:38:13 +00:00
}
2013-09-14 22:42:27 +08:00
/* Return the bridge port attached to dev, or NULL when
 * netif_is_bridge_port(dev) is false.
 *
 * The pointer is read from dev->rx_handler_data via rtnl_dereference().
 * NOTE(review): that accessor is meant for callers holding the RTNL lock -
 * confirm at call sites.
 */
static inline struct net_bridge_port * br_port_get_rtnl ( const struct net_device * dev )
2010-11-15 06:38:13 +00:00
{
2019-03-29 14:38:19 +01:00
return netif_is_bridge_port ( dev ) ?
2010-11-15 06:38:14 +00:00
rtnl_dereference ( dev - > rx_handler_data ) : NULL ;
2010-11-15 06:38:13 +00:00
}
2017-06-08 08:44:12 +02:00
/* Return the bridge port attached to dev, or NULL when
 * netif_is_bridge_port(dev) is false.
 *
 * Same shape as br_port_get_rtnl(), but the pointer is read via
 * rcu_dereference_rtnl().
 * NOTE(review): that accessor is meant for callers that hold either the
 * RTNL lock or an RCU read-side critical section - confirm at call sites.
 */
static inline struct net_bridge_port * br_port_get_rtnl_rcu ( const struct net_device * dev )
{
2019-03-29 14:38:19 +01:00
return netif_is_bridge_port ( dev ) ?
2017-06-08 08:44:12 +02:00
rcu_dereference_rtnl ( dev - > rx_handler_data ) : NULL ;
}
2018-09-26 17:01:00 +03:00
/* Bridge option identifiers.
 *
 * No explicit values are assigned, so the enumerators are consecutive
 * starting at 0 (BROPT_VLAN_ENABLED == 0).
 * NOTE(review): presumably bit numbers within net_bridge.options (an
 * unsigned long) - confirm against the option get/set helpers. If so, the
 * number of enumerators must stay within the bits of an unsigned long.
 */
enum net_bridge_opts {
BROPT_VLAN_ENABLED ,
BROPT_VLAN_STATS_ENABLED ,
2018-09-26 17:01:01 +03:00
BROPT_NF_CALL_IPTABLES ,
BROPT_NF_CALL_IP6TABLES ,
BROPT_NF_CALL_ARPTABLES ,
2018-09-26 17:01:02 +03:00
BROPT_GROUP_ADDR_SET ,
2018-09-26 17:01:03 +03:00
BROPT_MULTICAST_ENABLED ,
2018-09-26 17:01:04 +03:00
BROPT_MULTICAST_QUERY_USE_IFADDR ,
BROPT_MULTICAST_STATS_ENABLED ,
BROPT_HAS_IPV6_ADDR ,
2018-09-26 17:01:05 +03:00
BROPT_NEIGH_SUPPRESS_ENABLED ,
2018-09-26 17:01:06 +03:00
BROPT_MTU_SET_BY_USER ,
2018-10-12 13:41:16 +03:00
BROPT_VLAN_STATS_PER_PORT ,
2018-11-24 04:34:21 +02:00
BROPT_NO_LL_LEARN ,
2019-04-18 18:35:33 +01:00
BROPT_VLAN_BRIDGE_BINDING ,
2021-07-19 20:06:28 +03:00
BROPT_MCAST_VLAN_SNOOPING_ENABLED ,
2018-09-26 17:01:00 +03:00
} ;
2017-02-04 18:05:06 +01:00
struct net_bridge {
2005-04-16 15:20:36 -07:00
spinlock_t lock ;
2017-02-04 18:05:06 +01:00
spinlock_t hash_lock ;
2020-10-27 10:02:42 +00:00
struct hlist_head frame_type_list ;
2005-04-16 15:20:36 -07:00
struct net_device * dev ;
2018-09-26 17:01:00 +03:00
unsigned long options ;
2017-02-04 18:05:06 +01:00
/* These fields are accessed on each packet */
# ifdef CONFIG_BRIDGE_VLAN_FILTERING
__be16 vlan_proto ;
u16 default_pvid ;
struct net_bridge_vlan_group __rcu * vlgrp ;
# endif
2017-12-12 16:02:50 +02:00
struct rhashtable fdb_hash_tbl ;
2020-10-27 10:02:42 +00:00
struct list_head port_list ;
2014-09-18 11:29:03 +02:00
# if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
2015-05-30 15:30:16 +02:00
union {
struct rtable fake_rtable ;
struct rt6_info fake_rt6_info ;
} ;
2008-07-30 16:27:55 -07:00
# endif
2011-10-03 18:14:46 +00:00
u16 group_fwd_mask ;
2014-06-10 20:59:24 +09:00
u16 group_fwd_mask_required ;
2011-10-03 18:14:46 +00:00
2005-04-16 15:20:36 -07:00
/* STP */
bridge_id designated_root ;
bridge_id bridge_id ;
2017-02-04 18:05:06 +01:00
unsigned char topology_change ;
unsigned char topology_change_detected ;
u16 root_port ;
2005-04-16 15:20:36 -07:00
unsigned long max_age ;
unsigned long hello_time ;
unsigned long forward_delay ;
unsigned long ageing_time ;
2016-12-10 13:44:29 -05:00
unsigned long bridge_max_age ;
2005-04-16 15:20:36 -07:00
unsigned long bridge_hello_time ;
unsigned long bridge_forward_delay ;
2016-12-10 13:44:29 -05:00
unsigned long bridge_ageing_time ;
2018-09-26 17:01:07 +03:00
u32 root_path_cost ;
2005-04-16 15:20:36 -07:00
2006-03-20 22:59:21 -08:00
u8 group_addr [ ETH_ALEN ] ;
2007-03-21 14:22:44 -07:00
enum {
BR_NO_STP , /* no spanning tree */
BR_KERNEL_STP , /* old STP in kernel */
BR_USER_STP , /* new RSTP in userspace */
} stp_enabled ;
2021-07-19 20:06:24 +03:00
struct net_bridge_mcast multicast_ctx ;
2010-02-27 19:41:45 +00:00
# ifdef CONFIG_BRIDGE_IGMP_SNOOPING
2021-07-19 20:06:24 +03:00
struct bridge_mcast_stats __percpu * mcast_stats ;
2010-02-27 19:41:45 +00:00
u32 hash_max ;
2018-09-26 17:01:07 +03:00
spinlock_t multicast_lock ;
2010-02-27 19:41:45 +00:00
2018-12-05 15:14:24 +02:00
struct rhashtable mdb_hash_tbl ;
2020-09-22 10:30:22 +03:00
struct rhashtable sg_port_tbl ;
2018-12-05 15:14:24 +02:00
2020-09-07 12:56:19 +03:00
struct hlist_head mcast_gc_list ;
2018-12-05 15:14:24 +02:00
struct hlist_head mdb_list ;
2010-02-27 19:41:45 +00:00
2020-09-07 12:56:19 +03:00
struct work_struct mcast_gc_work ;
2010-02-27 19:41:45 +00:00
# endif
2005-04-16 15:20:36 -07:00
struct timer_list hello_timer ;
struct timer_list tcn_timer ;
struct timer_list topology_change_timer ;
2017-02-04 18:05:07 +01:00
struct delayed_work gc_work ;
2007-12-17 15:54:39 -04:00
struct kobject * ifobj ;
2014-05-16 09:59:16 -04:00
u32 auto_cnt ;
2016-08-25 18:42:37 +02:00
# ifdef CONFIG_NET_SWITCHDEV
net: bridge: disambiguate offload_fwd_mark
Before this change, four related - but distinct - concepts where named
offload_fwd_mark:
- skb->offload_fwd_mark: Set by the switchdev driver if the underlying
hardware has already forwarded this frame to the other ports in the
same hardware domain.
- nbp->offload_fwd_mark: An idetifier used to group ports that share
the same hardware forwarding domain.
- br->offload_fwd_mark: Counter used to make sure that unique IDs are
used in cases where a bridge contains ports from multiple hardware
domains.
- skb->cb->offload_fwd_mark: The hardware domain on which the frame
ingressed and was forwarded.
Introduce the term "hardware forwarding domain" ("hwdom") in the
bridge to denote a set of ports with the following property:
If an skb with skb->offload_fwd_mark set, is received on a port
belonging to hwdom N, that frame has already been forwarded to all
other ports in hwdom N.
By decoupling the name from "offload_fwd_mark", we can extend the
term's definition in the future - e.g. to add constraints that
describe expected egress behavior - without overloading the meaning of
"offload_fwd_mark".
- nbp->offload_fwd_mark thus becomes nbp->hwdom.
- br->offload_fwd_mark becomes br->last_hwdom.
- skb->cb->offload_fwd_mark becomes skb->cb->src_hwdom. The slight
change in naming here mandates a slight change in behavior of the
nbp_switchdev_frame_mark() function. Previously, it only set this
value in skb->cb for packets with skb->offload_fwd_mark true (ones
which were forwarded in hardware). Whereas now we always track the
incoming hwdom for all packets coming from a switchdev (even for the
packets which weren't forwarded in hardware, such as STP BPDUs, IGMP
reports etc). As all uses of skb->cb->offload_fwd_mark were already
gated behind checks of skb->offload_fwd_mark, this will not introduce
any functional change, but it paves the way for future changes where
the ingressing hwdom must be known for frames coming from a switchdev
regardless of whether they were forwarded in hardware or not
(basically, if the skb comes from a switchdev, skb->cb->src_hwdom now
always tracks which one).
A typical example where this is relevant: the switchdev has a fixed
configuration to trap STP BPDUs, but STP is not running on the bridge
and the group_fwd_mask allows them to be forwarded. Say we have this
setup:
br0
/ | \
/ | \
swp0 swp1 swp2
A BPDU comes in on swp0 and is trapped to the CPU; the driver does not
set skb->offload_fwd_mark. The bridge determines that the frame should
be forwarded to swp{1,2}. It is imperative that forward offloading is
_not_ allowed in this case, as the source hwdom is already "poisoned".
Recording the source hwdom allows this case to be handled properly.
v2->v3: added code comments
v3->v6: none
Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-21 19:23:59 +03:00
/* Counter used to make sure that hardware domains get unique
* identifiers in case a bridge spans multiple switchdev instances .
*/
int last_hwdom ;
2021-07-21 19:24:00 +03:00
/* Bit mask of hardware domain numbers in use */
unsigned long busy_hwdoms ;
2016-08-25 18:42:37 +02:00
# endif
2017-12-12 16:02:50 +02:00
struct hlist_head fdb_list ;
2020-04-26 15:22:00 +02:00
# if IS_ENABLED(CONFIG_BRIDGE_MRP)
2020-11-06 22:50:49 +01:00
struct hlist_head mrp_list ;
2020-04-26 15:22:00 +02:00
# endif
2020-10-27 10:02:43 +00:00
# if IS_ENABLED(CONFIG_BRIDGE_CFM)
struct hlist_head mep_list ;
# endif
2005-04-16 15:20:36 -07:00
} ;
2010-02-27 19:41:40 +00:00
struct br_input_skb_cb {
struct net_device * brdev ;
2014-10-05 12:00:22 +08:00
netfilter: bridge: add connection tracking system
This patch adds basic connection tracking support for the bridge,
including initial IPv4 support.
This patch register two hooks to deal with the bridge forwarding path,
one from the bridge prerouting hook to call nf_conntrack_in(); and
another from the bridge postrouting hook to confirm the entry.
The conntrack bridge prerouting hook defragments packets before passing
them to nf_conntrack_in() to look up for an existing entry, otherwise a
new entry is allocated and it is attached to the skbuff. The conntrack
bridge postrouting hook confirms new conntrack entries, ie. if this is
the first packet seen, then it adds the entry to the hashtable and (if
needed) it refragments the skbuff into the original fragments, leaving
the geometry as is if possible. Exceptions are linearized skbuffs, eg.
skbuffs that are passed up to nfqueue and conntrack helpers, as well as
cloned skbuff for the local delivery (eg. tcpdump), also in case of
bridge port flooding (cloned skbuff too).
The packet defragmentation is done through the ip_defrag() call. This
forces us to save the bridge control buffer, reset the IP control buffer
area and then restore it after call. This function also bumps the IP
fragmentation statistics, it would be probably desiderable to have
independent statistics for the bridge defragmentation/refragmentation.
The maximum fragment length is stored in the control buffer and it is
used to refragment the skbuff from the postrouting path.
The new fraglist splitter and fragment transformer APIs are used to
implement the bridge refragmentation code. The br_ip_fragment() function
drops the packet in case the maximum fragment size seen is larger than
the output port MTU.
This patchset follows the principle that conntrack should not drop
packets, so users can do it through policy via invalid state matching.
Like br_netfilter, there is no refragmentation for packets that are
passed up for local delivery, ie. prerouting -> input path. There are
calls to nf_reset() already in several spots in the stack since time ago
already, eg. af_packet, that show that skbuff fraglist handling from the
netif_rx path is supported already.
The helpers are called from the postrouting hook, before confirmation,
from there we may see packet floods to bridge ports. Then, although
unlikely, this may result in exercising the helpers many times for each
clone. It would be good to explore how to pass all the packets in a list
to the conntrack hook to do this handle only once for this case.
Thanks to Florian Westphal for handing me over an initial patchset
version to add support for conntrack bridge.
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
2019-05-29 13:25:37 +02:00
u16 frag_max_size ;
2010-03-15 21:51:18 +00:00
# ifdef CONFIG_BRIDGE_IGMP_SNOOPING
2019-04-11 16:36:40 +02:00
u8 igmp ;
u8 mrouters_only : 1 ;
2010-03-15 21:51:18 +00:00
# endif
2019-04-11 16:36:40 +02:00
u8 proxyarp_replied : 1 ;
u8 src_port_isolated : 1 ;
2014-09-12 16:26:16 -04:00
# ifdef CONFIG_BRIDGE_VLAN_FILTERING
2019-04-11 16:36:40 +02:00
u8 vlan_filtered : 1 ;
2014-09-12 16:26:16 -04:00
# endif
2019-04-11 16:36:42 +02:00
# ifdef CONFIG_NETFILTER_FAMILY_BRIDGE
u8 br_netfilter_broute : 1 ;
# endif
2016-08-25 18:42:37 +02:00
# ifdef CONFIG_NET_SWITCHDEV
2021-07-22 18:55:38 +03:00
/* Set if TX data plane offloading is used towards at least one
* hardware domain .
*/
u8 tx_fwd_offload : 1 ;
net: bridge: disambiguate offload_fwd_mark
Before this change, four related - but distinct - concepts where named
offload_fwd_mark:
- skb->offload_fwd_mark: Set by the switchdev driver if the underlying
hardware has already forwarded this frame to the other ports in the
same hardware domain.
- nbp->offload_fwd_mark: An idetifier used to group ports that share
the same hardware forwarding domain.
- br->offload_fwd_mark: Counter used to make sure that unique IDs are
used in cases where a bridge contains ports from multiple hardware
domains.
- skb->cb->offload_fwd_mark: The hardware domain on which the frame
ingressed and was forwarded.
Introduce the term "hardware forwarding domain" ("hwdom") in the
bridge to denote a set of ports with the following property:
If an skb with skb->offload_fwd_mark set, is received on a port
belonging to hwdom N, that frame has already been forwarded to all
other ports in hwdom N.
By decoupling the name from "offload_fwd_mark", we can extend the
term's definition in the future - e.g. to add constraints that
describe expected egress behavior - without overloading the meaning of
"offload_fwd_mark".
- nbp->offload_fwd_mark thus becomes nbp->hwdom.
- br->offload_fwd_mark becomes br->last_hwdom.
- skb->cb->offload_fwd_mark becomes skb->cb->src_hwdom. The slight
change in naming here mandates a slight change in behavior of the
nbp_switchdev_frame_mark() function. Previously, it only set this
value in skb->cb for packets with skb->offload_fwd_mark true (ones
which were forwarded in hardware). Whereas now we always track the
incoming hwdom for all packets coming from a switchdev (even for the
packets which weren't forwarded in hardware, such as STP BPDUs, IGMP
reports etc). As all uses of skb->cb->offload_fwd_mark were already
gated behind checks of skb->offload_fwd_mark, this will not introduce
any functional change, but it paves the way for future changes where
the ingressing hwdom must be known for frames coming from a switchdev
regardless of whether they were forwarded in hardware or not
(basically, if the skb comes from a switchdev, skb->cb->src_hwdom now
always tracks which one).
A typical example where this is relevant: the switchdev has a fixed
configuration to trap STP BPDUs, but STP is not running on the bridge
and the group_fwd_mask allows them to be forwarded. Say we have this
setup:
br0
/ | \
/ | \
swp0 swp1 swp2
A BPDU comes in on swp0 and is trapped to the CPU; the driver does not
set skb->offload_fwd_mark. The bridge determines that the frame should
be forwarded to swp{1,2}. It is imperative that forward offloading is
_not_ allowed in this case, as the source hwdom is already "poisoned".
Recording the source hwdom allows this case to be handled properly.
v2->v3: added code comments
v3->v6: none
Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-21 19:23:59 +03:00
/* The switchdev hardware domain from which this packet was received.
* If skb - > offload_fwd_mark was set , then this packet was already
* forwarded by hardware to the other ports in the source hardware
* domain , otherwise it wasn ' t .
*/
int src_hwdom ;
2021-07-22 18:55:38 +03:00
/* Bit mask of hardware domains towards this packet has already been
* transmitted using the TX data plane offload .
*/
unsigned long fwd_hwdoms ;
2016-08-25 18:42:37 +02:00
# endif
2010-02-27 19:41:40 +00:00
} ;
/* Reinterpret the ->cb area of @skb as a struct br_input_skb_cb pointer. */
#define BR_INPUT_SKB_CB(skb)	((struct br_input_skb_cb *)(skb)->cb)
2010-03-15 21:51:18 +00:00
/* Value of the mrouters_only bit stored in the packet's bridge control
 * block; a constant 0 when CONFIG_BRIDGE_IGMP_SNOOPING is not defined
 * (the bit does not exist in that configuration).
 */
#ifndef CONFIG_BRIDGE_IGMP_SNOOPING
#define BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)	(0)
#else
#define BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)	(BR_INPUT_SKB_CB(skb)->mrouters_only)
#endif
2010-05-10 09:31:09 +00:00
/* Bridge logging helpers.
 *
 * br_printk() emits through printk() at the given level and places the name
 * of the bridge's net_device ((br)->dev->name) in front of the caller's
 * format. br_err/br_warn/br_notice/br_info fix the level; br_debug goes
 * through pr_debug() with the same prefix.
 */
#define br_printk(level, br, fmt, ...)					\
	printk(level " %s: " fmt, (br)->dev->name, ##__VA_ARGS__)

#define br_err(__br, fmt, ...)						\
	br_printk(KERN_ERR, __br, fmt, ##__VA_ARGS__)
#define br_warn(__br, fmt, ...)						\
	br_printk(KERN_WARNING, __br, fmt, ##__VA_ARGS__)
#define br_notice(__br, fmt, ...)					\
	br_printk(KERN_NOTICE, __br, fmt, ##__VA_ARGS__)
#define br_info(__br, fmt, ...)						\
	br_printk(KERN_INFO, __br, fmt, ##__VA_ARGS__)

#define br_debug(br, fmt, ...)						\
	pr_debug(" %s: " fmt, (br)->dev->name, ##__VA_ARGS__)
2005-04-16 15:20:36 -07:00
/* called under bridge lock */
static inline int br_is_root_bridge ( const struct net_bridge * br )
{
return ! memcmp ( & br - > bridge_id , & br - > designated_root , 8 ) ;
}
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up, but the moment net_port_vlans is
changed the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
/* check if a VLAN entry is global */
static inline bool br_vlan_is_master ( const struct net_bridge_vlan * v )
{
return v - > flags & BRIDGE_VLAN_INFO_MASTER ;
}
/* check if a VLAN entry is used by the bridge */
static inline bool br_vlan_is_brentry ( const struct net_bridge_vlan * v )
{
return v - > flags & BRIDGE_VLAN_INFO_BRENTRY ;
}
2015-10-02 15:05:13 +02:00
/* check if we should use the vlan entry, returns false if it's only context */
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up, but the moment net_port_vlans is
changed the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
/* Decide whether VLAN entry @v may be used.
 *
 * A non-master entry is always usable. A master (global) entry is usable
 * only when it is also a bridge entry; otherwise it merely serves as
 * context and false is returned.
 */
static inline bool br_vlan_should_use(const struct net_bridge_vlan *v)
{
	if (!br_vlan_is_master(v))
		return true;

	return br_vlan_is_brentry(v);
}
2019-11-04 11:36:51 +02:00
static inline bool nbp_state_should_learn ( const struct net_bridge_port * p )
{
return p - > state = = BR_STATE_LEARNING | | p - > state = = BR_STATE_FORWARDING ;
}
2020-01-14 19:56:08 +02:00
/* Validate a VLAN id.
 *
 * @vid:    id to check; accepted when 0 < vid < VLAN_VID_MASK
 * @extack: receives an error message when the id is rejected
 *
 * Returns true for an acceptable id, false otherwise.
 */
static inline bool br_vlan_valid_id(u16 vid, struct netlink_ext_ack *extack)
{
	if (vid > 0 && vid < VLAN_VID_MASK)
		return true;

	NL_SET_ERR_MSG_MOD(extack, " Vlan id is invalid ");
	return false;
}
static inline bool br_vlan_valid_range ( const struct bridge_vlan_info * cur ,
2020-01-14 19:56:08 +02:00
const struct bridge_vlan_info * last ,
struct netlink_ext_ack * extack )
2020-01-14 19:56:07 +02:00
{
/* pvid flag is not allowed in ranges */
2020-01-14 19:56:08 +02:00
if ( cur - > flags & BRIDGE_VLAN_INFO_PVID ) {
NL_SET_ERR_MSG_MOD ( extack , " Pvid isn't allowed in a range " ) ;
2020-01-14 19:56:07 +02:00
return false ;
2020-01-14 19:56:08 +02:00
}
2020-01-14 19:56:07 +02:00
/* when cur is the range end, check if:
* - it has range start flag
* - range ids are invalid ( end is equal to or before start )
*/
if ( last ) {
2020-01-14 19:56:08 +02:00
if ( cur - > flags & BRIDGE_VLAN_INFO_RANGE_BEGIN ) {
NL_SET_ERR_MSG_MOD ( extack , " Found a new vlan range start while processing one " ) ;
2020-01-14 19:56:07 +02:00
return false ;
2020-01-14 19:56:08 +02:00
} else if ( ! ( cur - > flags & BRIDGE_VLAN_INFO_RANGE_END ) ) {
NL_SET_ERR_MSG_MOD ( extack , " Vlan range end flag is missing " ) ;
2020-01-14 19:56:07 +02:00
return false ;
2020-01-14 19:56:08 +02:00
} else if ( cur - > vid < = last - > vid ) {
NL_SET_ERR_MSG_MOD ( extack , " End vlan id is less than or equal to start vlan id " ) ;
return false ;
}
}
/* check for required range flags */
if ( ! ( cur - > flags & ( BRIDGE_VLAN_INFO_RANGE_BEGIN |
BRIDGE_VLAN_INFO_RANGE_END ) ) ) {
NL_SET_ERR_MSG_MOD ( extack , " Both vlan range flags are missing " ) ;
return false ;
2020-01-14 19:56:07 +02:00
}
return true ;
}
2020-01-14 19:56:14 +02:00
/* Translate a link command into the matching VLAN message type:
 * RTM_SETLINK -> RTM_NEWVLAN, RTM_DELLINK -> RTM_DELVLAN, anything else -> 0.
 */
static inline int br_afspec_cmd_to_rtm(int cmd)
{
	if (cmd == RTM_SETLINK)
		return RTM_NEWVLAN;
	if (cmd == RTM_DELLINK)
		return RTM_DELVLAN;

	return 0;
}
2018-09-26 17:01:00 +03:00
static inline int br_opt_get ( const struct net_bridge * br ,
enum net_bridge_opts opt )
{
return test_bit ( opt , & br - > options ) ;
}
2018-11-24 04:34:20 +02:00
int br_boolopt_toggle ( struct net_bridge * br , enum br_boolopt_id opt , bool on ,
struct netlink_ext_ack * extack ) ;
int br_boolopt_get ( const struct net_bridge * br , enum br_boolopt_id opt ) ;
int br_boolopt_multi_toggle ( struct net_bridge * br ,
struct br_boolopt_multi * bm ,
struct netlink_ext_ack * extack ) ;
void br_boolopt_multi_get ( const struct net_bridge * br ,
struct br_boolopt_multi * bm ) ;
2018-09-26 17:01:00 +03:00
void br_opt_toggle ( struct net_bridge * br , enum net_bridge_opts opt , bool on ) ;
2005-04-16 15:20:36 -07:00
/* br_device.c */
2013-10-18 13:48:22 -07:00
void br_dev_setup ( struct net_device * dev ) ;
void br_dev_delete ( struct net_device * dev , struct list_head * list ) ;
netdev_tx_t br_dev_xmit ( struct sk_buff * skb , struct net_device * dev ) ;
2010-05-10 09:31:08 +00:00
/* netpoll hooks for bridge ports.
 *
 * Without CONFIG_NET_POLL_CONTROLLER all three are inline stubs: sending and
 * disabling do nothing and enabling reports success (0). With it,
 * br_netpoll_send_skb() passes the skb to netpoll_send_skb() using the port's
 * ->np, and enable/disable are declared here and defined elsewhere.
 */
#ifndef CONFIG_NET_POLL_CONTROLLER
static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
				       struct sk_buff *skb)
{
}

static inline int br_netpoll_enable(struct net_bridge_port *p)
{
	return 0;
}

static inline void br_netpoll_disable(struct net_bridge_port *p)
{
}
#else
static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
				       struct sk_buff *skb)
{
	netpoll_send_skb(p->np, skb);
}

int br_netpoll_enable(struct net_bridge_port *p);
void br_netpoll_disable(struct net_bridge_port *p);
#endif
2005-04-16 15:20:36 -07:00
/* br_fdb.c */
2013-10-18 13:48:22 -07:00
int br_fdb_init ( void ) ;
void br_fdb_fini ( void ) ;
2017-12-12 16:02:50 +02:00
int br_fdb_hash_init ( struct net_bridge * br ) ;
void br_fdb_hash_fini ( struct net_bridge * br ) ;
2013-10-18 13:48:22 -07:00
void br_fdb_flush ( struct net_bridge * br ) ;
2014-02-07 16:48:25 +09:00
void br_fdb_find_delete_local ( struct net_bridge * br ,
const struct net_bridge_port * p ,
const unsigned char * addr , u16 vid ) ;
2013-10-18 13:48:22 -07:00
void br_fdb_changeaddr ( struct net_bridge_port * p , const unsigned char * newaddr ) ;
void br_fdb_change_mac_address ( struct net_bridge * br , const u8 * newaddr ) ;
2017-02-04 18:05:07 +01:00
void br_fdb_cleanup ( struct work_struct * work ) ;
2013-10-18 13:48:22 -07:00
void br_fdb_delete_by_port ( struct net_bridge * br ,
2015-06-23 05:28:16 -07:00
const struct net_bridge_port * p , u16 vid , int do_all ) ;
2017-02-13 14:59:09 +01:00
struct net_bridge_fdb_entry * br_fdb_find_rcu ( struct net_bridge * br ,
const unsigned char * addr ,
__u16 vid ) ;
2013-10-18 13:48:22 -07:00
int br_fdb_test_addr ( struct net_device * dev , unsigned char * addr ) ;
int br_fdb_fillbuf ( struct net_bridge * br , void * buf , unsigned long count ,
unsigned long off ) ;
int br_fdb_insert ( struct net_bridge * br , struct net_bridge_port * source ,
const unsigned char * addr , u16 vid ) ;
void br_fdb_update ( struct net_bridge * br , struct net_bridge_port * source ,
2019-11-01 14:46:37 +02:00
const unsigned char * addr , u16 vid , unsigned long flags ) ;
2013-10-18 13:48:22 -07:00
int br_fdb_delete ( struct ndmsg * ndm , struct nlattr * tb [ ] ,
2014-11-28 14:34:15 +01:00
struct net_device * dev , const unsigned char * addr , u16 vid ) ;
2013-10-18 13:48:22 -07:00
int br_fdb_add ( struct ndmsg * nlh , struct nlattr * tb [ ] , struct net_device * dev ,
2019-01-16 23:06:50 +00:00
const unsigned char * addr , u16 vid , u16 nlh_flags ,
struct netlink_ext_ack * extack ) ;
2013-10-18 13:48:22 -07:00
int br_fdb_dump ( struct sk_buff * skb , struct netlink_callback * cb ,
2016-08-30 21:56:45 -07:00
struct net_device * dev , struct net_device * fdev , int * idx ) ;
2018-12-15 22:35:09 -08:00
int br_fdb_get ( struct sk_buff * skb , struct nlattr * tb [ ] , struct net_device * dev ,
const unsigned char * addr , u16 vid , u32 portid , u32 seq ,
struct netlink_ext_ack * extack ) ;
2014-05-16 09:59:17 -04:00
int br_fdb_sync_static ( struct net_bridge * br , struct net_bridge_port * p ) ;
void br_fdb_unsync_static ( struct net_bridge * br , struct net_bridge_port * p ) ;
2015-01-15 23:49:37 +01:00
int br_fdb_external_learn_add ( struct net_bridge * br , struct net_bridge_port * p ,
net: bridge: validate the NUD_PERMANENT bit when adding an extern_learn FDB entry
Currently it is possible to add broken extern_learn FDB entries to the
bridge in two ways:
1. Entries pointing towards the bridge device that are not local/permanent:
ip link add br0 type bridge
bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static
2. Entries pointing towards the bridge device or towards a port that
are marked as local/permanent, however the bridge does not process the
'permanent' bit in any way, therefore they are recorded as though they
aren't permanent:
ip link add br0 type bridge
bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent
Since commit 52e4bec15546 ("net: bridge: switchdev: treat local FDBs the
same as entries towards the bridge"), these incorrect FDB entries can
even trigger NULL pointer dereferences inside the kernel.
This is because that commit made the assumption that all FDB entries
that are not local/permanent have a valid destination port. For context,
local / permanent FDB entries either have fdb->dst == NULL, and these
point towards the bridge device and are therefore local and not to be
used for forwarding, or have fdb->dst == a net_bridge_port structure
(but are to be treated in the same way, i.e. not for forwarding).
That assumption _is_ correct as long as things are working correctly in
the bridge driver, i.e. we cannot logically have fdb->dst == NULL under
any circumstance for FDB entries that are not local. However, the
extern_learn code path where FDB entries are managed by a user space
controller shows that it is possible for the bridge kernel driver to
misinterpret the NUD flags of an entry transmitted by user space, and
end up having fdb->dst == NULL while not being a local entry. This is
invalid and should be rejected.
Before, the two commands listed above both crashed the kernel in this
check from br_switchdev_fdb_notify:
struct net_device *dev = info.is_local ? br->dev : dst->dev;
info.is_local == false, dst == NULL.
After this patch, the invalid entry added by the first command is
rejected:
ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn static; ip link del br0
Error: bridge: FDB entry towards bridge must be permanent.
and the valid entry added by the second command is properly treated as a
local address and does not crash br_switchdev_fdb_notify anymore:
ip link add br0 type bridge && bridge fdb add 00:01:02:03:04:05 dev br0 self extern_learn permanent; ip link del br0
Fixes: eb100e0e24a2 ("net: bridge: allow to add externally learned entries from user-space")
Reported-by: syzbot+9ba1174359adba5a5b7c@syzkaller.appspotmail.com
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Link: https://lore.kernel.org/r/20210801231730.7493-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2021-08-02 02:17:30 +03:00
const unsigned char * addr , u16 vid , bool is_local ,
2018-05-03 14:43:53 +02:00
bool swdev_notify ) ;
2015-01-15 23:49:37 +01:00
int br_fdb_external_learn_del ( struct net_bridge * br , struct net_bridge_port * p ,
2018-05-03 14:43:53 +02:00
const unsigned char * addr , u16 vid ,
bool swdev_notify ) ;
2017-06-08 08:44:15 +02:00
void br_fdb_offloaded_set ( struct net_bridge * br , struct net_bridge_port * p ,
2018-10-17 08:53:29 +00:00
const unsigned char * addr , u16 vid , bool offloaded ) ;
net: bridge: switchdev: replay the entire FDB for each port
Currently when a switchdev port joins a bridge, we replay all FDB
entries pointing towards that port or towards the bridge.
However, this is insufficient in certain situations:
(a) DSA, through its assisted_learning_on_cpu_port logic, snoops
dynamically learned FDB entries on foreign interfaces.
These are FDB entries that are pointing neither towards the newly
joined switchdev port, nor towards the bridge. So these addresses
would be missed when joining a bridge where a foreign interface has
already learned some addresses, and they would also linger on if the
DSA port leaves the bridge before the foreign interface forgets them.
None of this happens if we replay the entire FDB when the port joins.
(b) There is a desire to treat local FDB entries on a port (i.e. the
port's termination MAC address) identically to FDB entries pointing
towards the bridge itself. More details on the reason behind this in
the next patch. The point is that this cannot be done given the
current structure of br_fdb_replay() in this situation:
ip link set swp0 master br0 # br0 inherits its MAC address from swp0
ip link set swp1 master br0
What is desirable is that when swp1 joins the bridge, br_fdb_replay()
also notifies swp1 of br0's MAC address, but this won't in fact
happen because the MAC address of br0 does not have fdb->dst == NULL
(it doesn't point towards the bridge), but it has fdb->dst == swp0.
So our current logic makes it impossible for that address to be
replayed. But if we dump the entire FDB instead of just the entries
with fdb->dst == swp1 and fdb->dst == NULL, then the inherited MAC
address of br0 will be replayed too, which is what we need.
A natural question arises: say there is an FDB entry to be replayed,
like a MAC address dynamically learned on a foreign interface that
belongs to a bridge where no switchdev port has joined yet. If 10
switchdev ports belonging to the same driver join this bridge, one by
one, won't every port get notified 10 times of the foreign FDB entry,
amounting to a total of 100 notifications for this FDB entry in the
switchdev driver?
Well, yes, but this is where the "void *ctx" argument for br_fdb_replay
is useful: every port of the switchdev driver is notified whenever any
other port requests an FDB replay, but because the replay was initiated
by a different port, its context is different from the initiating port's
context, so it ignores those replays.
So the foreign FDB entry will be installed only 10 times, once per port.
This is done so that the following 4 code paths are always well balanced:
(a) addition of foreign FDB entry is replayed when port joins bridge
(b) deletion of foreign FDB entry is replayed when port leaves bridge
(c) addition of foreign FDB entry is notified to all ports currently in bridge
(d) deletion of foreign FDB entry is notified to all ports currently in bridge
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-28 21:27:47 +03:00
int br_fdb_replay ( const struct net_device * br_dev , const void * ctx , bool adding ,
struct notifier_block * nb ) ;
2005-04-16 15:20:36 -07:00
/* br_forward.c */
2016-08-31 15:36:51 +02:00
/*
 * Packet type classification (unicast, multicast or broadcast).
 * br_flood() takes one of these values as its pkt_type argument.
 */
enum br_pkt_type {
BR_PKT_UNICAST ,
BR_PKT_MULTICAST ,
BR_PKT_BROADCAST
} ;
2015-09-15 20:04:18 -05:00
int br_dev_queue_push_xmit ( struct net * net , struct sock * sk , struct sk_buff * skb ) ;
2016-07-14 06:10:01 +03:00
void br_forward ( const struct net_bridge_port * to , struct sk_buff * skb ,
2016-07-14 06:10:02 +03:00
bool local_rcv , bool local_orig ) ;
2015-09-15 20:04:18 -05:00
int br_forward_finish ( struct net * net , struct sock * sk , struct sk_buff * skb ) ;
2016-07-14 06:10:02 +03:00
void br_flood ( struct net_bridge * br , struct sk_buff * skb ,
2016-08-31 15:36:51 +02:00
enum br_pkt_type pkt_type , bool local_rcv , bool local_orig ) ;
2005-04-16 15:20:36 -07:00
2018-05-24 11:56:48 +03:00
/* return true if both source port and dest port are isolated */
static inline bool br_skb_isolated ( const struct net_bridge_port * to ,
const struct sk_buff * skb )
{
return BR_INPUT_SKB_CB ( skb ) - > src_port_isolated & &
( to - > flags & BR_ISOLATED ) ;
}
2005-04-16 15:20:36 -07:00
/* br_if.c */
2018-05-03 13:47:24 +03:00
void br_port_carrier_check ( struct net_bridge_port * p , bool * notified ) ;
2013-10-18 13:48:22 -07:00
int br_add_bridge ( struct net * net , const char * name ) ;
int br_del_bridge ( struct net * net , const char * name ) ;
2017-10-04 17:48:50 -07:00
int br_add_if ( struct net_bridge * br , struct net_device * dev ,
struct netlink_ext_ack * extack ) ;
2013-10-18 13:48:22 -07:00
int br_del_if ( struct net_bridge * br , struct net_device * dev ) ;
2018-03-30 13:46:19 +03:00
void br_mtu_auto_adjust ( struct net_bridge * br ) ;
2013-10-18 13:48:22 -07:00
netdev_features_t br_features_recompute ( struct net_bridge * br ,
netdev_features_t features ) ;
2014-05-16 09:59:16 -04:00
void br_port_flags_change ( struct net_bridge_port * port , unsigned long mask ) ;
2014-05-16 09:59:20 -04:00
void br_manage_promisc ( struct net_bridge * br ) ;
2018-07-23 11:16:59 +03:00
int nbp_backup_change ( struct net_bridge_port * p , struct net_device * backup_dev ) ;
2005-04-16 15:20:36 -07:00
/* br_input.c */
2015-09-15 20:04:18 -05:00
int br_handle_frame_finish ( struct net * net , struct sock * sk , struct sk_buff * skb ) ;
2020-05-10 19:37:40 +03:00
rx_handler_func_t * br_get_rx_handler ( const struct net_device * dev ) ;
2005-04-16 15:20:36 -07:00
2020-10-27 10:02:42 +00:00
/*
 * Frame handler descriptor; this is the object passed to br_add_frame()
 * and br_del_frame().
 */
struct br_frame_type {
/* Big-endian type value; presumably the EtherType to match -- confirm. */
__be16 type ;
/* Callback invoked with the bridge port and the skb. */
int ( * frame_handler ) ( struct net_bridge_port * port ,
struct sk_buff * skb ) ;
/* hlist linkage; NOTE(review): presumably a per-bridge handler list -- confirm. */
struct hlist_node list ;
} ;
void br_add_frame ( struct net_bridge * br , struct br_frame_type * ft ) ;
void br_del_frame ( struct net_bridge * br , struct br_frame_type * ft ) ;
2013-12-05 16:27:37 +01:00
/*
 * Return true when dev->rx_handler, read through rcu_dereference(), is the
 * handler that br_get_rx_handler() reports for @dev.
 * NOTE(review): presumably requires the caller to be inside an RCU
 * read-side critical section -- confirm against callers.
 */
static inline bool br_rx_handler_check_rcu ( const struct net_device * dev )
{
2020-05-10 19:37:40 +03:00
return rcu_dereference ( dev - > rx_handler ) = = br_get_rx_handler ( dev ) ;
2013-12-05 16:27:37 +01:00
}
2018-04-29 10:56:08 +03:00
/*
 * Same comparison as br_rx_handler_check_rcu(), but dev->rx_handler is read
 * through rcu_dereference_rtnl() instead of rcu_dereference().
 * NOTE(review): presumably intended for callers holding RTNL -- confirm.
 */
static inline bool br_rx_handler_check_rtnl ( const struct net_device * dev )
{
2020-05-10 19:37:40 +03:00
return rcu_dereference_rtnl ( dev - > rx_handler ) = = br_get_rx_handler ( dev ) ;
2018-04-29 10:56:08 +03:00
}
2013-12-05 16:27:37 +01:00
/*
 * Look up the bridge port for @dev via br_port_get_rcu(), but only after
 * br_rx_handler_check_rcu() has confirmed the device's rx_handler; returns
 * NULL when that check fails.
 */
static inline struct net_bridge_port *br_port_get_check_rcu(const struct net_device *dev)
{
	if (!br_rx_handler_check_rcu(dev))
		return NULL;

	return br_port_get_rcu(dev);
}
2018-04-29 10:56:08 +03:00
/*
 * Look up the bridge port for @dev via br_port_get_rtnl_rcu(), but only
 * after br_rx_handler_check_rtnl() has confirmed the device's rx_handler;
 * returns NULL when that check fails.
 */
static inline struct net_bridge_port *
br_port_get_check_rtnl(const struct net_device *dev)
{
	if (!br_rx_handler_check_rtnl(dev))
		return NULL;

	return br_port_get_rtnl_rcu(dev);
}
2005-04-16 15:20:36 -07:00
/* br_ioctl.c */
2021-07-27 15:44:51 +02:00
int br_dev_siocdevprivate ( struct net_device * dev , struct ifreq * rq ,
void __user * data , int cmd ) ;
2021-07-27 15:45:16 +02:00
int br_ioctl_stub ( struct net * net , struct net_bridge * br , unsigned int cmd ,
struct ifreq * ifr , void __user * uarg ) ;
2005-04-16 15:20:36 -07:00
2010-02-27 19:41:45 +00:00
/* br_multicast.c */
# ifdef CONFIG_BRIDGE_IGMP_SNOOPING
2021-07-19 20:06:28 +03:00
int br_multicast_rcv ( struct net_bridge_mcast * * brmctx ,
struct net_bridge_mcast_port * * pmctx ,
struct net_bridge_vlan * vlan ,
2013-11-04 13:48:30 -05:00
struct sk_buff * skb , u16 vid ) ;
2021-07-19 20:06:25 +03:00
struct net_bridge_mdb_entry * br_mdb_get ( struct net_bridge_mcast * brmctx ,
2013-10-18 13:48:22 -07:00
struct sk_buff * skb , u16 vid ) ;
2016-06-28 16:57:06 +02:00
int br_multicast_add_port ( struct net_bridge_port * port ) ;
2013-10-18 13:48:22 -07:00
void br_multicast_del_port ( struct net_bridge_port * port ) ;
void br_multicast_enable_port ( struct net_bridge_port * port ) ;
void br_multicast_disable_port ( struct net_bridge_port * port ) ;
void br_multicast_init ( struct net_bridge * br ) ;
2020-12-04 18:56:28 -05:00
void br_multicast_join_snoopers ( struct net_bridge * br ) ;
void br_multicast_leave_snoopers ( struct net_bridge * br ) ;
2013-10-18 13:48:22 -07:00
void br_multicast_open ( struct net_bridge * br ) ;
void br_multicast_stop ( struct net_bridge * br ) ;
2015-07-15 07:16:51 -07:00
void br_multicast_dev_del ( struct net_bridge * br ) ;
2021-07-19 20:06:25 +03:00
void br_multicast_flood ( struct net_bridge_mdb_entry * mdst , struct sk_buff * skb ,
struct net_bridge_mcast * brmctx ,
bool local_rcv , bool local_orig ) ;
2021-08-10 18:29:31 +03:00
int br_multicast_set_router ( struct net_bridge_mcast * brmctx , unsigned long val ) ;
2013-10-18 13:48:22 -07:00
int br_multicast_set_port_router ( struct net_bridge_port * p , unsigned long val ) ;
2021-04-14 22:22:57 +03:00
int br_multicast_toggle ( struct net_bridge * br , unsigned long val ,
struct netlink_ext_ack * extack ) ;
2021-08-10 18:29:30 +03:00
int br_multicast_set_querier ( struct net_bridge_mcast * brmctx , unsigned long val ) ;
2013-10-18 13:48:22 -07:00
int br_multicast_set_hash_max ( struct net_bridge * br , unsigned long val ) ;
2021-08-10 18:29:19 +03:00
int br_multicast_set_igmp_version ( struct net_bridge_mcast * brmctx ,
unsigned long val ) ;
2016-11-21 13:03:25 +01:00
# if IS_ENABLED(CONFIG_IPV6)
2021-08-10 18:29:19 +03:00
int br_multicast_set_mld_version ( struct net_bridge_mcast * brmctx ,
unsigned long val ) ;
2016-11-21 13:03:25 +01:00
# endif
2013-10-18 13:48:22 -07:00
struct net_bridge_mdb_entry *
2018-12-05 15:14:24 +02:00
br_mdb_ip_get ( struct net_bridge * br , struct br_ip * dst ) ;
2013-10-18 13:48:22 -07:00
struct net_bridge_mdb_entry *
2018-12-05 15:14:24 +02:00
br_multicast_new_group ( struct net_bridge * br , struct br_ip * group ) ;
2013-10-18 13:48:22 -07:00
struct net_bridge_port_group *
br_multicast_new_port_group ( struct net_bridge_port * port , struct br_ip * group ,
struct net_bridge_port_group __rcu * next ,
2020-09-07 12:56:07 +03:00
unsigned char flags , const unsigned char * src ,
2020-09-22 10:30:21 +03:00
u8 filter_mode , u8 rt_protocol ) ;
2018-12-05 15:14:24 +02:00
int br_mdb_hash_init ( struct net_bridge * br ) ;
void br_mdb_hash_fini ( struct net_bridge * br ) ;
2020-09-07 12:56:12 +03:00
void br_mdb_notify ( struct net_device * dev , struct net_bridge_mdb_entry * mp ,
struct net_bridge_port_group * pg , int type ) ;
2021-07-19 20:06:33 +03:00
void br_rtr_notify ( struct net_device * dev , struct net_bridge_mcast_port * pmctx ,
2015-07-23 05:00:53 -07:00
int type ) ;
2020-09-07 12:56:06 +03:00
void br_multicast_del_pg ( struct net_bridge_mdb_entry * mp ,
struct net_bridge_port_group * pg ,
struct net_bridge_port_group __rcu * * pp ) ;
2021-07-19 20:06:25 +03:00
void br_multicast_count ( struct net_bridge * br ,
const struct net_bridge_port * p ,
2016-07-06 12:12:21 -07:00
const struct sk_buff * skb , u8 type , u8 dir ) ;
2016-06-28 16:57:06 +02:00
int br_multicast_init_stats ( struct net_bridge * br ) ;
2017-04-10 14:59:27 +03:00
void br_multicast_uninit_stats ( struct net_bridge * br ) ;
2016-06-28 16:57:06 +02:00
void br_multicast_get_stats ( const struct net_bridge * br ,
const struct net_bridge_port * p ,
struct br_mcast_stats * dest ) ;
2018-12-05 15:14:24 +02:00
void br_mdb_init ( void ) ;
void br_mdb_uninit ( void ) ;
2021-07-21 17:01:27 +03:00
void br_multicast_host_join ( const struct net_bridge_mcast * brmctx ,
struct net_bridge_mdb_entry * mp , bool notify ) ;
2019-08-17 14:22:13 +03:00
void br_multicast_host_leave ( struct net_bridge_mdb_entry * mp , bool notify ) ;
net: bridge: mcast: handle port group filter modes
We need to handle group filter mode transitions and initial state.
To change a port group's INCLUDE -> EXCLUDE mode (or when we have added
a new port group in EXCLUDE mode) we need to add that port to all of
*,G ports' S,G entries for proper replication. When the EXCLUDE state is
changed from IGMPv3 report, br_multicast_fwd_filter_exclude() must be
called after the source list processing because the assumption is that
all of the group's S,G entries will be created before transitioning to
EXCLUDE mode, i.e. most importantly its blocked entries will already be
added so it will not get automatically added to them.
The transition EXCLUDE -> INCLUDE happens only when a port group timer
expires; it requires us to remove that port from all of *,G ports' S,G
entries where it was automatically added previously.
Finally when we are adding a new S,G entry we must add all of *,G's
EXCLUDE ports to it.
In order to distinguish automatically added *,G EXCLUDE ports we have a
new port group flag - MDB_PG_FLAGS_STAR_EXCL.
Signed-off-by: Nikolay Aleksandrov <nikolay@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2020-09-22 10:30:24 +03:00
void br_multicast_star_g_handle_mode ( struct net_bridge_port_group * pg ,
u8 filter_mode ) ;
void br_multicast_sg_add_exclude_ports ( struct net_bridge_mdb_entry * star_mp ,
struct net_bridge_port_group * sg ) ;
2021-01-20 16:51:58 +02:00
struct net_bridge_group_src *
br_multicast_find_group_src ( struct net_bridge_port_group * pg , struct br_ip * ip ) ;
2021-01-20 16:52:03 +02:00
void br_multicast_del_group_src ( struct net_bridge_group_src * src ,
bool fastleave ) ;
2021-07-19 20:06:26 +03:00
void br_multicast_ctx_init ( struct net_bridge * br ,
struct net_bridge_vlan * vlan ,
struct net_bridge_mcast * brmctx ) ;
void br_multicast_ctx_deinit ( struct net_bridge_mcast * brmctx ) ;
void br_multicast_port_ctx_init ( struct net_bridge_port * port ,
struct net_bridge_vlan * vlan ,
struct net_bridge_mcast_port * pmctx ) ;
void br_multicast_port_ctx_deinit ( struct net_bridge_mcast_port * pmctx ) ;
2021-07-19 20:06:27 +03:00
void br_multicast_toggle_one_vlan ( struct net_bridge_vlan * vlan , bool on ) ;
2021-07-19 20:06:28 +03:00
void br_multicast_toggle_vlan ( struct net_bridge_vlan * vlan , bool on ) ;
int br_multicast_toggle_vlan_snooping ( struct net_bridge * br , bool on ,
struct netlink_ext_ack * extack ) ;
2021-07-19 20:06:37 +03:00
bool br_multicast_toggle_global_vlan ( struct net_bridge_vlan * vlan , bool on ) ;
2010-03-01 09:53:04 +00:00
net: bridge: move the switchdev object replay helpers to "push" mode
Starting with commit 4f2673b3a2b6 ("net: bridge: add helper to replay
port and host-joined mdb entries"), DSA has introduced some bridge
helpers that replay switchdev events (FDB/MDB/VLAN additions and
deletions) that can be lost by the switchdev drivers in a variety of
circumstances:
- an IP multicast group was host-joined on the bridge itself before any
switchdev port joined the bridge, leading to the host MDB entries
missing in the hardware database.
- during the bridge creation process, the MAC address of the bridge was
added to the FDB as an entry pointing towards the bridge device
itself, but with no switchdev ports being part of the bridge yet, this
local FDB entry would remain unknown to the switchdev hardware
database.
- a VLAN/FDB/MDB was added to a bridge port that is a LAG interface,
before any switchdev port joined that LAG, leading to the hardware
database missing those entries.
- a switchdev port left a LAG that is a bridge port, while the LAG
remained part of the bridge, and all FDB/MDB/VLAN entries remained
installed in the hardware database of the switchdev port.
Also, since commit 0d2cfbd41c4a ("net: bridge: ignore switchdev events
for LAG ports which didn't request replay"), DSA introduced a method,
based on a const void *ctx, to ensure that two switchdev ports under the
same LAG that is a bridge port do not see the same MDB/VLAN entry being
replayed twice by the bridge, once for every bridge port that joins the
LAG.
With so many ordering corner cases being possible, it seems unreasonable
to expect a switchdev driver writer to get it right from the first try.
Therefore, now that DSA has experimented with the bridge replay helpers
for a little bit, we can move the code to the bridge driver where it is
more readily available to all switchdev drivers.
To convert the switchdev object replay helpers from "pull mode" (where
the driver asks for them) to a "push mode" (where the bridge offers them
automatically), the biggest problem is that the bridge needs to be aware
when a switchdev port joins and leaves, even when the switchdev is only
indirectly a bridge port (for example when the bridge port is a LAG
upper of the switchdev).
Luckily, we already have a hook for that, in the form of the newly
introduced switchdev_bridge_port_offload() and
switchdev_bridge_port_unoffload() calls. These offer a natural place for
hooking the object addition and deletion replays.
Extend the above 2 functions with:
- pointers to the switchdev atomic notifier (for FDB replays) and the
blocking notifier (for MDB and VLAN replays).
- the "const void *ctx" argument required for drivers to be able to
disambiguate between which port is targeted, when multiple ports are
lowers of the same LAG that is a bridge port. Most of the drivers pass
NULL to this argument, except the ones that support LAG offload and have
the proper context check already in place in the switchdev blocking
notifier handler.
Also unexport the replay helpers, since nobody except the bridge calls
them directly now.
Note that:
(a) we abuse the terminology slightly, because FDB entries are not
"switchdev objects", but we count them as objects nonetheless.
With no direct way to prove it, I think they are not modeled as
switchdev objects because those can only be installed by the bridge
to the hardware (as opposed to FDB entries which can be propagated
in the other direction too). This is merely an abuse of terms, FDB
entries are replayed too, despite not being objects.
(b) the bridge does not attempt to sync port attributes to newly joined
ports, just the countable stuff (the objects). The reason for this
is simple: no universal and symmetric way to sync and unsync them is
known. For example, VLAN filtering: what to do on unsync, disable or
leave it enabled? Similarly, STP state, ageing timer, etc etc. What
a switchdev port does when it becomes standalone again is not really
up to the bridge's competence, and the driver should deal with it.
On the other hand, replaying deletions of switchdev objects can be
seen as a matter of cleanup and therefore be treated by the bridge,
hence this patch.
We make the replay helpers opt-in for drivers, because they might not
bring immediate benefits for them:
- nbp_vlan_init() is called _after_ netdev_master_upper_dev_link(),
so br_vlan_replay() should not do anything for the new drivers on
which we call it. The existing drivers where there was even a slight
possibility for there to exist a VLAN on a bridge port before they
join it are already guarded against this: mlxsw and prestera deny
joining LAG interfaces that are members of a bridge.
- br_fdb_replay() should now notify of local FDB entries, but I patched
all drivers except DSA to ignore these new entries in commit
2c4eca3ef716 ("net: bridge: switchdev: include local flag in FDB
notifications"). Driver authors can lift this restriction as they
wish, and when they do, they can also opt into the FDB replay
functionality.
- br_mdb_replay() should fix a real issue which is described in commit
4f2673b3a2b6 ("net: bridge: add helper to replay port and host-joined
mdb entries"). However most drivers do not offload the
SWITCHDEV_OBJ_ID_HOST_MDB to see this issue: only cpsw and am65_cpsw
offload this switchdev object, and I don't completely understand the
way in which they offload this switchdev object anyway. So I'll leave
it up to these drivers' respective maintainers to opt into
br_mdb_replay().
So most of the drivers pass NULL notifier blocks for the replay helpers,
except:
- dpaa2-switch which was already acked/regression-tested with the
helpers enabled (and there isn't much of a downside in having them)
- ocelot which already had replay logic in "pull" mode
- DSA which already had replay logic in "pull" mode
An important observation is that the drivers which don't currently
request bridge event replays don't even have the
switchdev_bridge_port_{offload,unoffload} calls placed in proper places
right now. This was done to avoid unnecessary rework for drivers which
might never even add support for this. For driver writers who wish to
add replay support, this can be used as a tentative placement guide:
https://patchwork.kernel.org/project/netdevbpf/patch/20210720134655.892334-11-vladimir.oltean@nxp.com/
Cc: Vadym Kochan <vkochan@marvell.com>
Cc: Taras Chornyi <tchornyi@marvell.com>
Cc: Ioana Ciornei <ioana.ciornei@nxp.com>
Cc: Lars Povlsen <lars.povlsen@microchip.com>
Cc: Steen Hegelund <Steen.Hegelund@microchip.com>
Cc: UNGLinuxDriver@microchip.com
Cc: Claudiu Manoil <claudiu.manoil@nxp.com>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Ioana Ciornei <ioana.ciornei@nxp.com> # dpaa2-switch
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-21 19:24:03 +03:00
int br_mdb_replay ( struct net_device * br_dev , struct net_device * dev ,
const void * ctx , bool adding , struct notifier_block * nb ,
struct netlink_ext_ack * extack ) ;
2021-08-10 18:29:33 +03:00
int br_rports_fill_info ( struct sk_buff * skb ,
const struct net_bridge_mcast * brmctx ) ;
net: bridge: move the switchdev object replay helpers to "push" mode
Starting with commit 4f2673b3a2b6 ("net: bridge: add helper to replay
port and host-joined mdb entries"), DSA has introduced some bridge
helpers that replay switchdev events (FDB/MDB/VLAN additions and
deletions) that can be lost by the switchdev drivers in a variety of
circumstances:
- an IP multicast group was host-joined on the bridge itself before any
switchdev port joined the bridge, leading to the host MDB entries
missing in the hardware database.
- during the bridge creation process, the MAC address of the bridge was
added to the FDB as an entry pointing towards the bridge device
itself, but with no switchdev ports being part of the bridge yet, this
local FDB entry would remain unknown to the switchdev hardware
database.
- a VLAN/FDB/MDB was added to a bridge port that is a LAG interface,
before any switchdev port joined that LAG, leading to the hardware
database missing those entries.
- a switchdev port left a LAG that is a bridge port, while the LAG
remained part of the bridge, and all FDB/MDB/VLAN entries remained
installed in the hardware database of the switchdev port.
Also, since commit 0d2cfbd41c4a ("net: bridge: ignore switchdev events
for LAG ports which didn't request replay"), DSA introduced a method,
based on a const void *ctx, to ensure that two switchdev ports under the
same LAG that is a bridge port do not see the same MDB/VLAN entry being
replayed twice by the bridge, once for every bridge port that joins the
LAG.
With so many ordering corner cases being possible, it seems unreasonable
to expect a switchdev driver writer to get it right from the first try.
Therefore, now that DSA has experimented with the bridge replay helpers
for a little bit, we can move the code to the bridge driver where it is
more readily available to all switchdev drivers.
To convert the switchdev object replay helpers from "pull mode" (where
the driver asks for them) to a "push mode" (where the bridge offers them
automatically), the biggest problem is that the bridge needs to be aware
when a switchdev port joins and leaves, even when the switchdev is only
indirectly a bridge port (for example when the bridge port is a LAG
upper of the switchdev).
Luckily, we already have a hook for that, in the form of the newly
introduced switchdev_bridge_port_offload() and
switchdev_bridge_port_unoffload() calls. These offer a natural place for
hooking the object addition and deletion replays.
Extend the above 2 functions with:
- pointers to the switchdev atomic notifier (for FDB replays) and the
blocking notifier (for MDB and VLAN replays).
- the "const void *ctx" argument required for drivers to be able to
disambiguate between which port is targeted, when multiple ports are
lowers of the same LAG that is a bridge port. Most of the drivers pass
NULL to this argument, except the ones that support LAG offload and have
the proper context check already in place in the switchdev blocking
notifier handler.
Also unexport the replay helpers, since nobody except the bridge calls
them directly now.
Note that:
(a) we abuse the terminology slightly, because FDB entries are not
"switchdev objects", but we count them as objects nonetheless.
With no direct way to prove it, I think they are not modeled as
switchdev objects because those can only be installed by the bridge
to the hardware (as opposed to FDB entries which can be propagated
in the other direction too). This is merely an abuse of terms, FDB
entries are replayed too, despite not being objects.
(b) the bridge does not attempt to sync port attributes to newly joined
ports, just the countable stuff (the objects). The reason for this
is simple: no universal and symmetric way to sync and unsync them is
known. For example, VLAN filtering: what to do on unsync, disable or
leave it enabled? Similarly, STP state, ageing timer, etc etc. What
a switchdev port does when it becomes standalone again is not really
up to the bridge's competence, and the driver should deal with it.
On the other hand, replaying deletions of switchdev objects can be
seen as a matter of cleanup and therefore be treated by the bridge,
hence this patch.
We make the replay helpers opt-in for drivers, because they might not
bring immediate benefits for them:
- nbp_vlan_init() is called _after_ netdev_master_upper_dev_link(),
so br_vlan_replay() should not do anything for the new drivers on
which we call it. The existing drivers where there was even a slight
possibility for there to exist a VLAN on a bridge port before they
join it are already guarded against this: mlxsw and prestera deny
joining LAG interfaces that are members of a bridge.
- br_fdb_replay() should now notify of local FDB entries, but I patched
all drivers except DSA to ignore these new entries in commit
2c4eca3ef716 ("net: bridge: switchdev: include local flag in FDB
notifications"). Driver authors can lift this restriction as they
wish, and when they do, they can also opt into the FDB replay
functionality.
- br_mdb_replay() should fix a real issue which is described in commit
4f2673b3a2b6 ("net: bridge: add helper to replay port and host-joined
mdb entries"). However most drivers do not offload the
SWITCHDEV_OBJ_ID_HOST_MDB to see this issue: only cpsw and am65_cpsw
offload this switchdev object, and I don't completely understand the
way in which they offload this switchdev object anyway. So I'll leave
it up to these drivers' respective maintainers to opt into
br_mdb_replay().
So most of the drivers pass NULL notifier blocks for the replay helpers,
except:
- dpaa2-switch which was already acked/regression-tested with the
helpers enabled (and there isn't much of a downside in having them)
- ocelot which already had replay logic in "pull" mode
- DSA which already had replay logic in "pull" mode
An important observation is that the drivers which don't currently
request bridge event replays don't even have the
switchdev_bridge_port_{offload,unoffload} calls placed in proper places
right now. This was done to avoid unnecessary rework for drivers which
might never even add support for this. For driver writers who wish to
add replay support, this can be used as a tentative placement guide:
https://patchwork.kernel.org/project/netdevbpf/patch/20210720134655.892334-11-vladimir.oltean@nxp.com/
Cc: Vadym Kochan <vkochan@marvell.com>
Cc: Taras Chornyi <tchornyi@marvell.com>
Cc: Ioana Ciornei <ioana.ciornei@nxp.com>
Cc: Lars Povlsen <lars.povlsen@microchip.com>
Cc: Steen Hegelund <Steen.Hegelund@microchip.com>
Cc: UNGLinuxDriver@microchip.com
Cc: Claudiu Manoil <claudiu.manoil@nxp.com>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Ioana Ciornei <ioana.ciornei@nxp.com> # dpaa2-switch
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-21 19:24:03 +03:00
2020-10-29 01:38:31 +02:00
static inline bool br_group_is_l2 ( const struct br_ip * group )
{
return group - > proto = = 0 ;
}
2012-12-11 22:23:08 +00:00
/*
 * Dereference the RCU-protected pointer X with rcu_dereference_protected(),
 * using lockdep_is_held(&br->multicast_lock) as the protection condition.
 */
# define mlock_dereference(X, br) \
rcu_dereference_protected ( X , lockdep_is_held ( & br - > multicast_lock ) )
2021-05-13 15:20:44 +02:00
/*
 * Return the first node of the multicast router port list that matches the
 * protocol of @skb: brmctx->ip6_mc_router_list for ETH_P_IPV6 frames (only
 * when CONFIG_IPV6 is enabled), brmctx->ip4_mc_router_list otherwise.
 * The head is read with rcu_dereference(hlist_first_rcu(...)).
 */
static inline struct hlist_node *
2021-07-19 20:06:25 +03:00
br_multicast_get_first_rport_node ( struct net_bridge_mcast * brmctx ,
struct sk_buff * skb )
2021-07-19 20:06:24 +03:00
{
2021-05-13 15:20:51 +02:00
# if IS_ENABLED(CONFIG_IPV6)
if ( skb - > protocol = = htons ( ETH_P_IPV6 ) )
2021-07-19 20:06:24 +03:00
return rcu_dereference ( hlist_first_rcu ( & brmctx - > ip6_mc_router_list ) ) ;
2021-05-13 15:20:51 +02:00
# endif
2021-07-19 20:06:24 +03:00
return rcu_dereference ( hlist_first_rcu ( & brmctx - > ip4_mc_router_list ) ) ;
2021-05-13 15:20:44 +02:00
}
/*
 * Map router-list node @rp to the bridge port that owns it.
 * The node is converted to its containing struct net_bridge_mcast_port
 * through the ip6_rlist member for ETH_P_IPV6 frames (only when CONFIG_IPV6
 * is enabled) and through ip4_rlist otherwise. Returns that context's port,
 * or NULL when hlist_entry_safe() yields no entry.
 */
static inline struct net_bridge_port *
2021-07-19 20:06:24 +03:00
br_multicast_rport_from_node_skb ( struct hlist_node * rp , struct sk_buff * skb )
{
2021-07-19 20:06:23 +03:00
struct net_bridge_mcast_port * mctx ;
2021-05-13 15:20:51 +02:00
# if IS_ENABLED(CONFIG_IPV6)
if ( skb - > protocol = = htons ( ETH_P_IPV6 ) )
2021-07-19 20:06:23 +03:00
mctx = hlist_entry_safe ( rp , struct net_bridge_mcast_port ,
ip6_rlist ) ;
else
2021-05-13 15:20:51 +02:00
# endif
2021-07-19 20:06:23 +03:00
mctx = hlist_entry_safe ( rp , struct net_bridge_mcast_port ,
ip4_rlist ) ;
if ( mctx )
return mctx - > port ;
else
return NULL ;
2021-05-13 15:20:44 +02:00
}
2021-07-19 20:06:24 +03:00
/* True while brmctx->ip4_mc_router_timer is pending. */
static inline bool br_ip4_multicast_is_router ( struct net_bridge_mcast * brmctx )
2010-03-01 09:53:04 +00:00
{
2021-07-19 20:06:24 +03:00
return timer_pending ( & brmctx - > ip4_mc_router_timer ) ;
2021-05-13 15:20:47 +02:00
}
2021-07-19 20:06:24 +03:00
/*
 * True while brmctx->ip6_mc_router_timer is pending; always false when
 * CONFIG_IPV6 is not enabled.
 */
static inline bool br_ip6_multicast_is_router ( struct net_bridge_mcast * brmctx )
2021-05-13 15:20:47 +02:00
{
# if IS_ENABLED(CONFIG_IPV6)
2021-07-19 20:06:24 +03:00
return timer_pending ( & brmctx - > ip6_mc_router_timer ) ;
2021-05-13 15:20:47 +02:00
# else
return false ;
# endif
}
/*
 * Decide whether the multicast context @brmctx currently counts as a
 * multicast router, based on brmctx->multicast_router:
 *  - MDB_RTR_TYPE_PERM: always true.
 *  - MDB_RTR_TYPE_TEMP_QUERY: with an @skb, the answer comes from the IPv4
 *    or IPv6 router timer matching skb->protocol; with a NULL @skb, true if
 *    either timer is pending.
 *  - any other value: false.
 */
static inline bool
2021-07-19 20:06:25 +03:00
br_multicast_is_router ( struct net_bridge_mcast * brmctx , struct sk_buff * skb )
2021-05-13 15:20:47 +02:00
{
2021-07-19 20:06:24 +03:00
switch ( brmctx - > multicast_router ) {
2021-05-13 15:20:47 +02:00
case MDB_RTR_TYPE_PERM :
return true ;
case MDB_RTR_TYPE_TEMP_QUERY :
if ( skb ) {
if ( skb - > protocol = = htons ( ETH_P_IP ) )
2021-07-19 20:06:24 +03:00
return br_ip4_multicast_is_router ( brmctx ) ;
2021-05-13 15:20:47 +02:00
else if ( skb - > protocol = = htons ( ETH_P_IPV6 ) )
2021-07-19 20:06:24 +03:00
return br_ip6_multicast_is_router ( brmctx ) ;
2021-05-13 15:20:47 +02:00
} else {
2021-07-19 20:06:24 +03:00
return br_ip4_multicast_is_router ( brmctx ) | |
br_ip6_multicast_is_router ( brmctx ) ;
2021-05-13 15:20:47 +02:00
}
/* Only reached for an skb that is neither IPv4 nor IPv6: report false. */
fallthrough ;
default :
return false ;
}
2010-03-01 09:53:04 +00:00
}
2013-08-01 01:06:20 +02:00
2013-08-30 17:28:17 +02:00
/*
 * Report whether a querier is considered present for @querier.
 *
 * The bridge's own querier counts when brmctx->multicast_querier is set,
 * except for IPv6 (@is_ipv6) when the bridge lacks the BROPT_HAS_IPV6_ADDR
 * option. The result is true only if querier->delay_time is already before
 * jiffies and either the own querier counts or querier->timer is pending.
 */
static inline bool
2021-07-19 20:06:25 +03:00
__br_multicast_querier_exists ( struct net_bridge_mcast * brmctx ,
struct bridge_mcast_other_query * querier ,
const bool is_ipv6 )
2013-08-01 01:06:20 +02:00
{
2016-06-24 12:35:18 +02:00
bool own_querier_enabled ;
2021-08-10 18:29:30 +03:00
if ( brmctx - > multicast_querier ) {
2021-07-19 20:06:25 +03:00
if ( is_ipv6 & & ! br_opt_get ( brmctx - > br , BROPT_HAS_IPV6_ADDR ) )
2016-06-24 12:35:18 +02:00
own_querier_enabled = false ;
else
own_querier_enabled = true ;
} else {
own_querier_enabled = false ;
}
2013-08-30 17:28:17 +02:00
return time_is_before_jiffies ( querier - > delay_time ) & &
2016-06-24 12:35:18 +02:00
( own_querier_enabled | | timer_pending ( & querier - > timer ) ) ;
2013-08-30 17:28:17 +02:00
}
2021-07-19 20:06:25 +03:00
/*
 * Protocol dispatch for the querier-exists check, keyed on eth->h_proto:
 *  - ETH_P_IP: evaluated against brmctx->ip4_other_query.
 *  - ETH_P_IPV6 (only when CONFIG_IPV6 is enabled): evaluated against
 *    brmctx->ip6_other_query.
 *  - anything else: true only when @mdb is non-NULL and its address is an
 *    L2 group according to br_group_is_l2().
 */
static inline bool br_multicast_querier_exists ( struct net_bridge_mcast * brmctx ,
2020-10-29 01:38:31 +02:00
struct ethhdr * eth ,
const struct net_bridge_mdb_entry * mdb )
2013-08-30 17:28:17 +02:00
{
switch ( eth - > h_proto ) {
case ( htons ( ETH_P_IP ) ) :
2021-07-19 20:06:25 +03:00
return __br_multicast_querier_exists ( brmctx ,
& brmctx - > ip4_other_query , false ) ;
2013-08-30 17:28:17 +02:00
# if IS_ENABLED(CONFIG_IPV6)
case ( htons ( ETH_P_IPV6 ) ) :
2021-07-19 20:06:25 +03:00
return __br_multicast_querier_exists ( brmctx ,
& brmctx - > ip6_other_query , true ) ;
2013-08-30 17:28:17 +02:00
# endif
default :
2020-10-29 01:38:31 +02:00
return ! ! mdb & & br_group_is_l2 ( & mdb - > addr ) ;
2013-08-30 17:28:17 +02:00
}
2013-08-01 01:06:20 +02:00
}
2016-06-28 16:57:06 +02:00
2020-09-22 10:30:19 +03:00
static inline bool br_multicast_is_star_g ( const struct br_ip * ip )
{
switch ( ip - > proto ) {
case htons ( ETH_P_IP ) :
return ipv4_is_zeronet ( ip - > src . ip4 ) ;
# if IS_ENABLED(CONFIG_IPV6)
case htons ( ETH_P_IPV6 ) :
return ipv6_addr_any ( & ip - > src . ip6 ) ;
# endif
default :
return false ;
}
}
2021-07-19 20:06:25 +03:00
/*
 * Decide from the configured protocol version whether to handle modes.
 *
 * @brmctx: multicast context providing the IGMP/MLD version settings
 * @proto:  ethernet protocol in network byte order
 *
 * Returns true for IPv4 when multicast_igmp_version is 3 and, with
 * CONFIG_IPV6 enabled, for IPv6 when multicast_mld_version is 2.
 * All other cases return false.
 */
static inline bool
br_multicast_should_handle_mode(const struct net_bridge_mcast *brmctx,
				__be16 proto)
{
	bool handle = false;

	switch (proto) {
	case htons(ETH_P_IP):
		handle = brmctx->multicast_igmp_version == 3;
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case htons(ETH_P_IPV6):
		handle = brmctx->multicast_mld_version == 2;
		break;
#endif
	default:
		break;
	}

	return handle;
}
2016-06-28 16:57:06 +02:00
static inline int br_multicast_igmp_type ( const struct sk_buff * skb )
{
return BR_INPUT_SKB_CB ( skb ) - > igmp ;
}
2020-09-07 12:56:10 +03:00
2021-07-19 20:06:25 +03:00
static inline unsigned long br_multicast_lmqt ( const struct net_bridge_mcast * brmctx )
2020-09-07 12:56:10 +03:00
{
2021-07-19 20:06:25 +03:00
return brmctx - > multicast_last_member_interval *
brmctx - > multicast_last_member_count ;
2020-09-07 12:56:10 +03:00
}
2020-09-07 12:56:14 +03:00
2021-07-19 20:06:25 +03:00
static inline unsigned long br_multicast_gmi ( const struct net_bridge_mcast * brmctx )
2020-09-07 12:56:14 +03:00
{
/* use the RFC default of 2 for QRV */
2021-07-19 20:06:25 +03:00
return 2 * brmctx - > multicast_query_interval +
brmctx - > multicast_query_response_interval ;
2020-09-07 12:56:14 +03:00
}
2021-07-19 20:06:27 +03:00
static inline bool
br_multicast_ctx_is_vlan ( const struct net_bridge_mcast * brmctx )
{
return ! ! brmctx - > vlan ;
}
static inline bool
br_multicast_port_ctx_is_vlan ( const struct net_bridge_mcast_port * pmctx )
{
return ! ! pmctx - > vlan ;
}
static inline struct net_bridge_mcast *
br_multicast_port_ctx_get_global ( const struct net_bridge_mcast_port * pmctx )
{
if ( ! br_multicast_port_ctx_is_vlan ( pmctx ) )
return & pmctx - > port - > br - > multicast_ctx ;
else
return & pmctx - > vlan - > brvlan - > br_mcast_ctx ;
}
static inline bool
br_multicast_ctx_vlan_global_disabled ( const struct net_bridge_mcast * brmctx )
{
2021-07-19 20:06:28 +03:00
return br_opt_get ( brmctx - > br , BROPT_MCAST_VLAN_SNOOPING_ENABLED ) & &
br_multicast_ctx_is_vlan ( brmctx ) & &
2021-07-19 20:06:27 +03:00
! ( brmctx - > vlan - > priv_flags & BR_VLFLAG_GLOBAL_MCAST_ENABLED ) ;
}
static inline bool
br_multicast_ctx_vlan_disabled ( const struct net_bridge_mcast * brmctx )
{
return br_multicast_ctx_is_vlan ( brmctx ) & &
! ( brmctx - > vlan - > priv_flags & BR_VLFLAG_MCAST_ENABLED ) ;
}
static inline bool
br_multicast_port_ctx_vlan_disabled ( const struct net_bridge_mcast_port * pmctx )
{
return br_multicast_port_ctx_is_vlan ( pmctx ) & &
! ( pmctx - > vlan - > priv_flags & BR_VLFLAG_MCAST_ENABLED ) ;
}
2021-07-19 20:06:31 +03:00
static inline bool
br_multicast_port_ctx_state_disabled ( const struct net_bridge_mcast_port * pmctx )
{
return pmctx - > port - > state = = BR_STATE_DISABLED | |
( br_multicast_port_ctx_is_vlan ( pmctx ) & &
( br_multicast_port_ctx_vlan_disabled ( pmctx ) | |
pmctx - > vlan - > state = = BR_STATE_DISABLED ) ) ;
}
static inline bool
br_multicast_port_ctx_state_stopped ( const struct net_bridge_mcast_port * pmctx )
{
return br_multicast_port_ctx_state_disabled ( pmctx ) | |
pmctx - > port - > state = = BR_STATE_BLOCKING | |
( br_multicast_port_ctx_is_vlan ( pmctx ) & &
pmctx - > vlan - > state = = BR_STATE_BLOCKING ) ;
}
2021-08-10 18:29:19 +03:00
2021-08-10 18:29:33 +03:00
static inline bool
br_rports_have_mc_router ( const struct net_bridge_mcast * brmctx )
{
# if IS_ENABLED(CONFIG_IPV6)
return ! hlist_empty ( & brmctx - > ip4_mc_router_list ) | |
! hlist_empty ( & brmctx - > ip6_mc_router_list ) ;
# else
return ! hlist_empty ( & brmctx - > ip4_mc_router_list ) ;
# endif
}
2021-08-10 18:29:19 +03:00
static inline bool
br_multicast_ctx_options_equal ( const struct net_bridge_mcast * brmctx1 ,
const struct net_bridge_mcast * brmctx2 )
{
return brmctx1 - > multicast_igmp_version = =
brmctx2 - > multicast_igmp_version & &
2021-08-10 18:29:20 +03:00
brmctx1 - > multicast_last_member_count = =
brmctx2 - > multicast_last_member_count & &
2021-08-10 18:29:21 +03:00
brmctx1 - > multicast_startup_query_count = =
brmctx2 - > multicast_startup_query_count & &
2021-08-10 18:29:22 +03:00
brmctx1 - > multicast_last_member_interval = =
brmctx2 - > multicast_last_member_interval & &
2021-08-10 18:29:23 +03:00
brmctx1 - > multicast_membership_interval = =
brmctx2 - > multicast_membership_interval & &
2021-08-10 18:29:24 +03:00
brmctx1 - > multicast_querier_interval = =
brmctx2 - > multicast_querier_interval & &
2021-08-10 18:29:25 +03:00
brmctx1 - > multicast_query_interval = =
brmctx2 - > multicast_query_interval & &
2021-08-10 18:29:26 +03:00
brmctx1 - > multicast_query_response_interval = =
brmctx2 - > multicast_query_response_interval & &
2021-08-10 18:29:27 +03:00
brmctx1 - > multicast_startup_query_interval = =
brmctx2 - > multicast_startup_query_interval & &
2021-08-10 18:29:30 +03:00
brmctx1 - > multicast_querier = = brmctx2 - > multicast_querier & &
2021-08-10 18:29:31 +03:00
brmctx1 - > multicast_router = = brmctx2 - > multicast_router & &
2021-08-10 18:29:33 +03:00
! br_rports_have_mc_router ( brmctx1 ) & &
! br_rports_have_mc_router ( brmctx2 ) & &
2021-08-10 18:29:19 +03:00
# if IS_ENABLED(CONFIG_IPV6)
brmctx1 - > multicast_mld_version = =
brmctx2 - > multicast_mld_version & &
# endif
true ;
}
2021-08-10 18:29:29 +03:00
static inline bool
br_multicast_ctx_matches_vlan_snooping ( const struct net_bridge_mcast * brmctx )
{
bool vlan_snooping_enabled ;
vlan_snooping_enabled = ! ! br_opt_get ( brmctx - > br ,
BROPT_MCAST_VLAN_SNOOPING_ENABLED ) ;
return ! ! ( vlan_snooping_enabled = = br_multicast_ctx_is_vlan ( brmctx ) ) ;
}
2010-02-27 19:41:45 +00:00
# else
2021-07-19 20:06:28 +03:00
/*
 * Stub for the #else branch of the surrounding conditional: ignores all
 * arguments and returns 0.
 */
static inline int br_multicast_rcv(struct net_bridge_mcast **brmctx,
				   struct net_bridge_mcast_port **pmctx,
				   struct net_bridge_vlan *vlan,
				   struct sk_buff *skb,
				   u16 vid)
{
	return 0;
}
2021-07-19 20:06:25 +03:00
/*
 * Stub for the #else branch of the surrounding conditional: no lookup is
 * performed and NULL is always returned.
 */
static inline struct net_bridge_mdb_entry *
br_mdb_get(struct net_bridge_mcast *brmctx, struct sk_buff *skb, u16 vid)
{
	return NULL;
}
2016-06-28 16:57:06 +02:00
/* Stub (#else branch): does nothing and returns 0. */
static inline int br_multicast_add_port(struct net_bridge_port *port)
{
	return 0;
}
/* Stub (#else branch): intentionally empty. */
static inline void br_multicast_del_port(struct net_bridge_port *port)
{
}
/* Stub (#else branch): intentionally empty. */
static inline void br_multicast_enable_port(struct net_bridge_port *port)
{
}
/* Stub (#else branch): intentionally empty. */
static inline void br_multicast_disable_port(struct net_bridge_port *port)
{
}
/* Stub (#else branch): intentionally empty. */
static inline void br_multicast_init(struct net_bridge *br)
{
}
2020-12-04 18:56:28 -05:00
/* Stub (#else branch): intentionally empty. */
static inline void br_multicast_join_snoopers(struct net_bridge *br)
{
}
/* Stub (#else branch): intentionally empty. */
static inline void br_multicast_leave_snoopers(struct net_bridge *br)
{
}
/* Stub (#else branch): intentionally empty. */
static inline void br_multicast_open(struct net_bridge *br)
{
}
/* Stub (#else branch): intentionally empty. */
static inline void br_multicast_stop(struct net_bridge *br)
{
}
2010-02-27 19:41:46 +00:00
2015-07-20 23:03:45 +02:00
/* Stub (#else branch): intentionally empty. */
static inline void br_multicast_dev_del(struct net_bridge *br)
{
}
2016-07-14 06:10:02 +03:00
/* Stub (#else branch): ignores all arguments and does nothing. */
static inline void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
				      struct sk_buff *skb,
				      struct net_bridge_mcast *brmctx,
				      bool local_rcv, bool local_orig)
{
}
2021-07-19 20:06:25 +03:00
static inline bool br_multicast_is_router ( struct net_bridge_mcast * brmctx ,
2021-05-14 10:32:33 +03:00
struct sk_buff * skb )
2010-02-27 19:41:45 +00:00
{
2018-01-18 17:37:45 -06:00
return false ;
2010-02-27 19:41:45 +00:00
}
2016-07-14 06:10:02 +03:00
2021-07-19 20:06:25 +03:00
static inline bool br_multicast_querier_exists ( struct net_bridge_mcast * brmctx ,
2020-11-01 02:08:45 +02:00
struct ethhdr * eth ,
const struct net_bridge_mdb_entry * mdb )
2013-08-01 01:06:20 +02:00
{
return false ;
}
2016-07-14 06:10:02 +03:00
2013-01-03 13:30:43 +02:00
/* Stub (#else branch): intentionally empty. */
static inline void br_mdb_init(void)
{
}
2016-07-14 06:10:02 +03:00
2013-01-03 13:30:43 +02:00
/* Stub (#else branch): intentionally empty. */
static inline void br_mdb_uninit(void)
{
}
2016-06-28 16:57:06 +02:00
2018-12-05 15:14:24 +02:00
/* Stub (#else branch): does nothing and returns 0. */
static inline int br_mdb_hash_init(struct net_bridge *br)
{
	return 0;
}
/* Stub (#else branch): intentionally empty. */
static inline void br_mdb_hash_fini(struct net_bridge *br)
{
}
2016-06-28 16:57:06 +02:00
/* Stub (#else branch): ignores all arguments and does nothing. */
static inline void br_multicast_count(struct net_bridge *br,
				      const struct net_bridge_port *p,
				      const struct sk_buff *skb,
				      u8 type, u8 dir)
{
}
/* Stub (#else branch): does nothing and returns 0. */
static inline int br_multicast_init_stats(struct net_bridge *br)
{
	return 0;
}
2017-04-10 14:59:27 +03:00
/* Stub (#else branch): intentionally empty. */
static inline void br_multicast_uninit_stats(struct net_bridge *br)
{
}
2016-06-28 16:57:06 +02:00
/* Stub (#else branch): @skb is not inspected, 0 is always returned. */
static inline int br_multicast_igmp_type(const struct sk_buff *skb)
{
	return 0;
}
2021-07-19 20:06:26 +03:00
/* Stub (#else branch): ignores all arguments and does nothing. */
static inline void br_multicast_ctx_init(struct net_bridge *br,
					 struct net_bridge_vlan *vlan,
					 struct net_bridge_mcast *brmctx)
{
}
/* Stub (#else branch): intentionally empty. */
static inline void br_multicast_ctx_deinit(struct net_bridge_mcast *brmctx)
{
}
/* Stub (#else branch): ignores all arguments and does nothing. */
static inline void br_multicast_port_ctx_init(struct net_bridge_port *port,
					      struct net_bridge_vlan *vlan,
					      struct net_bridge_mcast_port *pmctx)
{
}
/* Stub (#else branch): intentionally empty. */
static inline void
br_multicast_port_ctx_deinit(struct net_bridge_mcast_port *pmctx)
{
}
2021-07-19 20:06:27 +03:00
/* Stub (#else branch): ignores @vlan and @on, does nothing. */
static inline void br_multicast_toggle_one_vlan(struct net_bridge_vlan *vlan,
						bool on)
{
}
2021-07-19 20:06:28 +03:00
/* Stub (#else branch): ignores @vlan and @on, does nothing. */
static inline void br_multicast_toggle_vlan(struct net_bridge_vlan *vlan,
					    bool on)
{
}
/*
 * Stub (#else branch): the toggle is not available here, so the request
 * is rejected with -EOPNOTSUPP.  @extack is left untouched.
 */
static inline int br_multicast_toggle_vlan_snooping(struct net_bridge *br,
						    bool on,
						    struct netlink_ext_ack *extack)
{
	return -EOPNOTSUPP;
}
2021-07-19 20:06:37 +03:00
/* Stub (#else branch): nothing is toggled, false is always returned. */
static inline bool br_multicast_toggle_global_vlan(struct net_bridge_vlan *vlan,
						   bool on)
{
	return false;
}
net: bridge: move the switchdev object replay helpers to "push" mode
Starting with commit 4f2673b3a2b6 ("net: bridge: add helper to replay
port and host-joined mdb entries"), DSA has introduced some bridge
helpers that replay switchdev events (FDB/MDB/VLAN additions and
deletions) that can be lost by the switchdev drivers in a variety of
circumstances:
- an IP multicast group was host-joined on the bridge itself before any
switchdev port joined the bridge, leading to the host MDB entries
missing in the hardware database.
- during the bridge creation process, the MAC address of the bridge was
added to the FDB as an entry pointing towards the bridge device
itself, but with no switchdev ports being part of the bridge yet, this
local FDB entry would remain unknown to the switchdev hardware
database.
- a VLAN/FDB/MDB was added to a bridge port that is a LAG interface,
before any switchdev port joined that LAG, leading to the hardware
database missing those entries.
- a switchdev port left a LAG that is a bridge port, while the LAG
remained part of the bridge, and all FDB/MDB/VLAN entries remained
installed in the hardware database of the switchdev port.
Also, since commit 0d2cfbd41c4a ("net: bridge: ignore switchdev events
for LAG ports which didn't request replay"), DSA introduced a method,
based on a const void *ctx, to ensure that two switchdev ports under the
same LAG that is a bridge port do not see the same MDB/VLAN entry being
replayed twice by the bridge, once for every bridge port that joins the
LAG.
With so many ordering corner cases being possible, it seems unreasonable
to expect a switchdev driver writer to get it right from the first try.
Therefore, now that DSA has experimented with the bridge replay helpers
for a little bit, we can move the code to the bridge driver where it is
more readily available to all switchdev drivers.
To convert the switchdev object replay helpers from "pull mode" (where
the driver asks for them) to a "push mode" (where the bridge offers them
automatically), the biggest problem is that the bridge needs to be aware
when a switchdev port joins and leaves, even when the switchdev is only
indirectly a bridge port (for example when the bridge port is a LAG
upper of the switchdev).
Luckily, we already have a hook for that, in the form of the newly
introduced switchdev_bridge_port_offload() and
switchdev_bridge_port_unoffload() calls. These offer a natural place for
hooking the object addition and deletion replays.
Extend the above 2 functions with:
- pointers to the switchdev atomic notifier (for FDB replays) and the
blocking notifier (for MDB and VLAN replays).
- the "const void *ctx" argument required for drivers to be able to
disambiguate between which port is targeted, when multiple ports are
lowers of the same LAG that is a bridge port. Most of the drivers pass
NULL to this argument, except the ones that support LAG offload and have
the proper context check already in place in the switchdev blocking
notifier handler.
Also unexport the replay helpers, since nobody except the bridge calls
them directly now.
Note that:
(a) we abuse the terminology slightly, because FDB entries are not
"switchdev objects", but we count them as objects nonetheless.
With no direct way to prove it, I think they are not modeled as
switchdev objects because those can only be installed by the bridge
to the hardware (as opposed to FDB entries which can be propagated
in the other direction too). This is merely an abuse of terms, FDB
entries are replayed too, despite not being objects.
(b) the bridge does not attempt to sync port attributes to newly joined
ports, just the countable stuff (the objects). The reason for this
is simple: no universal and symmetric way to sync and unsync them is
known. For example, VLAN filtering: what to do on unsync, disable or
leave it enabled? Similarly, STP state, ageing timer, etc etc. What
a switchdev port does when it becomes standalone again is not really
up to the bridge's competence, and the driver should deal with it.
On the other hand, replaying deletions of switchdev objects can be
seen as a matter of cleanup and therefore be treated by the bridge,
hence this patch.
We make the replay helpers opt-in for drivers, because they might not
bring immediate benefits for them:
- nbp_vlan_init() is called _after_ netdev_master_upper_dev_link(),
so br_vlan_replay() should not do anything for the new drivers on
which we call it. The existing drivers where there was even a slight
possibility for there to exist a VLAN on a bridge port before they
join it are already guarded against this: mlxsw and prestera deny
joining LAG interfaces that are members of a bridge.
- br_fdb_replay() should now notify of local FDB entries, but I patched
all drivers except DSA to ignore these new entries in commit
2c4eca3ef716 ("net: bridge: switchdev: include local flag in FDB
notifications"). Driver authors can lift this restriction as they
wish, and when they do, they can also opt into the FDB replay
functionality.
- br_mdb_replay() should fix a real issue which is described in commit
4f2673b3a2b6 ("net: bridge: add helper to replay port and host-joined
mdb entries"). However most drivers do not offload the
SWITCHDEV_OBJ_ID_HOST_MDB to see this issue: only cpsw and am65_cpsw
offload this switchdev object, and I don't completely understand the
way in which they offload this switchdev object anyway. So I'll leave
it up to these drivers' respective maintainers to opt into
br_mdb_replay().
So most of the drivers pass NULL notifier blocks for the replay helpers,
except:
- dpaa2-switch which was already acked/regression-tested with the
helpers enabled (and there isn't much of a downside in having them)
- ocelot which already had replay logic in "pull" mode
- DSA which already had replay logic in "pull" mode
An important observation is that the drivers which don't currently
request bridge event replays don't even have the
switchdev_bridge_port_{offload,unoffload} calls placed in proper places
right now. This was done to avoid unnecessary rework for drivers which
might never even add support for this. For driver writers who wish to
add replay support, this can be used as a tentative placement guide:
https://patchwork.kernel.org/project/netdevbpf/patch/20210720134655.892334-11-vladimir.oltean@nxp.com/
Cc: Vadym Kochan <vkochan@marvell.com>
Cc: Taras Chornyi <tchornyi@marvell.com>
Cc: Ioana Ciornei <ioana.ciornei@nxp.com>
Cc: Lars Povlsen <lars.povlsen@microchip.com>
Cc: Steen Hegelund <Steen.Hegelund@microchip.com>
Cc: UNGLinuxDriver@microchip.com
Cc: Claudiu Manoil <claudiu.manoil@nxp.com>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Ioana Ciornei <ioana.ciornei@nxp.com> # dpaa2-switch
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-21 19:24:03 +03:00
/*
 * Stub (#else branch): no replay is performed; every call fails with
 * -EOPNOTSUPP and none of the arguments are used.
 */
static inline int br_mdb_replay(struct net_device *br_dev,
				struct net_device *dev, const void *ctx,
				bool adding, struct notifier_block *nb,
				struct netlink_ext_ack *extack)
{
	return -EOPNOTSUPP;
}
2021-08-10 18:29:19 +03:00
/*
 * Stub (#else branch): no options are compared, the two contexts are
 * always reported as equal.
 */
static inline bool
br_multicast_ctx_options_equal(const struct net_bridge_mcast *brmctx1,
			       const struct net_bridge_mcast *brmctx2)
{
	return true;
}
2010-03-01 09:53:04 +00:00
# endif
2010-02-27 19:41:45 +00:00
2013-02-13 12:00:09 +00:00
/* br_vlan.c */
# ifdef CONFIG_BRIDGE_VLAN_FILTERING
2015-09-30 20:16:53 +02:00
bool br_allowed_ingress ( const struct net_bridge * br ,
struct net_bridge_vlan_group * vg , struct sk_buff * skb ,
2021-07-19 20:06:28 +03:00
u16 * vid , u8 * state ,
struct net_bridge_vlan * * vlan ) ;
2015-09-30 20:16:53 +02:00
bool br_allowed_egress ( struct net_bridge_vlan_group * vg ,
2013-10-18 13:48:22 -07:00
const struct sk_buff * skb ) ;
2014-05-26 15:15:53 +09:00
bool br_should_learn ( struct net_bridge_port * p , struct sk_buff * skb , u16 * vid ) ;
2013-10-18 13:48:22 -07:00
struct sk_buff * br_handle_vlan ( struct net_bridge * br ,
2017-01-31 22:59:55 -08:00
const struct net_bridge_port * port ,
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up, but the moment net_port_vlans is
changed the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
struct net_bridge_vlan_group * vg ,
2013-10-18 13:48:22 -07:00
struct sk_buff * skb ) ;
2017-10-27 13:19:37 +03:00
int br_vlan_add ( struct net_bridge * br , u16 vid , u16 flags ,
2018-12-12 17:02:50 +00:00
bool * changed , struct netlink_ext_ack * extack ) ;
2013-10-18 13:48:22 -07:00
int br_vlan_delete ( struct net_bridge * br , u16 vid ) ;
void br_vlan_flush ( struct net_bridge * br ) ;
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up, but the moment net_port_vlans is
changed the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
struct net_bridge_vlan * br_vlan_find ( struct net_bridge_vlan_group * vg , u16 vid ) ;
2014-06-10 20:59:25 +09:00
void br_recalculate_fwd_mask ( struct net_bridge * br ) ;
2021-02-13 22:43:16 +02:00
int br_vlan_filter_toggle ( struct net_bridge * br , unsigned long val ,
struct netlink_ext_ack * extack ) ;
2021-02-13 22:43:17 +02:00
int __br_vlan_set_proto ( struct net_bridge * br , __be16 proto ,
struct netlink_ext_ack * extack ) ;
2021-02-13 22:43:16 +02:00
int br_vlan_set_proto ( struct net_bridge * br , unsigned long val ,
struct netlink_ext_ack * extack ) ;
2016-04-30 10:25:28 +02:00
int br_vlan_set_stats ( struct net_bridge * br , unsigned long val ) ;
2018-10-12 13:41:16 +03:00
int br_vlan_set_stats_per_port ( struct net_bridge * br , unsigned long val ) ;
2014-10-03 11:29:18 -04:00
int br_vlan_init ( struct net_bridge * br ) ;
2021-02-13 22:43:16 +02:00
int br_vlan_set_default_pvid ( struct net_bridge * br , unsigned long val ,
struct netlink_ext_ack * extack ) ;
2018-12-12 17:02:50 +00:00
int __br_vlan_set_default_pvid ( struct net_bridge * br , u16 pvid ,
struct netlink_ext_ack * extack ) ;
2017-10-27 13:19:37 +03:00
int nbp_vlan_add ( struct net_bridge_port * port , u16 vid , u16 flags ,
2018-12-12 17:02:50 +00:00
bool * changed , struct netlink_ext_ack * extack ) ;
2013-10-18 13:48:22 -07:00
int nbp_vlan_delete ( struct net_bridge_port * port , u16 vid ) ;
void nbp_vlan_flush ( struct net_bridge_port * port ) ;
2018-12-12 17:02:50 +00:00
int nbp_vlan_init ( struct net_bridge_port * port , struct netlink_ext_ack * extack ) ;
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up, but the moment net_port_vlans is
changed the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
/* Forward declarations of bridge VLAN helpers. Only the prototypes appear
 * here, so the notes below describe the signatures; anything about behaviour
 * is marked as an assumption to verify against the implementation.
 */
/* Returns an int for bridge port p. NOTE(review): presumably the number of
 * vlan info entries selected by filter_mask - confirm.
 */
int nbp_get_num_vlan_infos ( struct net_bridge_port * p , u32 filter_mask ) ;
2016-04-30 10:25:29 +02:00
/* v is read-only (const); the result is written through stats. */
void br_vlan_get_stats ( const struct net_bridge_vlan * v ,
2020-11-17 21:25:42 +01:00
struct pcpu_sw_netstats * stats ) ;
2019-04-18 18:35:33 +01:00
/* Port-level event hook; event is an unsigned long event code.
 * NOTE(review): presumably a netdevice notifier event - confirm.
 */
void br_vlan_port_event ( struct net_bridge_port * p , unsigned long event ) ;
2019-08-02 13:57:36 +03:00
/* Bridge-level counterpart taking the net_device, the event code and an
 * opaque pointer; returns an int status.
 */
int br_vlan_bridge_event ( struct net_device * dev , unsigned long event ,
void * ptr ) ;
2020-01-14 19:56:09 +02:00
/* Paired init/uninit entry points taking no arguments. NOTE(review):
 * presumably they register and unregister the vlan rtnetlink handlers.
 */
void br_vlan_rtnl_init ( void ) ;
void br_vlan_rtnl_uninit ( void ) ;
2020-01-14 19:56:13 +02:00
/* Notification helper; br and p are read-only. NOTE(review): vid and
 * vid_range presumably delimit a range of VLAN ids and cmd selects the
 * message type - confirm.
 */
void br_vlan_notify ( const struct net_bridge * br ,
const struct net_bridge_port * p ,
u16 vid , u16 vid_range ,
int cmd ) ;
net: bridge: move the switchdev object replay helpers to "push" mode
Starting with commit 4f2673b3a2b6 ("net: bridge: add helper to replay
port and host-joined mdb entries"), DSA has introduced some bridge
helpers that replay switchdev events (FDB/MDB/VLAN additions and
deletions) that can be lost by the switchdev drivers in a variety of
circumstances:
- an IP multicast group was host-joined on the bridge itself before any
switchdev port joined the bridge, leading to the host MDB entries
missing in the hardware database.
- during the bridge creation process, the MAC address of the bridge was
added to the FDB as an entry pointing towards the bridge device
itself, but with no switchdev ports being part of the bridge yet, this
local FDB entry would remain unknown to the switchdev hardware
database.
- a VLAN/FDB/MDB was added to a bridge port that is a LAG interface,
before any switchdev port joined that LAG, leading to the hardware
database missing those entries.
- a switchdev port left a LAG that is a bridge port, while the LAG
remained part of the bridge, and all FDB/MDB/VLAN entries remained
installed in the hardware database of the switchdev port.
Also, since commit 0d2cfbd41c4a ("net: bridge: ignore switchdev events
for LAG ports which didn't request replay"), DSA introduced a method,
based on a const void *ctx, to ensure that two switchdev ports under the
same LAG that is a bridge port do not see the same MDB/VLAN entry being
replayed twice by the bridge, once for every bridge port that joins the
LAG.
With so many ordering corner cases being possible, it seems unreasonable
to expect a switchdev driver writer to get it right from the first try.
Therefore, now that DSA has experimented with the bridge replay helpers
for a little bit, we can move the code to the bridge driver where it is
more readily available to all switchdev drivers.
To convert the switchdev object replay helpers from "pull mode" (where
the driver asks for them) to a "push mode" (where the bridge offers them
automatically), the biggest problem is that the bridge needs to be aware
when a switchdev port joins and leaves, even when the switchdev is only
indirectly a bridge port (for example when the bridge port is a LAG
upper of the switchdev).
Luckily, we already have a hook for that, in the form of the newly
introduced switchdev_bridge_port_offload() and
switchdev_bridge_port_unoffload() calls. These offer a natural place for
hooking the object addition and deletion replays.
Extend the above 2 functions with:
- pointers to the switchdev atomic notifier (for FDB replays) and the
blocking notifier (for MDB and VLAN replays).
- the "const void *ctx" argument required for drivers to be able to
disambiguate between which port is targeted, when multiple ports are
lowers of the same LAG that is a bridge port. Most of the drivers pass
NULL to this argument, except the ones that support LAG offload and have
the proper context check already in place in the switchdev blocking
notifier handler.
Also unexport the replay helpers, since nobody except the bridge calls
them directly now.
Note that:
(a) we abuse the terminology slightly, because FDB entries are not
"switchdev objects", but we count them as objects nonetheless.
With no direct way to prove it, I think they are not modeled as
switchdev objects because those can only be installed by the bridge
to the hardware (as opposed to FDB entries which can be propagated
in the other direction too). This is merely an abuse of terms, FDB
entries are replayed too, despite not being objects.
(b) the bridge does not attempt to sync port attributes to newly joined
ports, just the countable stuff (the objects). The reason for this
is simple: no universal and symmetric way to sync and unsync them is
known. For example, VLAN filtering: what to do on unsync, disable or
leave it enabled? Similarly, STP state, ageing timer, etc etc. What
a switchdev port does when it becomes standalone again is not really
up to the bridge's competence, and the driver should deal with it.
On the other hand, replaying deletions of switchdev objects can be
seen as a matter of cleanup and therefore be treated by the bridge,
hence this patch.
We make the replay helpers opt-in for drivers, because they might not
bring immediate benefits for them:
- nbp_vlan_init() is called _after_ netdev_master_upper_dev_link(),
so br_vlan_replay() should not do anything for the new drivers on
which we call it. The existing drivers where there was even a slight
possibility for there to exist a VLAN on a bridge port before they
join it are already guarded against this: mlxsw and prestera deny
joining LAG interfaces that are members of a bridge.
- br_fdb_replay() should now notify of local FDB entries, but I patched
all drivers except DSA to ignore these new entries in commit
2c4eca3ef716 ("net: bridge: switchdev: include local flag in FDB
notifications"). Driver authors can lift this restriction as they
wish, and when they do, they can also opt into the FDB replay
functionality.
- br_mdb_replay() should fix a real issue which is described in commit
4f2673b3a2b6 ("net: bridge: add helper to replay port and host-joined
mdb entries"). However most drivers do not offload the
SWITCHDEV_OBJ_ID_HOST_MDB to see this issue: only cpsw and am65_cpsw
offload this switchdev object, and I don't completely understand the
way in which they offload this switchdev object anyway. So I'll leave
it up to these drivers' respective maintainers to opt into
br_mdb_replay().
So most of the drivers pass NULL notifier blocks for the replay helpers,
except:
- dpaa2-switch which was already acked/regression-tested with the
helpers enabled (and there isn't much of a downside in having them)
- ocelot which already had replay logic in "pull" mode
- DSA which already had replay logic in "pull" mode
An important observation is that the drivers which don't currently
request bridge event replays don't even have the
switchdev_bridge_port_{offload,unoffload} calls placed in proper places
right now. This was done to avoid unnecessary rework for drivers which
might never even add support for this. For driver writers who wish to
add replay support, this can be used as a tentative placement guide:
https://patchwork.kernel.org/project/netdevbpf/patch/20210720134655.892334-11-vladimir.oltean@nxp.com/
Cc: Vadym Kochan <vkochan@marvell.com>
Cc: Taras Chornyi <tchornyi@marvell.com>
Cc: Ioana Ciornei <ioana.ciornei@nxp.com>
Cc: Lars Povlsen <lars.povlsen@microchip.com>
Cc: Steen Hegelund <Steen.Hegelund@microchip.com>
Cc: UNGLinuxDriver@microchip.com
Cc: Claudiu Manoil <claudiu.manoil@nxp.com>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Ioana Ciornei <ioana.ciornei@nxp.com> # dpaa2-switch
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-21 19:24:03 +03:00
/* VLAN replay helper for switchdev. Going by the change description that
 * introduced it: adding selects replay of additions versus deletions, nb is
 * the notifier block the events are delivered to, and ctx lets a driver tell
 * which port is targeted when several ports sit under the same LAG.
 * extack is the netlink extended-ack object. Returns an int status.
 * NOTE(review): semantics taken from the commit text, not from code visible
 * here - confirm against the implementation.
 */
int br_vlan_replay ( struct net_device * br_dev , struct net_device * dev ,
const void * ctx , bool adding , struct notifier_block * nb ,
struct netlink_ext_ack * extack ) ;
2020-01-24 13:40:21 +02:00
/* Boolean predicate over two read-only vlan entries. NOTE(review): presumably
 * tells whether v_curr can extend the range ending at range_end - confirm.
 */
bool br_vlan_can_enter_range ( const struct net_bridge_vlan * v_curr ,
const struct net_bridge_vlan * range_end ) ;
2013-02-13 12:00:10 +00:00
2021-03-24 02:30:35 +01:00
/* Forward-path helpers: both take the bridge and a net_device_path to fill.
 * The pvid variant returns nothing; the mode variant returns an int status
 * and additionally takes the destination port.
 */
void br_vlan_fill_forward_path_pvid ( struct net_bridge * br ,
struct net_device_path_ctx * ctx ,
struct net_device_path * path ) ;
int br_vlan_fill_forward_path_mode ( struct net_bridge * br ,
struct net_bridge_port * dst ,
struct net_device_path * path ) ;
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up, but the moment net_port_vlans is
changed the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
/* br_vlan_group - return the vlan group attached to a bridge.
 *
 * Reads br->vlgrp through rtnl_dereference().
 * NOTE(review): rtnl_dereference() is the RTNL-protected accessor, so the
 * caller is presumably expected to hold RTNL - confirm.
 */
static inline struct net_bridge_vlan_group * br_vlan_group (
const struct net_bridge * br )
2013-02-13 12:00:10 +00:00
{
2015-10-12 21:47:02 +02:00
return rtnl_dereference ( br - > vlgrp ) ;
2013-02-13 12:00:10 +00:00
}
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up, but the moment net_port_vlans is
changed the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
/* nbp_vlan_group - return the vlan group attached to a bridge port.
 *
 * Reads p->vlgrp through rtnl_dereference().
 * NOTE(review): the caller is presumably expected to hold RTNL - confirm.
 */
static inline struct net_bridge_vlan_group * nbp_vlan_group (
const struct net_bridge_port * p )
2013-02-13 12:00:10 +00:00
{
2015-10-12 21:47:02 +02:00
return rtnl_dereference ( p - > vlgrp ) ;
}
/* br_vlan_group_rcu - return the vlan group attached to a bridge.
 *
 * Reads br->vlgrp through rcu_dereference() instead of rtnl_dereference().
 * NOTE(review): callers are presumably inside an RCU read-side critical
 * section - confirm.
 */
static inline struct net_bridge_vlan_group * br_vlan_group_rcu (
const struct net_bridge * br )
{
return rcu_dereference ( br - > vlgrp ) ;
}
/* nbp_vlan_group_rcu - return the vlan group attached to a bridge port.
 *
 * Reads p->vlgrp through rcu_dereference() instead of rtnl_dereference().
 * NOTE(review): callers are presumably inside an RCU read-side critical
 * section - confirm.
 */
static inline struct net_bridge_vlan_group * nbp_vlan_group_rcu (
const struct net_bridge_port * p )
{
return rcu_dereference ( p - > vlgrp ) ;
2013-02-13 12:00:10 +00:00
}
/* br_vlan_get_tag - read the VLAN id carried by an skb.
 *
 * Since the bridge now depends on the 8021Q module, by the time the bridge
 * sees the skb the vlan tag will always be present if the frame was tagged.
 *
 * When skb_vlan_tag_present(skb) is true, *vid receives
 * skb_vlan_tag_get_id(skb) and 0 is returned. Otherwise *vid is set to 0
 * and -EINVAL is returned. *vid is therefore written on both paths.
 */
static inline int br_vlan_get_tag ( const struct sk_buff * skb , u16 * vid )
{
int err = 0 ;
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
if ( skb_vlan_tag_present ( skb ) ) {
2018-11-09 00:18:03 +01:00
* vid = skb_vlan_tag_get_id ( skb ) ;
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
} else {
2013-02-13 12:00:10 +00:00
* vid = 0 ;
err = - EINVAL ;
}
return err ;
}
2013-02-13 12:00:14 +00:00
2015-09-30 20:16:53 +02:00
/* br_get_pvid - return the pvid stored in a vlan group.
 *
 * Returns 0 when vg is NULL. Otherwise issues smp_rmb() and then returns
 * vg->pvid.
 * NOTE(review): the read barrier presumably pairs with a write barrier on
 * the path that updates pvid; that code is not visible here - confirm.
 */
static inline u16 br_get_pvid ( const struct net_bridge_vlan_group * vg )
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
{
2015-09-30 20:16:53 +02:00
if ( ! vg )
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
return 0 ;
smp_rmb ( ) ;
2015-09-30 20:16:53 +02:00
return vg - > pvid ;
2013-02-13 12:00:14 +00:00
}
2020-01-14 19:56:09 +02:00
/* br_vlan_flags - flags of vlan entry v as seen with a given pvid.
 *
 * Returns v->flags, with BRIDGE_VLAN_INFO_PVID OR-ed in when v->vid equals
 * pvid. The stored flags of v are not modified.
 */
static inline u16 br_vlan_flags ( const struct net_bridge_vlan * v , u16 pvid )
{
return v - > vid = = pvid ? v - > flags | BRIDGE_VLAN_INFO_PVID : v - > flags ;
}
2013-02-13 12:00:09 +00:00
# else
2015-09-30 20:16:53 +02:00
/* br_allowed_ingress - stub variant that accepts every frame.
 *
 * Sets *vlan to NULL and returns true; br, vg, skb, vid and state are not
 * touched.
 * NOTE(review): this follows an #else whose matching #if is not visible
 * here; presumably it is the build without bridge VLAN filtering - confirm.
 */
static inline bool br_allowed_ingress ( const struct net_bridge * br ,
struct net_bridge_vlan_group * vg ,
2013-02-13 12:00:14 +00:00
struct sk_buff * skb ,
2021-07-19 20:06:28 +03:00
u16 * vid , u8 * state ,
struct net_bridge_vlan * * vlan )
2013-02-13 12:00:10 +00:00
{
2021-07-19 20:06:28 +03:00
* vlan = NULL ;
2013-02-13 12:00:10 +00:00
return true ;
}
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up, but the moment net_port_vlans is
changed the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
/* br_allowed_egress - stub variant: always returns true and ignores both
 * arguments.
 */
static inline bool br_allowed_egress ( struct net_bridge_vlan_group * vg ,
2013-02-13 12:00:11 +00:00
const struct sk_buff * skb )
{
return true ;
}
2014-05-26 15:15:53 +09:00
/* br_should_learn - stub variant: always returns true. *vid is left
 * unwritten and p and skb are ignored.
 */
static inline bool br_should_learn ( struct net_bridge_port * p ,
struct sk_buff * skb , u16 * vid )
{
return true ;
}
2013-02-13 12:00:14 +00:00
/* br_handle_vlan - stub variant: hands back the skb it was given, unchanged.
 * br, port and vg are ignored.
 */
static inline struct sk_buff * br_handle_vlan ( struct net_bridge * br ,
2017-01-31 22:59:55 -08:00
const struct net_bridge_port * port ,
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
struct net_bridge_vlan_group * vg ,
2013-02-13 12:00:14 +00:00
struct sk_buff * skb )
{
return skb ;
}
2017-10-27 13:19:37 +03:00
/*
 * br_vlan_add() stub: adds nothing. It stores false through @changed and
 * returns -EOPNOTSUPP; @br, @vid, @flags and @extack are not used.
 * NOTE(review): looks like the VLAN-filtering-disabled fallback; the
 * enclosing #if/#else is outside this view - confirm.
 */
static inline int br_vlan_add ( struct net_bridge * br , u16 vid , u16 flags ,
2018-12-12 17:02:50 +00:00
bool * changed , struct netlink_ext_ack * extack )
2013-02-13 12:00:09 +00:00
{
2017-10-27 13:19:37 +03:00
* changed = false ;
2013-02-13 12:00:09 +00:00
return - EOPNOTSUPP ;
}
/*
 * br_vlan_delete() stub: nothing is removed and both arguments are ignored.
 *
 * Return: always -EOPNOTSUPP.
 */
static inline int br_vlan_delete(struct net_bridge *br, u16 vid)
{
	return -EOPNOTSUPP;
}
/* br_vlan_flush() stub: intentionally empty, @br is not touched. */
static inline void br_vlan_flush(struct net_bridge *br)
{
}
2014-06-10 20:59:25 +09:00
/* br_recalculate_fwd_mask() stub: intentionally empty, @br is not touched. */
static inline void br_recalculate_fwd_mask(struct net_bridge *br)
{
}
2014-10-03 11:29:18 -04:00
/*
 * br_vlan_init() stub: performs no initialisation and always returns 0;
 * @br is not used.
 */
static inline int br_vlan_init ( struct net_bridge * br )
2014-06-10 20:59:23 +09:00
{
2014-10-03 11:29:18 -04:00
return 0 ;
2014-06-10 20:59:23 +09:00
}
2017-10-27 13:19:37 +03:00
/*
 * nbp_vlan_add() stub (per-port counterpart of br_vlan_add()): adds nothing.
 * It stores false through @changed and returns -EOPNOTSUPP; @port, @vid,
 * @flags and @extack are not used.
 */
static inline int nbp_vlan_add ( struct net_bridge_port * port , u16 vid , u16 flags ,
2018-12-12 17:02:50 +00:00
bool * changed , struct netlink_ext_ack * extack )
2013-02-13 12:00:09 +00:00
{
2017-10-27 13:19:37 +03:00
* changed = false ;
2013-02-13 12:00:09 +00:00
return - EOPNOTSUPP ;
}
/*
 * nbp_vlan_delete() stub: nothing is removed and both arguments are ignored.
 *
 * Return: always -EOPNOTSUPP.
 */
static inline int nbp_vlan_delete(struct net_bridge_port *port, u16 vid)
{
	return -EOPNOTSUPP;
}
/* nbp_vlan_flush() stub: intentionally empty, @port is not touched. */
static inline void nbp_vlan_flush(struct net_bridge_port *port)
{
}
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
/*
 * br_vlan_find() stub: performs no lookup and always returns NULL, so
 * callers see "no such vlan" for every @vid; @vg is not dereferenced.
 */
static inline struct net_bridge_vlan * br_vlan_find ( struct net_bridge_vlan_group * vg ,
u16 vid )
2013-02-13 12:00:10 +00:00
{
return NULL ;
}
2018-12-12 17:02:50 +00:00
/*
 * nbp_vlan_init() stub: performs no per-port initialisation and always
 * returns 0; @port and @extack are not used.
 */
static inline int nbp_vlan_init ( struct net_bridge_port * port ,
struct netlink_ext_ack * extack )
2013-02-13 12:00:19 +00:00
{
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
return 0 ;
2013-02-13 12:00:19 +00:00
}
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
/*
 * br_vlan_get_tag() stub: always returns 0 and never writes through @tag,
 * so the caller's *tag keeps whatever value it had; @skb is not inspected.
 */
static inline u16 br_vlan_get_tag ( const struct sk_buff * skb , u16 * tag )
2014-10-03 11:29:18 -04:00
{
return 0 ;
}
2015-09-30 20:16:53 +02:00
/*
 * br_get_pvid() stub: always returns 0; @vg is not dereferenced.
 * NOTE(review): 0 presumably means "no PVID" to callers - confirm.
 */
static inline u16 br_get_pvid ( const struct net_bridge_vlan_group * vg )
2013-02-13 12:00:14 +00:00
{
2014-10-03 11:29:17 -04:00
return 0 ;
2013-02-13 12:00:14 +00:00
}
2014-05-16 09:59:20 -04:00
2021-02-13 22:43:15 +02:00
/*
 * br_vlan_filter_toggle() stub: changes nothing and always returns
 * -EOPNOTSUPP; @br, @val and @extack are not used (no extack message is
 * set).
 */
static inline int br_vlan_filter_toggle ( struct net_bridge * br ,
2021-02-15 23:09:12 +02:00
unsigned long val ,
struct netlink_ext_ack * extack )
2015-08-07 19:40:45 +03:00
{
return - EOPNOTSUPP ;
}
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
/*
 * nbp_get_num_vlan_infos() stub: reports zero entries regardless of
 * @p and @filter_mask.
 *
 * Return: always 0.
 */
static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p,
					 u32 filter_mask)
{
	return 0;
}
2021-03-24 02:30:35 +01:00
/*
 * br_vlan_fill_forward_path_pvid() stub: intentionally empty; neither
 * @ctx nor @path is modified.
 */
static inline void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
						  struct net_device_path_ctx *ctx,
						  struct net_device_path *path)
{
}
/*
 * br_vlan_fill_forward_path_mode() stub: leaves @path untouched.
 *
 * Return: always 0.
 */
static inline int br_vlan_fill_forward_path_mode(struct net_bridge *br,
						 struct net_bridge_port *dst,
						 struct net_device_path *path)
{
	return 0;
}
bridge: vlan: add per-vlan struct and move to rhashtables
This patch changes the bridge vlan implementation to use rhashtables
instead of bitmaps. The main motivation behind this change is that we
need extensible per-vlan structures (both per-port and global) so more
advanced features can be introduced and the vlan support can be
extended. I've tried to break this up but the moment net_port_vlans is
changed and the whole API goes away, thus this is a larger patch.
A few short goals of this patch are:
- Extensible per-vlan structs stored in rhashtables and a sorted list
- Keep user-visible behaviour (compressed vlans etc)
- Keep fastpath ingress/egress logic the same (optimizations to come
later)
Here's a brief list of some of the new features we'd like to introduce:
- per-vlan counters
- vlan ingress/egress mapping
- per-vlan igmp configuration
- vlan priorities
- avoid fdb entries replication (e.g. local fdb scaling issues)
The structure is kept single for both global and per-port entries so to
avoid code duplication where possible and also because we'll soon introduce
"port0 / aka bridge as port" which should simplify things further
(thanks to Vlad for the suggestion!).
Now we have per-vlan global rhashtable (bridge-wide) and per-vlan port
rhashtable, if an entry is added to a port it'll get a pointer to its
global context so it can be quickly accessed later. There's also a
sorted vlan list which is used for stable walks and some user-visible
behaviour such as the vlan ranges, also for error paths.
VLANs are stored in a "vlan group" which currently contains the
rhashtable, sorted vlan list and the number of "real" vlan entries.
A good side-effect of this change is that it resembles how hw keeps
per-vlan data.
One important note after this change is that if a VLAN is being looked up
in the bridge's rhashtable for filtering purposes (or to check if it's an
existing usable entry, not just a global context) then the new helper
br_vlan_should_use() needs to be used if the vlan is found. In case the
lookup is done only with a port's vlan group, then this check can be
skipped.
Things tested so far:
- basic vlan ingress/egress
- pvids
- untagged vlans
- undef CONFIG_BRIDGE_VLAN_FILTERING
- adding/deleting vlans in different scenarios (with/without global ctx,
while transmitting traffic, in ranges etc)
- loading/removing the module while having/adding/deleting vlans
- extracting bridge vlan information (user ABI), compressed requests
- adding/deleting fdbs on vlans
- bridge mac change, promisc mode
- default pvid change
- kmemleak ON during the whole time
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2015-09-25 19:00:11 +02:00
/*
 * br_vlan_group() stub: there is no vlan group to hand out, so the result
 * is always NULL; @br is not dereferenced.
 */
static inline struct net_bridge_vlan_group *
br_vlan_group(const struct net_bridge *br)
{
	return NULL;
}
/*
 * nbp_vlan_group() stub: always yields NULL for the port's vlan group;
 * @p is not dereferenced.
 */
static inline struct net_bridge_vlan_group *
nbp_vlan_group(const struct net_bridge_port *p)
{
	return NULL;
}
2015-10-12 21:47:02 +02:00
/*
 * br_vlan_group_rcu() stub: always yields NULL; @br is not dereferenced
 * and no RCU primitive is involved in this variant.
 */
static inline struct net_bridge_vlan_group *
br_vlan_group_rcu(const struct net_bridge *br)
{
	return NULL;
}
/*
 * nbp_vlan_group_rcu() stub: always yields NULL; @p is not dereferenced
 * and no RCU primitive is involved in this variant.
 */
static inline struct net_bridge_vlan_group *
nbp_vlan_group_rcu(const struct net_bridge_port *p)
{
	return NULL;
}
2016-04-30 10:25:29 +02:00
/*
 * br_vlan_get_stats() stub: empty body, so *@stats is NOT written - callers
 * must not assume it has been zeroed or filled in. @v is not used.
 */
static inline void br_vlan_get_stats ( const struct net_bridge_vlan * v ,
2020-11-17 21:25:42 +01:00
struct pcpu_sw_netstats * stats )
2016-04-30 10:25:29 +02:00
{
}
2019-04-18 18:35:33 +01:00
/* br_vlan_port_event() stub: the event is ignored, nothing happens. */
static inline void br_vlan_port_event(struct net_bridge_port *p,
				      unsigned long event)
{
}
2019-08-02 13:57:36 +03:00
/*
 * br_vlan_bridge_event() stub: ignores @dev, @event and @ptr and always
 * returns 0.
 */
static inline int br_vlan_bridge_event ( struct net_device * dev ,
unsigned long event , void * ptr )
2019-04-18 18:35:33 +01:00
{
2019-08-02 13:57:36 +03:00
return 0 ;
2019-04-18 18:35:33 +01:00
}
2020-01-14 19:56:09 +02:00
/* br_vlan_rtnl_init() stub: nothing to register, intentionally empty. */
static inline void br_vlan_rtnl_init(void)
{
}
/* br_vlan_rtnl_uninit() stub: nothing to unregister, intentionally empty. */
static inline void br_vlan_rtnl_uninit(void)
{
}
2020-01-14 19:56:13 +02:00
/*
 * br_vlan_notify() stub: no notification is generated; every argument
 * (@br, @p, @vid, @vid_range, @cmd) is ignored.
 */
static inline void br_vlan_notify(const struct net_bridge *br,
				  const struct net_bridge_port *p,
				  u16 vid, u16 vid_range, int cmd)
{
}
2020-07-13 10:55:46 +03:00
/*
 * br_vlan_can_enter_range() stub: unconditionally answers true; neither
 * @v_curr nor @range_end is dereferenced.
 */
static inline bool
br_vlan_can_enter_range(const struct net_bridge_vlan *v_curr,
			const struct net_bridge_vlan *range_end)
{
	return true;
}
net: bridge: move the switchdev object replay helpers to "push" mode
Starting with commit 4f2673b3a2b6 ("net: bridge: add helper to replay
port and host-joined mdb entries"), DSA has introduced some bridge
helpers that replay switchdev events (FDB/MDB/VLAN additions and
deletions) that can be lost by the switchdev drivers in a variety of
circumstances:
- an IP multicast group was host-joined on the bridge itself before any
switchdev port joined the bridge, leading to the host MDB entries
missing in the hardware database.
- during the bridge creation process, the MAC address of the bridge was
added to the FDB as an entry pointing towards the bridge device
itself, but with no switchdev ports being part of the bridge yet, this
local FDB entry would remain unknown to the switchdev hardware
database.
- a VLAN/FDB/MDB was added to a bridge port that is a LAG interface,
before any switchdev port joined that LAG, leading to the hardware
database missing those entries.
- a switchdev port left a LAG that is a bridge port, while the LAG
remained part of the bridge, and all FDB/MDB/VLAN entries remained
installed in the hardware database of the switchdev port.
Also, since commit 0d2cfbd41c4a ("net: bridge: ignore switchdev events
for LAG ports which didn't request replay"), DSA introduced a method,
based on a const void *ctx, to ensure that two switchdev ports under the
same LAG that is a bridge port do not see the same MDB/VLAN entry being
replayed twice by the bridge, once for every bridge port that joins the
LAG.
With so many ordering corner cases being possible, it seems unreasonable
to expect a switchdev driver writer to get it right from the first try.
Therefore, now that DSA has experimented with the bridge replay helpers
for a little bit, we can move the code to the bridge driver where it is
more readily available to all switchdev drivers.
To convert the switchdev object replay helpers from "pull mode" (where
the driver asks for them) to a "push mode" (where the bridge offers them
automatically), the biggest problem is that the bridge needs to be aware
when a switchdev port joins and leaves, even when the switchdev is only
indirectly a bridge port (for example when the bridge port is a LAG
upper of the switchdev).
Luckily, we already have a hook for that, in the form of the newly
introduced switchdev_bridge_port_offload() and
switchdev_bridge_port_unoffload() calls. These offer a natural place for
hooking the object addition and deletion replays.
Extend the above 2 functions with:
- pointers to the switchdev atomic notifier (for FDB replays) and the
blocking notifier (for MDB and VLAN replays).
- the "const void *ctx" argument required for drivers to be able to
disambiguate between which port is targeted, when multiple ports are
lowers of the same LAG that is a bridge port. Most of the drivers pass
NULL to this argument, except the ones that support LAG offload and have
the proper context check already in place in the switchdev blocking
notifier handler.
Also unexport the replay helpers, since nobody except the bridge calls
them directly now.
Note that:
(a) we abuse the terminology slightly, because FDB entries are not
"switchdev objects", but we count them as objects nonetheless.
With no direct way to prove it, I think they are not modeled as
switchdev objects because those can only be installed by the bridge
to the hardware (as opposed to FDB entries which can be propagated
in the other direction too). This is merely an abuse of terms, FDB
entries are replayed too, despite not being objects.
(b) the bridge does not attempt to sync port attributes to newly joined
ports, just the countable stuff (the objects). The reason for this
is simple: no universal and symmetric way to sync and unsync them is
known. For example, VLAN filtering: what to do on unsync, disable or
leave it enabled? Similarly, STP state, ageing timer, etc etc. What
a switchdev port does when it becomes standalone again is not really
up to the bridge's competence, and the driver should deal with it.
On the other hand, replaying deletions of switchdev objects can be
seen as a matter of cleanup and therefore be treated by the bridge,
hence this patch.
We make the replay helpers opt-in for drivers, because they might not
bring immediate benefits for them:
- nbp_vlan_init() is called _after_ netdev_master_upper_dev_link(),
so br_vlan_replay() should not do anything for the new drivers on
which we call it. The existing drivers where there was even a slight
possibility for there to exist a VLAN on a bridge port before they
join it are already guarded against this: mlxsw and prestera deny
joining LAG interfaces that are members of a bridge.
- br_fdb_replay() should now notify of local FDB entries, but I patched
all drivers except DSA to ignore these new entries in commit
2c4eca3ef716 ("net: bridge: switchdev: include local flag in FDB
notifications"). Driver authors can lift this restriction as they
wish, and when they do, they can also opt into the FDB replay
functionality.
- br_mdb_replay() should fix a real issue which is described in commit
4f2673b3a2b6 ("net: bridge: add helper to replay port and host-joined
mdb entries"). However most drivers do not offload the
SWITCHDEV_OBJ_ID_HOST_MDB to see this issue: only cpsw and am65_cpsw
offload this switchdev object, and I don't completely understand the
way in which they offload this switchdev object anyway. So I'll leave
it up to these drivers' respective maintainers to opt into
br_mdb_replay().
So most of the drivers pass NULL notifier blocks for the replay helpers,
except:
- dpaa2-switch which was already acked/regression-tested with the
helpers enabled (and there isn't much of a downside in having them)
- ocelot which already had replay logic in "pull" mode
- DSA which already had replay logic in "pull" mode
An important observation is that the drivers which don't currently
request bridge event replays don't even have the
switchdev_bridge_port_{offload,unoffload} calls placed in proper places
right now. This was done to avoid unnecessary rework for drivers which
might never even add support for this. For driver writers who wish to
add replay support, this can be used as a tentative placement guide:
https://patchwork.kernel.org/project/netdevbpf/patch/20210720134655.892334-11-vladimir.oltean@nxp.com/
Cc: Vadym Kochan <vkochan@marvell.com>
Cc: Taras Chornyi <tchornyi@marvell.com>
Cc: Ioana Ciornei <ioana.ciornei@nxp.com>
Cc: Lars Povlsen <lars.povlsen@microchip.com>
Cc: Steen Hegelund <Steen.Hegelund@microchip.com>
Cc: UNGLinuxDriver@microchip.com
Cc: Claudiu Manoil <claudiu.manoil@nxp.com>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Ioana Ciornei <ioana.ciornei@nxp.com> # dpaa2-switch
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-07-21 19:24:03 +03:00
/*
 * br_vlan_replay() stub: replays nothing. @br_dev, @dev, @ctx, @adding,
 * @nb and @extack are all ignored.
 *
 * Return: always -EOPNOTSUPP.
 */
static inline int br_vlan_replay(struct net_device *br_dev,
				 struct net_device *dev, const void *ctx,
				 bool adding, struct notifier_block *nb,
				 struct netlink_ext_ack *extack)
{
	return -EOPNOTSUPP;
}
2013-02-13 12:00:09 +00:00
# endif
2020-01-24 13:40:20 +02:00
/* br_vlan_options.c */
/*
 * Prototypes below are visible only with CONFIG_BRIDGE_VLAN_FILTERING;
 * no inline fallbacks for them are declared in this section.
 */
# ifdef CONFIG_BRIDGE_VLAN_FILTERING
2020-03-17 14:08:33 +02:00
/*
 * Predicate over the current vlan and the end of a range.
 * NOTE(review): name suggests "options equal, may share a range" - confirm.
 */
bool br_vlan_opts_eq_range ( const struct net_bridge_vlan * v_curr ,
const struct net_bridge_vlan * range_end ) ;
2020-01-24 13:40:20 +02:00
/*
 * NOTE(review): presumably netlink dump helpers (write @v's options into
 * @skb / size of that payload) - confirm against the implementation.
 */
bool br_vlan_opts_fill ( struct sk_buff * skb , const struct net_bridge_vlan * v ) ;
size_t br_vlan_opts_nl_size ( void ) ;
2020-01-24 13:40:21 +02:00
/*
 * Takes a [range_start, range_end] pair of vlans, the attribute table @tb
 * and an @extack; returns int.
 * NOTE(review): presumably 0 or a negative errno - confirm.
 */
int br_vlan_process_options ( const struct net_bridge * br ,
const struct net_bridge_port * p ,
struct net_bridge_vlan * range_start ,
struct net_bridge_vlan * range_end ,
struct nlattr * * tb ,
struct netlink_ext_ack * extack ) ;
2021-07-19 20:06:34 +03:00
/* Global-option counterpart operating on a net_device and a single @attr. */
int br_vlan_rtm_process_global_options ( struct net_device * dev ,
const struct nlattr * attr ,
int cmd ,
struct netlink_ext_ack * extack ) ;
2021-07-19 20:06:35 +03:00
/* Global-option counterparts of the range predicate and fill helper. */
bool br_vlan_global_opts_can_enter_range ( const struct net_bridge_vlan * v_curr ,
const struct net_bridge_vlan * r_end ) ;
bool br_vlan_global_opts_fill ( struct sk_buff * skb , u16 vid , u16 vid_range ,
const struct net_bridge_vlan * v_opts ) ;
2020-01-24 13:40:22 +02:00
/* vlan state manipulation helpers using *_ONCE to annotate lock-free access */
static inline u8 br_vlan_get_state ( const struct net_bridge_vlan * v )
{
return READ_ONCE ( v - > state ) ;
}
/*
 * br_vlan_set_state() - store a new state into a vlan entry.
 * @v:     vlan entry to update; must not be NULL (it is dereferenced).
 * @state: value written to v->state.
 *
 * The store goes through WRITE_ONCE() so it pairs with lock-free readers
 * that use READ_ONCE().
 */
static inline void br_vlan_set_state(struct net_bridge_vlan *v, u8 state)
{
	/* "->" must be a single token; "- >" does not parse as C */
	WRITE_ONCE(v->state, state);
}
static inline u8 br_vlan_get_pvid_state ( const struct net_bridge_vlan_group * vg )
{
return READ_ONCE ( vg - > pvid_state ) ;
}
/*
 * br_vlan_set_pvid_state() - store a new pvid_state into a vlan group.
 * @vg:    vlan group to update; must not be NULL (it is dereferenced).
 * @state: value written to vg->pvid_state.
 *
 * The store goes through WRITE_ONCE() so it pairs with lock-free readers
 * that use READ_ONCE().
 */
static inline void br_vlan_set_pvid_state(struct net_bridge_vlan_group *vg,
					  u8 state)
{
	/* "->" must be a single token; "- >" does not parse as C */
	WRITE_ONCE(vg->pvid_state, state);
}
/*
 * br_vlan_state_allowed() - decide whether @state permits the operation.
 * @state:       state value to test (BR_STATE_*).
 * @learn_allow: true at ingress and false at egress.
 *
 * Return: true for BR_STATE_FORWARDING, @learn_allow for
 * BR_STATE_LEARNING, false for every other state.
 */
static inline bool br_vlan_state_allowed(u8 state, bool learn_allow)
{
	if (state == BR_STATE_FORWARDING)
		return true;

	if (state == BR_STATE_LEARNING)
		return learn_allow;

	return false;
}
2020-01-24 13:40:20 +02:00
# endif
2015-03-10 10:27:18 +01:00
/*
 * Hook table with a single callback, br_dev_xmit_hook, which receives an
 * skb and returns int.
 * NOTE(review): presumably filled in by the bridge netfilter code and
 * consulted on the bridge device transmit path - confirm at the call site.
 */
struct nf_br_ops {
	int (*br_dev_xmit_hook)(struct sk_buff *skb);
};

/* __rcu-annotated pointer to the current hook table; defined elsewhere. */
extern const struct nf_br_ops __rcu *nf_br_ops;
2005-04-16 15:20:36 -07:00
/* br_netfilter.c */
2014-09-18 11:29:03 +02:00
/*
 * With CONFIG_BRIDGE_NETFILTER enabled these are real out-of-line
 * functions; the #else branch that follows replaces them with inline
 * no-ops and an empty macro.
 */
# if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
int br_nf_core_init ( void ) ;
void br_nf_core_fini ( void ) ;
2013-10-18 13:48:22 -07:00
void br_netfilter_rtable_init ( struct net_bridge * ) ;
2006-05-25 15:59:33 -07:00
# else
2014-09-18 11:29:03 +02:00
/*
 * br_nf_core_init() fallback for builds without CONFIG_BRIDGE_NETFILTER:
 * nothing to set up.
 *
 * Return: always 0.
 */
static inline int br_nf_core_init(void)
{
	return 0;
}
/*
 * br_nf_core_fini() fallback for builds without CONFIG_BRIDGE_NETFILTER:
 * nothing to tear down.
 */
static inline void br_nf_core_fini(void)
{
}
2008-07-30 16:27:55 -07:00
/*
 * Fallback: expands to nothing, so the argument expression is dropped and
 * never evaluated.
 */
# define br_netfilter_rtable_init(x)
2006-05-25 15:59:33 -07:00
# endif
2005-04-16 15:20:36 -07:00
/* br_stp.c */
/* Prototypes only; per the tag above the definitions live in br_stp.c. */
2014-09-30 16:13:19 -07:00
/* Port-level helpers; br_get_port() maps a u16 port number to a port. */
void br_set_state ( struct net_bridge_port * p , unsigned int state ) ;
2013-10-18 13:48:22 -07:00
struct net_bridge_port * br_get_port ( struct net_bridge * br , u16 port_no ) ;
void br_init_port ( struct net_bridge_port * p ) ;
void br_become_designated_port ( struct net_bridge_port * p ) ;
2005-04-16 15:20:36 -07:00
2013-10-18 13:48:22 -07:00
/*
 * Bridge timer setters. The int-returning ones can report failure; the
 * "__" variant returns void.
 * NOTE(review): units of @t / @x are not visible here (jiffies vs
 * clock_t) - confirm in br_stp.c before calling.
 */
void __br_set_forward_delay ( struct net_bridge * br , unsigned long t ) ;
int br_set_forward_delay ( struct net_bridge * br , unsigned long x ) ;
int br_set_hello_time ( struct net_bridge * br , unsigned long x ) ;
int br_set_max_age ( struct net_bridge * br , unsigned long x ) ;
2016-12-10 13:44:27 -05:00
/* Note the differing argument types: net_device + unsigned long here ... */
int __set_ageing_time ( struct net_device * dev , unsigned long t ) ;
2016-07-21 12:42:19 -04:00
/* ... versus net_bridge + clock_t here. */
int br_set_ageing_time ( struct net_bridge * br , clock_t ageing_time ) ;
2011-04-04 14:03:33 +00:00
2005-04-16 15:20:36 -07:00
/* br_stp_if.c */
/* Prototypes only; per the tag above the definitions live in br_stp_if.c. */
2013-10-18 13:48:22 -07:00
/* Bridge-wide enable/disable. */
void br_stp_enable_bridge ( struct net_bridge * br ) ;
void br_stp_disable_bridge ( struct net_bridge * br ) ;
2020-04-26 15:22:08 +02:00
/* Returns int and accepts an @extack, unlike the void helpers around it. */
int br_stp_set_enabled ( struct net_bridge * br , unsigned long val ,
struct netlink_ext_ack * extack ) ;
2013-10-18 13:48:22 -07:00
/* Per-port enable/disable. */
void br_stp_enable_port ( struct net_bridge_port * p ) ;
void br_stp_disable_port ( struct net_bridge_port * p ) ;
/* NOTE(review): bool result presumably means "id changed" - confirm. */
bool br_stp_recalculate_bridge_id ( struct net_bridge * br ) ;
void br_stp_change_bridge_id ( struct net_bridge * br , const unsigned char * a ) ;
void br_stp_set_bridge_priority ( struct net_bridge * br , u16 newprio ) ;
/* The two per-port setters return int, so they can reject a value. */
int br_stp_set_port_priority ( struct net_bridge_port * p , unsigned long newprio ) ;
int br_stp_set_path_cost ( struct net_bridge_port * p , unsigned long path_cost ) ;
/*
 * Formats @id into @buf. No buffer length is passed.
 * NOTE(review): required size of @buf is not visible here - confirm.
 */
ssize_t br_show_bridge_id ( char * buf , const struct bridge_id * id ) ;
2005-04-16 15:20:36 -07:00
/* br_stp_bpdu.c */
2008-07-05 21:25:56 -07:00
/* Forward declaration: the prototype below only needs a pointer to it. */
struct stp_proto ;
2013-10-18 13:48:22 -07:00
/*
 * Receive entry point taking the protocol descriptor, the skb and the
 * device. NOTE(review): skb ownership on return is not visible - confirm.
 */
void br_stp_rcv ( const struct stp_proto * proto , struct sk_buff * skb ,
struct net_device * dev ) ;
2005-04-16 15:20:36 -07:00
/* br_stp_timer.c */
2013-10-18 13:48:22 -07:00
/* Timer setup for a whole bridge and for a single port respectively. */
void br_stp_timer_init ( struct net_bridge * br ) ;
void br_stp_port_timer_init ( struct net_bridge_port * p ) ;
/*
 * Derives an unsigned long from @timer without modifying it (const).
 * NOTE(review): presumably the remaining time, units unknown - confirm.
 */
unsigned long br_timer_value ( const struct timer_list * timer ) ;
2005-04-16 15:20:36 -07:00
/* br.c */
2011-12-12 02:58:25 +00:00
/*
 * Function-pointer hook, declared only when CONFIG_ATM_LANE is enabled.
 * It takes a device and an address and returns int.
 * NOTE(review): who installs the hook and what the return value means are
 * not visible here - confirm.
 */
# if IS_ENABLED(CONFIG_ATM_LANE)
2009-06-05 05:35:28 +00:00
extern int ( * br_fdb_test_addr_hook ) ( struct net_device * dev , unsigned char * addr ) ;
# endif
2005-04-16 15:20:36 -07:00
2020-04-26 15:22:07 +02:00
/* br_mrp.c */
/*
 * Real implementations, declared when CONFIG_BRIDGE_MRP is enabled. The
 * #else branch that follows supplies inline fallbacks with the same
 * signatures.
 */
# if IS_ENABLED(CONFIG_BRIDGE_MRP)
int br_mrp_parse ( struct net_bridge * br , struct net_bridge_port * p ,
struct nlattr * attr , int cmd , struct netlink_ext_ack * extack ) ;
bool br_mrp_enabled ( struct net_bridge * br ) ;
void br_mrp_port_del ( struct net_bridge * br , struct net_bridge_port * p ) ;
2020-07-02 10:13:06 +02:00
int br_mrp_fill_info ( struct sk_buff * skb , struct net_bridge * br ) ;
2020-04-26 15:22:07 +02:00
# else
/*
 * br_mrp_parse() fallback for builds without CONFIG_BRIDGE_MRP: the
 * attribute is not parsed and all arguments are ignored.
 *
 * Return: always -EOPNOTSUPP.
 */
static inline int br_mrp_parse(struct net_bridge *br,
			       struct net_bridge_port *p,
			       struct nlattr *attr, int cmd,
			       struct netlink_ext_ack *extack)
{
	return -EOPNOTSUPP;
}
/*
 * br_mrp_enabled() fallback for builds without CONFIG_BRIDGE_MRP: always
 * reports false; @br is not dereferenced.
 */
static inline bool br_mrp_enabled ( struct net_bridge * br )
{
2020-05-06 14:16:16 +08:00
return false ;
2020-04-26 15:22:07 +02:00
}
/*
 * br_mrp_port_del() fallback for builds without CONFIG_BRIDGE_MRP:
 * intentionally empty, @br and @p are not touched.
 */
static inline void br_mrp_port_del(struct net_bridge *br,
				   struct net_bridge_port *p)
{
}
2020-07-02 10:13:06 +02:00
/*
 * br_mrp_fill_info() fallback for builds without CONFIG_BRIDGE_MRP:
 * nothing is written to @skb.
 *
 * Return: always 0.
 */
static inline int br_mrp_fill_info(struct sk_buff *skb, struct net_bridge *br)
{
	return 0;
}
2020-04-26 15:22:07 +02:00
# endif
2020-10-27 10:02:48 +00:00
/* br_cfm.c */
2020-10-27 10:02:45 +00:00
/*
 * Real implementations, declared when CONFIG_BRIDGE_CFM is enabled. The
 * #else branch that follows supplies inline fallbacks with the same
 * signatures.
 */
# if IS_ENABLED(CONFIG_BRIDGE_CFM)
2020-10-27 10:02:48 +00:00
int br_cfm_parse ( struct net_bridge * br , struct net_bridge_port * p ,
struct nlattr * attr , int cmd , struct netlink_ext_ack * extack ) ;
bool br_cfm_created ( struct net_bridge * br ) ;
2020-10-27 10:02:45 +00:00
void br_cfm_port_del ( struct net_bridge * br , struct net_bridge_port * p ) ;
2020-10-27 10:02:49 +00:00
/* Both fill helpers write into @skb and return int. */
int br_cfm_config_fill_info ( struct sk_buff * skb , struct net_bridge * br ) ;
2020-10-27 10:02:51 +00:00
int br_cfm_status_fill_info ( struct sk_buff * skb ,
struct net_bridge * br ,
bool getlink ) ;
/* The counters are returned through @count; the int result is a status. */
int br_cfm_mep_count ( struct net_bridge * br , u32 * count ) ;
int br_cfm_peer_mep_count ( struct net_bridge * br , u32 * count ) ;
2020-10-27 10:02:45 +00:00
# else
2020-10-27 10:02:48 +00:00
/*
 * br_cfm_parse - fallback used when CONFIG_BRIDGE_CFM is not enabled.
 *
 * None of the arguments are examined; the request is always rejected.
 *
 * Return: -EOPNOTSUPP, unconditionally.
 */
static inline int br_cfm_parse(struct net_bridge *br, struct net_bridge_port *p,
			       struct nlattr *attr, int cmd,
			       struct netlink_ext_ack *extack)
{
	return -EOPNOTSUPP;
}
/*
 * br_cfm_created - fallback used when CONFIG_BRIDGE_CFM is not enabled.
 * @br: bridge being queried (ignored).
 *
 * Return: always false.
 */
static inline bool br_cfm_created(struct net_bridge *br)
{
	return false;
}
2020-10-27 10:02:45 +00:00
/*
 * br_cfm_port_del - fallback used when CONFIG_BRIDGE_CFM is not enabled.
 *
 * Both arguments are ignored and nothing is done.
 */
static inline void br_cfm_port_del(struct net_bridge *br,
				   struct net_bridge_port *p)
{
	/* intentionally empty */
}
2020-10-27 10:02:49 +00:00
/*
 * br_cfm_config_fill_info - fallback used when CONFIG_BRIDGE_CFM is not
 * enabled.
 *
 * Neither the skb nor the bridge is touched.
 *
 * Return: -EOPNOTSUPP, unconditionally.
 */
static inline int br_cfm_config_fill_info(struct sk_buff *skb,
					  struct net_bridge *br)
{
	return -EOPNOTSUPP;
}
2020-10-27 10:02:50 +00:00
2020-10-27 10:02:51 +00:00
/*
 * br_cfm_status_fill_info - fallback used when CONFIG_BRIDGE_CFM is not
 * enabled.
 *
 * The skb, the bridge and the @getlink flag are all ignored.
 *
 * Return: -EOPNOTSUPP, unconditionally.
 */
static inline int br_cfm_status_fill_info(struct sk_buff *skb,
					  struct net_bridge *br, bool getlink)
{
	return -EOPNOTSUPP;
}
/*
 * br_cfm_mep_count - fallback used when CONFIG_BRIDGE_CFM is not enabled.
 *
 * @count is never written.
 *
 * Return: -EOPNOTSUPP, unconditionally.
 */
static inline int br_cfm_mep_count(struct net_bridge *br, u32 *count)
{
	return -EOPNOTSUPP;
}
/*
 * br_cfm_peer_mep_count - fallback used when CONFIG_BRIDGE_CFM is not
 * enabled.
 *
 * @count is never written.
 *
 * Return: -EOPNOTSUPP, unconditionally.
 */
static inline int br_cfm_peer_mep_count(struct net_bridge *br, u32 *count)
{
	return -EOPNOTSUPP;
}
2020-10-27 10:02:45 +00:00
# endif
2006-05-25 16:00:12 -07:00
/* br_netlink.c */
2012-06-26 05:48:45 +00:00
/* rtnl link operations object for the bridge; defined outside this header. */
extern struct rtnl_link_ops br_link_ops ;
2013-10-18 13:48:22 -07:00
/* Init/fini pair. NOTE(review): the int result of init is presumably 0 or a negative errno - confirm in br_netlink.c. */
int br_netlink_init ( void ) ;
void br_netlink_fini ( void ) ;
2017-11-01 12:18:13 +02:00
/* Notification helpers: same arguments, except br_info_notify() takes an extra u32 filter. */
void br_ifinfo_notify ( int event , const struct net_bridge * br ,
const struct net_bridge_port * port ) ;
2020-10-27 10:02:51 +00:00
void br_info_notify ( int event , const struct net_bridge * br ,
const struct net_bridge_port * port , u32 filter ) ;
2018-12-12 17:02:48 +00:00
/* Link set/delete/get handlers operating on a net_device and a netlink message or skb. */
int br_setlink ( struct net_device * dev , struct nlmsghdr * nlmsg , u16 flags ,
struct netlink_ext_ack * extack ) ;
2015-01-29 22:40:12 -08:00
int br_dellink ( struct net_device * dev , struct nlmsghdr * nlmsg , u16 flags ) ;
2013-10-18 13:48:22 -07:00
int br_getlink ( struct sk_buff * skb , u32 pid , u32 seq , struct net_device * dev ,
2015-04-28 18:33:49 +02:00
u32 filter_mask , int nlflags ) ;
2020-01-14 19:56:10 +02:00
/*
 * @vinfo_last is a pointer-to-pointer and @changed a bool pointer, so both
 * can be written by the callee. The exact protocol is not visible here.
 */
int br_process_vlan_info ( struct net_bridge * br ,
struct net_bridge_port * p , int cmd ,
struct bridge_vlan_info * vinfo_curr ,
struct bridge_vlan_info * * vinfo_last ,
bool * changed ,
struct netlink_ext_ack * extack ) ;
2006-05-25 16:00:12 -07:00
2005-04-16 15:20:36 -07:00
# ifdef CONFIG_SYSFS
/* br_sysfs_if.c */
2010-01-19 02:58:23 +01:00
/* Read-only sysfs operations table for bridge ports; defined outside this header. */
extern const struct sysfs_ops brport_sysfs_ops ;
2013-10-18 13:48:22 -07:00
/* Per-port sysfs helpers; only declared when CONFIG_SYSFS is set, otherwise inline fallbacks returning 0 are used. */
int br_sysfs_addif ( struct net_bridge_port * p ) ;
int br_sysfs_renameif ( struct net_bridge_port * p ) ;
2005-04-16 15:20:36 -07:00
/* br_sysfs_br.c */
2013-10-18 13:48:22 -07:00
/* Per-bridge sysfs helpers taking the bridge's net_device. */
int br_sysfs_addbr ( struct net_device * dev ) ;
void br_sysfs_delbr ( struct net_device * dev ) ;
2005-04-16 15:20:36 -07:00
# else
2012-11-03 23:02:30 +01:00
/* Fallback used when CONFIG_SYSFS is not set: ignores @p and reports success (0). */
static inline int br_sysfs_addif(struct net_bridge_port *p)
{
	return 0;
}
/* Fallback used when CONFIG_SYSFS is not set: ignores @p and reports success (0). */
static inline int br_sysfs_renameif(struct net_bridge_port *p)
{
	return 0;
}
/* Fallback used when CONFIG_SYSFS is not set: ignores @dev and reports success (0). */
static inline int br_sysfs_addbr(struct net_device *dev)
{
	return 0;
}
/* Fallback used when CONFIG_SYSFS is not set: ignores @dev and does nothing. */
static inline void br_sysfs_delbr(struct net_device *dev)
{
	/* intentionally empty */
}
2005-04-16 15:20:36 -07:00
# endif /* CONFIG_SYSFS */
2016-08-25 18:42:37 +02:00
/* br_switchdev.c */
# ifdef CONFIG_NET_SWITCHDEV
net: make switchdev_bridge_port_{,unoffload} loosely coupled with the bridge
With the introduction of explicit offloading API in switchdev in commit
2f5dc00f7a3e ("net: bridge: switchdev: let drivers inform which bridge
ports are offloaded"), we started having Ethernet switch drivers calling
directly into a function exported by net/bridge/br_switchdev.c, which is
a function exported by the bridge driver.
This means that drivers that did not have an explicit dependency on the
bridge before, like cpsw and am65-cpsw, now do - otherwise it is not
possible to call a symbol exported by a driver that can be built as
module unless you are a module too.
There was an attempt to solve the dependency issue in the form of commit
b0e81817629a ("net: build all switchdev drivers as modules when the
bridge is a module"). Grygorii Strashko, however, says about it:
| In my opinion, the problem is a bit bigger here than just fixing the
| build :(
|
| In case, of ^cpsw the switchdev mode is kinda optional and in many
| cases (especially for testing purposes, NFS) the multi-mac mode is
| still preferable mode.
|
| There were no such tight dependency between switchdev drivers and
| bridge core before and switchdev serviced as independent, notification
| based layer between them, so ^cpsw still can be "Y" and bridge can be
| "M". Now for mostly every kernel build configuration the CONFIG_BRIDGE
| will need to be set as "Y", or we will have to update drivers to
| support build with BRIDGE=n and maintain separate builds for
| networking vs non-networking testing. But is this enough? Wouldn't
| it cause 'chain reaction' required to add more and more "Y" options
| (like CONFIG_VLAN_8021Q)?
|
| PS. Just to be sure we on the same page - ARM builds will be forced
| (with this patch) to have CONFIG_TI_CPSW_SWITCHDEV=m and so all our
| automation testing will just fail with omap2plus_defconfig.
In the light of this, it would be desirable for some configurations to
avoid dependencies between switchdev drivers and the bridge, and have
the switchdev mode as completely optional within the driver.
Arnd Bergmann also tried to write a patch which better expressed the
build time dependency for Ethernet switch drivers where the switchdev
support is optional, like cpsw/am65-cpsw, and this made the drivers
follow the bridge (compile as module if the bridge is a module) only if
the optional switchdev support in the driver was enabled in the first
place:
https://patchwork.kernel.org/project/netdevbpf/patch/20210802144813.1152762-1-arnd@kernel.org/
but this still did not solve the fact that cpsw and am65-cpsw now must
be built as modules when the bridge is a module - it just expressed
correctly that optional dependency. But the new behavior is an apparent
regression from Grygorii's perspective.
So to support the use case where the Ethernet driver is built-in,
NET_SWITCHDEV (a bool option) is enabled, and the bridge is a module, we
need a framework that can handle the possible absence of the bridge from
the running system, i.e. runtime bloatware as opposed to build-time
bloatware.
Luckily we already have this framework, since switchdev has been using
it extensively. Events from the bridge side are transmitted to the
driver side using notifier chains - this was originally done so that
unrelated drivers could snoop for events emitted by the bridge towards
ports that are implemented by other drivers (think of a switch driver
with LAG offload that listens for switchdev events on a bonding/team
interface that it offloads).
There are also events which are transmitted from the driver side to the
bridge side, which again are modeled using notifiers.
SWITCHDEV_FDB_ADD_TO_BRIDGE is an example of this, and deals with
notifying the bridge that a MAC address has been dynamically learned.
So there is a precedent we can use for modeling the new framework.
The difference compared to SWITCHDEV_FDB_ADD_TO_BRIDGE is that the work
that the bridge needs to do when a port becomes offloaded is blocking in
its nature: replay VLANs, MDBs etc. The calling context is indeed
blocking (we are under rtnl_mutex), but the existing switchdev
notification chain that the bridge is subscribed to is only the atomic
one. So we need to subscribe the bridge to the blocking switchdev
notification chain too.
This patch:
- keeps the driver-side perception of the switchdev_bridge_port_{,un}offload
unchanged
- moves the implementation of switchdev_bridge_port_{,un}offload from
the bridge module into the switchdev module.
- makes everybody that is subscribed to the switchdev blocking notifier
chain "hear" offload & unoffload events
- makes the bridge driver subscribe and handle those events
- moves the bridge driver's handling of those events into 2 new
functions called br_switchdev_port_{,un}offload. These functions
contain in fact the core of the logic that was previously in
switchdev_bridge_port_{,un}offload, just that now we go through an
extra indirection layer to reach them.
Unlike all the other switchdev notification structures, the structure
used to carry the bridge port information, struct
switchdev_notifier_brport_info, does not contain a "bool handled".
This is because in the current usage pattern, we always know that a
switchdev bridge port offloading event will be handled by the bridge,
because the switchdev_bridge_port_offload() call was initiated by a
NETDEV_CHANGEUPPER event in the first place, where info->upper_dev is a
bridge. So if the bridge wasn't loaded, then the CHANGEUPPER event
couldn't have happened.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Tested-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-03 23:34:08 +03:00
/*
 * Bridge-side handlers for switchdev port offload / unoffload events.
 * Declared only when CONFIG_NET_SWITCHDEV is set; the #else branch of this
 * conditional provides inline fallbacks for every function in this group.
 */
int br_switchdev_port_offload ( struct net_bridge_port * p ,
struct net_device * dev , const void * ctx ,
struct notifier_block * atomic_nb ,
struct notifier_block * blocking_nb ,
bool tx_fwd_offload ,
struct netlink_ext_ack * extack ) ;
void br_switchdev_port_unoffload ( struct net_bridge_port * p , const void * ctx ,
struct notifier_block * atomic_nb ,
struct notifier_block * blocking_nb ) ;
2021-07-22 18:55:38 +03:00
/* Per-frame helpers operating on the skb itself. */
bool br_switchdev_frame_uses_tx_fwd_offload ( struct sk_buff * skb ) ;
2021-07-23 23:49:11 +03:00
void br_switchdev_frame_set_offload_fwd_mark ( struct sk_buff * skb ) ;
2021-07-22 18:55:38 +03:00
/* Per-frame helpers that take a read-only port plus the skb. */
void nbp_switchdev_frame_mark_tx_fwd_offload ( const struct net_bridge_port * p ,
struct sk_buff * skb ) ;
void nbp_switchdev_frame_mark_tx_fwd_to_hwdom ( const struct net_bridge_port * p ,
struct sk_buff * skb ) ;
2016-08-25 18:42:37 +02:00
void nbp_switchdev_frame_mark ( const struct net_bridge_port * p ,
struct sk_buff * skb ) ;
/* Egress predicate: neither the port nor the skb is modified (both const). */
bool nbp_switchdev_allowed_egress ( const struct net_bridge_port * p ,
const struct sk_buff * skb ) ;
2017-06-08 08:44:11 +02:00
int br_switchdev_set_port_flag ( struct net_bridge_port * p ,
unsigned long flags ,
2021-02-12 17:15:53 +02:00
unsigned long mask ,
struct netlink_ext_ack * extack ) ;
2021-06-29 17:06:45 +03:00
void br_switchdev_fdb_notify ( struct net_bridge * br ,
const struct net_bridge_fdb_entry * fdb , int type ) ;
2018-12-12 17:02:50 +00:00
/* VLAN add/delete on a port device, keyed by a 16-bit VLAN id. */
int br_switchdev_port_vlan_add ( struct net_device * dev , u16 vid , u16 flags ,
struct netlink_ext_ack * extack ) ;
2018-05-30 02:56:03 +02:00
int br_switchdev_port_vlan_del ( struct net_device * dev , u16 vid ) ;
2021-07-21 19:24:00 +03:00
void br_switchdev_init ( struct net_bridge * br ) ;
2017-09-03 17:44:13 +03:00
static inline void br_switchdev_frame_unmark ( struct sk_buff * skb )
{
skb - > offload_fwd_mark = 0 ;
}
2016-08-25 18:42:37 +02:00
# else
net: make switchdev_bridge_port_{,unoffload} loosely coupled with the bridge
With the introduction of explicit offloading API in switchdev in commit
2f5dc00f7a3e ("net: bridge: switchdev: let drivers inform which bridge
ports are offloaded"), we started having Ethernet switch drivers calling
directly into a function exported by net/bridge/br_switchdev.c, which is
a function exported by the bridge driver.
This means that drivers that did not have an explicit dependency on the
bridge before, like cpsw and am65-cpsw, now do - otherwise it is not
possible to call a symbol exported by a driver that can be built as
module unless you are a module too.
There was an attempt to solve the dependency issue in the form of commit
b0e81817629a ("net: build all switchdev drivers as modules when the
bridge is a module"). Grygorii Strashko, however, says about it:
| In my opinion, the problem is a bit bigger here than just fixing the
| build :(
|
| In case, of ^cpsw the switchdev mode is kinda optional and in many
| cases (especially for testing purposes, NFS) the multi-mac mode is
| still preferable mode.
|
| There were no such tight dependency between switchdev drivers and
| bridge core before and switchdev serviced as independent, notification
| based layer between them, so ^cpsw still can be "Y" and bridge can be
| "M". Now for mostly every kernel build configuration the CONFIG_BRIDGE
| will need to be set as "Y", or we will have to update drivers to
| support build with BRIDGE=n and maintain separate builds for
| networking vs non-networking testing. But is this enough? Wouldn't
| it cause 'chain reaction' required to add more and more "Y" options
| (like CONFIG_VLAN_8021Q)?
|
| PS. Just to be sure we on the same page - ARM builds will be forced
| (with this patch) to have CONFIG_TI_CPSW_SWITCHDEV=m and so all our
| automation testing will just fail with omap2plus_defconfig.
In the light of this, it would be desirable for some configurations to
avoid dependencies between switchdev drivers and the bridge, and have
the switchdev mode as completely optional within the driver.
Arnd Bergmann also tried to write a patch which better expressed the
build time dependency for Ethernet switch drivers where the switchdev
support is optional, like cpsw/am65-cpsw, and this made the drivers
follow the bridge (compile as module if the bridge is a module) only if
the optional switchdev support in the driver was enabled in the first
place:
https://patchwork.kernel.org/project/netdevbpf/patch/20210802144813.1152762-1-arnd@kernel.org/
but this still did not solve the fact that cpsw and am65-cpsw now must
be built as modules when the bridge is a module - it just expressed
correctly that optional dependency. But the new behavior is an apparent
regression from Grygorii's perspective.
So to support the use case where the Ethernet driver is built-in,
NET_SWITCHDEV (a bool option) is enabled, and the bridge is a module, we
need a framework that can handle the possible absence of the bridge from
the running system, i.e. runtime bloatware as opposed to build-time
bloatware.
Luckily we already have this framework, since switchdev has been using
it extensively. Events from the bridge side are transmitted to the
driver side using notifier chains - this was originally done so that
unrelated drivers could snoop for events emitted by the bridge towards
ports that are implemented by other drivers (think of a switch driver
with LAG offload that listens for switchdev events on a bonding/team
interface that it offloads).
There are also events which are transmitted from the driver side to the
bridge side, which again are modeled using notifiers.
SWITCHDEV_FDB_ADD_TO_BRIDGE is an example of this, and deals with
notifying the bridge that a MAC address has been dynamically learned.
So there is a precedent we can use for modeling the new framework.
The difference compared to SWITCHDEV_FDB_ADD_TO_BRIDGE is that the work
that the bridge needs to do when a port becomes offloaded is blocking in
its nature: replay VLANs, MDBs etc. The calling context is indeed
blocking (we are under rtnl_mutex), but the existing switchdev
notification chain that the bridge is subscribed to is only the atomic
one. So we need to subscribe the bridge to the blocking switchdev
notification chain too.
This patch:
- keeps the driver-side perception of the switchdev_bridge_port_{,un}offload
unchanged
- moves the implementation of switchdev_bridge_port_{,un}offload from
the bridge module into the switchdev module.
- makes everybody that is subscribed to the switchdev blocking notifier
chain "hear" offload & unoffload events
- makes the bridge driver subscribe and handle those events
- moves the bridge driver's handling of those events into 2 new
functions called br_switchdev_port_{,un}offload. These functions
contain in fact the core of the logic that was previously in
switchdev_bridge_port_{,un}offload, just that now we go through an
extra indirection layer to reach them.
Unlike all the other switchdev notification structures, the structure
used to carry the bridge port information, struct
switchdev_notifier_brport_info, does not contain a "bool handled".
This is because in the current usage pattern, we always know that a
switchdev bridge port offloading event will be handled by the bridge,
because the switchdev_bridge_port_offload() call was initiated by a
NETDEV_CHANGEUPPER event in the first place, where info->upper_dev is a
bridge. So if the bridge wasn't loaded, then the CHANGEUPPER event
couldn't have happened.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Tested-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2021-08-03 23:34:08 +03:00
/*
 * br_switchdev_port_offload - fallback used when CONFIG_NET_SWITCHDEV is
 * not set.
 *
 * Every argument is ignored; offloading is reported as unsupported.
 *
 * Return: -EOPNOTSUPP, unconditionally.
 */
static inline int br_switchdev_port_offload(struct net_bridge_port *p,
					    struct net_device *dev,
					    const void *ctx,
					    struct notifier_block *atomic_nb,
					    struct notifier_block *blocking_nb,
					    bool tx_fwd_offload,
					    struct netlink_ext_ack *extack)
{
	return -EOPNOTSUPP;
}
/*
 * br_switchdev_port_unoffload - fallback used when CONFIG_NET_SWITCHDEV is
 * not set. Every argument is ignored and nothing is done.
 */
static inline void br_switchdev_port_unoffload(struct net_bridge_port *p,
					       const void *ctx,
					       struct notifier_block *atomic_nb,
					       struct notifier_block *blocking_nb)
{
	/* intentionally empty */
}
2021-07-22 18:55:38 +03:00
/*
 * br_switchdev_frame_uses_tx_fwd_offload - fallback used when
 * CONFIG_NET_SWITCHDEV is not set.
 * @skb: frame being queried (ignored).
 *
 * Return: always false.
 */
static inline bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb)
{
	return false;
}
2021-07-23 23:49:11 +03:00
/*
 * br_switchdev_frame_set_offload_fwd_mark - fallback used when
 * CONFIG_NET_SWITCHDEV is not set. The skb is left untouched.
 */
static inline void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb)
{
	/* intentionally empty */
}
2021-07-22 18:55:38 +03:00
/*
 * nbp_switchdev_frame_mark_tx_fwd_offload - fallback used when
 * CONFIG_NET_SWITCHDEV is not set. Port and skb are both ignored.
 */
static inline void nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p,
							   struct sk_buff *skb)
{
	/* intentionally empty */
}
/*
 * nbp_switchdev_frame_mark_tx_fwd_to_hwdom - fallback used when
 * CONFIG_NET_SWITCHDEV is not set. Port and skb are both ignored.
 */
static inline void nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p,
							    struct sk_buff *skb)
{
	/* intentionally empty */
}
2016-08-25 18:42:37 +02:00
/*
 * nbp_switchdev_frame_mark - fallback used when CONFIG_NET_SWITCHDEV is
 * not set. Port and skb are both ignored.
 */
static inline void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
					    struct sk_buff *skb)
{
	/* intentionally empty */
}
/*
 * nbp_switchdev_allowed_egress - fallback used when CONFIG_NET_SWITCHDEV
 * is not set.
 *
 * Port and skb are ignored; egress is never refused by this variant.
 *
 * Return: always true.
 */
static inline bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
						const struct sk_buff *skb)
{
	return true;
}
2017-06-08 08:44:11 +02:00
/*
 * br_switchdev_set_port_flag - fallback used when CONFIG_NET_SWITCHDEV is
 * not set.
 *
 * @flags and @mask are ignored along with the port and extack.
 *
 * Return: 0, unconditionally.
 */
static inline int br_switchdev_set_port_flag(struct net_bridge_port *p,
					     unsigned long flags,
					     unsigned long mask,
					     struct netlink_ext_ack *extack)
{
	return 0;
}
2017-06-08 08:44:14 +02:00
2018-05-30 02:56:03 +02:00
/*
 * br_switchdev_port_vlan_add - fallback used when CONFIG_NET_SWITCHDEV is
 * not set.
 *
 * Device, VLAN id, flags and extack are all ignored.
 *
 * Return: -EOPNOTSUPP, unconditionally.
 */
static inline int br_switchdev_port_vlan_add(struct net_device *dev,
					     u16 vid, u16 flags,
					     struct netlink_ext_ack *extack)
{
	return -EOPNOTSUPP;
}
/*
 * br_switchdev_port_vlan_del - fallback used when CONFIG_NET_SWITCHDEV is
 * not set.
 *
 * Device and VLAN id are ignored.
 *
 * Return: -EOPNOTSUPP, unconditionally.
 */
static inline int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid)
{
	return -EOPNOTSUPP;
}
2017-06-08 08:44:14 +02:00
/*
 * br_switchdev_fdb_notify - fallback used when CONFIG_NET_SWITCHDEV is not
 * set. Bridge, FDB entry and @type are ignored; no notification is sent.
 */
static inline void br_switchdev_fdb_notify(struct net_bridge *br,
					   const struct net_bridge_fdb_entry *fdb,
					   int type)
{
	/* intentionally empty */
}
2017-09-03 17:44:13 +03:00
/*
 * br_switchdev_frame_unmark - fallback used when CONFIG_NET_SWITCHDEV is
 * not set. The skb is left untouched.
 */
static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
{
	/* intentionally empty */
}
2021-07-21 19:24:00 +03:00
/*
 * br_switchdev_init - fallback used when CONFIG_NET_SWITCHDEV is not set.
 * The bridge is left untouched.
 */
static inline void br_switchdev_init(struct net_bridge *br)
{
	/* intentionally empty */
}
2016-08-25 18:42:37 +02:00
# endif /* CONFIG_NET_SWITCHDEV */
2017-10-06 22:12:38 -07:00
/* br_arp_nd_proxy.c */
2017-10-06 22:12:37 -07:00
/* Operates on the bridge only; no result is returned. */
void br_recalculate_neigh_suppress_enabled ( struct net_bridge * br ) ;
2017-10-06 22:12:38 -07:00
/* ARP and ND handlers share the (skb, bridge, vid, port) arguments; the ND one also takes the nd_msg. */
void br_do_proxy_suppress_arp ( struct sk_buff * skb , struct net_bridge * br ,
u16 vid , struct net_bridge_port * p ) ;
2017-10-06 22:12:39 -07:00
void br_do_suppress_nd ( struct sk_buff * skb , struct net_bridge * br ,
u16 vid , struct net_bridge_port * p , struct nd_msg * msg ) ;
/* Returns an nd_msg pointer. NOTE(review): presumably NULL when the skb is not a neighbour message - confirm in br_arp_nd_proxy.c. */
struct nd_msg * br_is_nd_neigh_msg ( struct sk_buff * skb , struct nd_msg * m ) ;
2005-04-16 15:20:36 -07:00
# endif