Merge branch 'make-dsa-work-with-bonding-s-arp-monitor'
Vladimir Oltean says:
====================
Make DSA work with bonding's ARP monitor
Since commit 2b86cb8299
("net: dsa: declare lockless TX feature for
slave ports") in v5.7, DSA breaks the ARP monitoring logic from the
bonding driver, fact which was pointed out by Brian Hutchinson who uses
a linux-5.10.y stable kernel.
Initially I got lured by other similar hacks introduced for other
NETIF_F_LLTX drivers, which, inspired by the bonding documentation,
update the trans_start of their TX queues by hand.
However Jakub pointed out that this simply isn't a proper solution, and
after coming to think more about it, I agree, and it doesn't work
properly with DSA nor is it maintainable for the future changes I plan
for it (multiple DSA masters in a LAG).
I've tested these changes using a DSA-based setup and a veth-based
setup, using the active-backup mode and ARP monitoring, with and without
arp_validate.
Link to v1:
https://patchwork.kernel.org/project/netdevbpf/patch/20220715232641.952532-1-vladimir.oltean@nxp.com/
Link to v2:
https://patchwork.kernel.org/project/netdevbpf/patch/20220727152000.3616086-1-vladimir.oltean@nxp.com/
====================
Link: https://lore.kernel.org/r/20220731124108.2810233-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
7de196a6aa
@ -1982,15 +1982,6 @@ uses the response as an indication that the link is operating. This
|
||||
gives some assurance that traffic is actually flowing to and from one
|
||||
or more peers on the local network.
|
||||
|
||||
The ARP monitor relies on the device driver itself to verify
|
||||
that traffic is flowing. In particular, the driver must keep up to
|
||||
date the last receive time, dev->last_rx. Drivers that use NETIF_F_LLTX
|
||||
flag must also update netdev_queue->trans_start. If they do not, then the
|
||||
ARP monitor will immediately fail any slaves using that driver, and
|
||||
those slaves will stay down. If networking monitoring (tcpdump, etc)
|
||||
shows the ARP requests and replies on the network, then it may be that
|
||||
your device driver is not updating last_rx and trans_start.
|
||||
|
||||
7.2 Configuring Multiple ARP Targets
|
||||
------------------------------------
|
||||
|
||||
|
@ -2001,6 +2001,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev,
|
||||
for (i = 0; i < BOND_MAX_ARP_TARGETS; i++)
|
||||
new_slave->target_last_arp_rx[i] = new_slave->last_rx;
|
||||
|
||||
new_slave->last_tx = new_slave->last_rx;
|
||||
|
||||
if (bond->params.miimon && !bond->params.use_carrier) {
|
||||
link_reporting = bond_check_dev_link(bond, slave_dev, 1);
|
||||
|
||||
@ -2884,8 +2886,11 @@ static void bond_arp_send(struct slave *slave, int arp_op, __be32 dest_ip,
|
||||
return;
|
||||
}
|
||||
|
||||
if (bond_handle_vlan(slave, tags, skb))
|
||||
if (bond_handle_vlan(slave, tags, skb)) {
|
||||
slave_update_last_tx(slave);
|
||||
arp_xmit(skb);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@ -3074,8 +3079,7 @@ static int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,
|
||||
curr_active_slave->last_link_up))
|
||||
bond_validate_arp(bond, slave, tip, sip);
|
||||
else if (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) &&
|
||||
bond_time_in_interval(bond,
|
||||
dev_trans_start(curr_arp_slave->dev), 1))
|
||||
bond_time_in_interval(bond, slave_last_tx(curr_arp_slave), 1))
|
||||
bond_validate_arp(bond, slave, sip, tip);
|
||||
|
||||
out_unlock:
|
||||
@ -3103,8 +3107,10 @@ static void bond_ns_send(struct slave *slave, const struct in6_addr *daddr,
|
||||
}
|
||||
|
||||
addrconf_addr_solict_mult(daddr, &mcaddr);
|
||||
if (bond_handle_vlan(slave, tags, skb))
|
||||
if (bond_handle_vlan(slave, tags, skb)) {
|
||||
slave_update_last_tx(slave);
|
||||
ndisc_send_skb(skb, &mcaddr, saddr);
|
||||
}
|
||||
}
|
||||
|
||||
static void bond_ns_send_all(struct bonding *bond, struct slave *slave)
|
||||
@ -3246,8 +3252,7 @@ static int bond_na_rcv(const struct sk_buff *skb, struct bonding *bond,
|
||||
curr_active_slave->last_link_up))
|
||||
bond_validate_ns(bond, slave, saddr, daddr);
|
||||
else if (curr_arp_slave &&
|
||||
bond_time_in_interval(bond,
|
||||
dev_trans_start(curr_arp_slave->dev), 1))
|
||||
bond_time_in_interval(bond, slave_last_tx(curr_arp_slave), 1))
|
||||
bond_validate_ns(bond, slave, saddr, daddr);
|
||||
|
||||
out:
|
||||
@ -3335,12 +3340,12 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
|
||||
* so it can wait
|
||||
*/
|
||||
bond_for_each_slave_rcu(bond, slave, iter) {
|
||||
unsigned long trans_start = dev_trans_start(slave->dev);
|
||||
unsigned long last_tx = slave_last_tx(slave);
|
||||
|
||||
bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
|
||||
|
||||
if (slave->link != BOND_LINK_UP) {
|
||||
if (bond_time_in_interval(bond, trans_start, 1) &&
|
||||
if (bond_time_in_interval(bond, last_tx, 1) &&
|
||||
bond_time_in_interval(bond, slave->last_rx, 1)) {
|
||||
|
||||
bond_propose_link_state(slave, BOND_LINK_UP);
|
||||
@ -3365,7 +3370,7 @@ static void bond_loadbalance_arp_mon(struct bonding *bond)
|
||||
* when the source ip is 0, so don't take the link down
|
||||
* if we don't know our ip yet
|
||||
*/
|
||||
if (!bond_time_in_interval(bond, trans_start, bond->params.missed_max) ||
|
||||
if (!bond_time_in_interval(bond, last_tx, bond->params.missed_max) ||
|
||||
!bond_time_in_interval(bond, slave->last_rx, bond->params.missed_max)) {
|
||||
|
||||
bond_propose_link_state(slave, BOND_LINK_DOWN);
|
||||
@ -3431,7 +3436,7 @@ re_arm:
|
||||
*/
|
||||
static int bond_ab_arp_inspect(struct bonding *bond)
|
||||
{
|
||||
unsigned long trans_start, last_rx;
|
||||
unsigned long last_tx, last_rx;
|
||||
struct list_head *iter;
|
||||
struct slave *slave;
|
||||
int commit = 0;
|
||||
@ -3482,9 +3487,9 @@ static int bond_ab_arp_inspect(struct bonding *bond)
|
||||
* - (more than missed_max*delta since receive AND
|
||||
* the bond has an IP address)
|
||||
*/
|
||||
trans_start = dev_trans_start(slave->dev);
|
||||
last_tx = slave_last_tx(slave);
|
||||
if (bond_is_active_slave(slave) &&
|
||||
(!bond_time_in_interval(bond, trans_start, bond->params.missed_max) ||
|
||||
(!bond_time_in_interval(bond, last_tx, bond->params.missed_max) ||
|
||||
!bond_time_in_interval(bond, last_rx, bond->params.missed_max))) {
|
||||
bond_propose_link_state(slave, BOND_LINK_DOWN);
|
||||
commit++;
|
||||
@ -3501,8 +3506,8 @@ static int bond_ab_arp_inspect(struct bonding *bond)
|
||||
*/
|
||||
static void bond_ab_arp_commit(struct bonding *bond)
|
||||
{
|
||||
unsigned long trans_start;
|
||||
struct list_head *iter;
|
||||
unsigned long last_tx;
|
||||
struct slave *slave;
|
||||
|
||||
bond_for_each_slave(bond, slave, iter) {
|
||||
@ -3511,10 +3516,10 @@ static void bond_ab_arp_commit(struct bonding *bond)
|
||||
continue;
|
||||
|
||||
case BOND_LINK_UP:
|
||||
trans_start = dev_trans_start(slave->dev);
|
||||
last_tx = slave_last_tx(slave);
|
||||
if (rtnl_dereference(bond->curr_active_slave) != slave ||
|
||||
(!rtnl_dereference(bond->curr_active_slave) &&
|
||||
bond_time_in_interval(bond, trans_start, 1))) {
|
||||
bond_time_in_interval(bond, last_tx, 1))) {
|
||||
struct slave *current_arp_slave;
|
||||
|
||||
current_arp_slave = rtnl_dereference(bond->current_arp_slave);
|
||||
|
@ -312,7 +312,6 @@ static bool veth_skb_is_eligible_for_gro(const struct net_device *dev,
|
||||
static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
|
||||
{
|
||||
struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
|
||||
struct netdev_queue *queue = NULL;
|
||||
struct veth_rq *rq = NULL;
|
||||
struct net_device *rcv;
|
||||
int length = skb->len;
|
||||
@ -330,7 +329,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
|
||||
rxq = skb_get_queue_mapping(skb);
|
||||
if (rxq < rcv->real_num_rx_queues) {
|
||||
rq = &rcv_priv->rq[rxq];
|
||||
queue = netdev_get_tx_queue(dev, rxq);
|
||||
|
||||
/* The napi pointer is available when an XDP program is
|
||||
* attached or when GRO is enabled
|
||||
@ -342,8 +340,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
|
||||
|
||||
skb_tx_timestamp(skb);
|
||||
if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
|
||||
if (queue)
|
||||
txq_trans_cond_update(queue);
|
||||
if (!use_napi)
|
||||
dev_lstats_add(dev, length);
|
||||
} else {
|
||||
|
@ -161,8 +161,9 @@ struct slave {
|
||||
struct net_device *dev; /* first - useful for panic debug */
|
||||
struct bonding *bond; /* our master */
|
||||
int delay;
|
||||
/* all three in jiffies */
|
||||
/* all 4 in jiffies */
|
||||
unsigned long last_link_up;
|
||||
unsigned long last_tx;
|
||||
unsigned long last_rx;
|
||||
unsigned long target_last_arp_rx[BOND_MAX_ARP_TARGETS];
|
||||
s8 link; /* one of BOND_LINK_XXXX */
|
||||
@ -540,6 +541,16 @@ static inline unsigned long slave_last_rx(struct bonding *bond,
|
||||
return slave->last_rx;
|
||||
}
|
||||
|
||||
static inline void slave_update_last_tx(struct slave *slave)
|
||||
{
|
||||
WRITE_ONCE(slave->last_tx, jiffies);
|
||||
}
|
||||
|
||||
static inline unsigned long slave_last_tx(struct slave *slave)
|
||||
{
|
||||
return READ_ONCE(slave->last_tx);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NET_POLL_CONTROLLER
|
||||
static inline netdev_tx_t bond_netpoll_send_skb(const struct slave *slave,
|
||||
struct sk_buff *skb)
|
||||
|
@ -427,14 +427,10 @@ void __qdisc_run(struct Qdisc *q)
|
||||
|
||||
unsigned long dev_trans_start(struct net_device *dev)
|
||||
{
|
||||
unsigned long val, res;
|
||||
unsigned long res = READ_ONCE(netdev_get_tx_queue(dev, 0)->trans_start);
|
||||
unsigned long val;
|
||||
unsigned int i;
|
||||
|
||||
if (is_vlan_dev(dev))
|
||||
dev = vlan_dev_real_dev(dev);
|
||||
else if (netif_is_macvlan(dev))
|
||||
dev = macvlan_dev_real_dev(dev);
|
||||
res = READ_ONCE(netdev_get_tx_queue(dev, 0)->trans_start);
|
||||
for (i = 1; i < dev->num_tx_queues; i++) {
|
||||
val = READ_ONCE(netdev_get_tx_queue(dev, i)->trans_start);
|
||||
if (val && time_after(val, res))
|
||||
|
Loading…
Reference in New Issue
Block a user