From b2e7aceb00b2621431e220748f7158b131b89e7b Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 13 Dec 2013 10:19:32 +0800 Subject: [PATCH 01/11] bonding: remove the no effect lock for bond_select_active_slave() The bond slave list was no longer protected by bond lock and only protected by RTNL or RCU, so anywhere that use bond lock to protect slave list is meaningless. remove the release and acquire bond lock for bond_select_active_slave(). The curr_active_slave could only be changed in 3 place: 1. enslave slave. 2. release slave. 3. change_active_slave. all above place were holding bond lock, RTNL and curr_slave_lock together, it is tedious and meaningless, obviously bond lock is no need here, but RTNL or curr_slave_lock is needed, so if you want to access active slave, you have to choose one lock, RTNL or curr_slave_lock, if RTNL is exist, no need to add curr_slave_lock, otherwise curr_slave_lock is better, because of the performance. there are several place calling bond_select_active_slave() and bond_change_active_slave(), the next step I will clean these place and remove the no effect lock. there are some document changed together when update the function. Suggested-by: Jay Vosburgh Suggested-by: Veaceslav Falico Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/bonding/bond_alb.c | 12 +++--------- drivers/net/bonding/bond_main.c | 15 +++------------ 2 files changed, 6 insertions(+), 21 deletions(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 2250b063ab89..7b806e330364 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -469,7 +469,7 @@ static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[]) /* slave being removed should not be active at this point * - * Caller must hold bond lock for read + * Caller must hold rtnl. */ static void rlb_clear_slave(struct bonding *bond, struct slave *slave) { @@ -1679,14 +1679,11 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char * If new_slave is NULL, caller must hold curr_slave_lock or * bond->lock for write. * - * If new_slave is not NULL, caller must hold RTNL, bond->lock for - * read and curr_slave_lock for write. Processing here may sleep, so - * no other locks may be held. + * If new_slave is not NULL, caller must hold RTNL, curr_slave_lock + * for write. Processing here may sleep, so no other locks may be held. */ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave) __releases(&bond->curr_slave_lock) - __releases(&bond->lock) - __acquires(&bond->lock) __acquires(&bond->curr_slave_lock) { struct slave *swap_slave; @@ -1722,7 +1719,6 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave tlb_clear_slave(bond, new_slave, 1); write_unlock_bh(&bond->curr_slave_lock); - read_unlock(&bond->lock); ASSERT_RTNL(); @@ -1748,11 +1744,9 @@ void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave /* swap mac address */ alb_swap_mac_addr(swap_slave, new_slave); alb_fasten_mac_swap(bond, swap_slave, new_slave); - read_lock(&bond->lock); } else { /* set the new_slave to the bond mac address */ alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr); - read_lock(&bond->lock); alb_send_learning_packets(new_slave, bond->dev->dev_addr); } diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 398e299ee1bd..04ae426fcc81 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -697,14 +697,12 @@ static void bond_set_dev_addr(struct net_device *bond_dev, * * Perform special MAC address swapping for fail_over_mac settings * - * Called with RTNL, bond->lock for read, curr_slave_lock for write_bh. + * Called with RTNL, curr_slave_lock for write_bh. */ static void bond_do_fail_over_mac(struct bonding *bond, struct slave *new_active, struct slave *old_active) __releases(&bond->curr_slave_lock) - __releases(&bond->lock) - __acquires(&bond->lock) __acquires(&bond->curr_slave_lock) { u8 tmp_mac[ETH_ALEN]; @@ -715,9 +713,7 @@ static void bond_do_fail_over_mac(struct bonding *bond, case BOND_FOM_ACTIVE: if (new_active) { write_unlock_bh(&bond->curr_slave_lock); - read_unlock(&bond->lock); bond_set_dev_addr(bond->dev, new_active->dev); - read_lock(&bond->lock); write_lock_bh(&bond->curr_slave_lock); } break; @@ -731,7 +727,6 @@ static void bond_do_fail_over_mac(struct bonding *bond, return; write_unlock_bh(&bond->curr_slave_lock); - read_unlock(&bond->lock); if (old_active) { memcpy(tmp_mac, new_active->dev->dev_addr, ETH_ALEN); @@ -761,7 +756,6 @@ static void bond_do_fail_over_mac(struct bonding *bond, pr_err("%s: Error %d setting MAC of slave %s\n", bond->dev->name, -rv, new_active->dev->name); out: - read_lock(&bond->lock); write_lock_bh(&bond->curr_slave_lock); break; default: @@ -846,8 +840,7 @@ static bool bond_should_notify_peers(struct bonding *bond) * because it is apparently the best available slave we have, even though its * updelay hasn't timed out yet. * - * If new_active is not NULL, caller must hold bond->lock for read and - * curr_slave_lock for write_bh. + * If new_active is not NULL, caller must hold curr_slave_lock for write_bh. */ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) { @@ -916,14 +909,12 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) } write_unlock_bh(&bond->curr_slave_lock); - read_unlock(&bond->lock); call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, bond->dev); if (should_notify_peers) call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, bond->dev); - read_lock(&bond->lock); write_lock_bh(&bond->curr_slave_lock); } } @@ -949,7 +940,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) * - The primary_slave has got its link back. * - A slave has got its link back and there's no old curr_active_slave. * - * Caller must hold bond->lock for read and curr_slave_lock for write_bh. + * Caller must hold curr_slave_lock for write_bh. */ void bond_select_active_slave(struct bonding *bond) { From 4cb4f97b7e361745281e843499ba58691112d2f8 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 13 Dec 2013 10:19:39 +0800 Subject: [PATCH 02/11] bonding: rebuild the lock use for bond_mii_monitor() The bond_mii_monitor() still use bond lock to protect bond slave list, it is no effect, I have 2 way to fix the problem, move the RTNL to the top of the function, or add RCU to protect the bond slave list, according to the Jay Vosburgh's opinion, 10 times one second is a truely big performance loss if use RTNL to protect the whole monitor, so I would take the advice and use RCU to protect the bond slave list. The bond_has_slave() will not protect by anything, there will no things happen if the slave list is be changed, unless the bond was free, but it will not happened before the monitor, the bond will closed before be freed. The peers notify for the bond will calling curr_active_slave, so derefence the slave to make sure we will accessing the same slave if the curr_active_slave changed, as the rcu dereference need in read-side critical sector and bond_change_active_slave() will call it with no RCU hold, so add peer notify in rcu_read_lock which will be nested in monitor. Suggested-by: Jay Vosburgh Suggested-by: Veaceslav Falico Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 04ae426fcc81..b34634a96710 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -815,7 +815,11 @@ static struct slave *bond_find_best_slave(struct bonding *bond) static bool bond_should_notify_peers(struct bonding *bond) { - struct slave *slave = bond->curr_active_slave; + struct slave *slave; + + rcu_read_lock(); + slave = rcu_dereference(bond->curr_active_slave); + rcu_read_unlock(); pr_debug("bond_should_notify_peers: bond %s slave %s\n", bond->dev->name, slave ? slave->dev->name : "NULL"); @@ -1919,7 +1923,7 @@ static int bond_miimon_inspect(struct bonding *bond) ignore_updelay = !bond->curr_active_slave ? true : false; - bond_for_each_slave(bond, slave, iter) { + bond_for_each_slave_rcu(bond, slave, iter) { slave->new_link = BOND_LINK_NOCHANGE; link_state = bond_check_dev_link(bond, slave->dev, 0); @@ -2117,41 +2121,35 @@ void bond_mii_monitor(struct work_struct *work) bool should_notify_peers = false; unsigned long delay; - read_lock(&bond->lock); - delay = msecs_to_jiffies(bond->params.miimon); if (!bond_has_slaves(bond)) goto re_arm; + rcu_read_lock(); + should_notify_peers = bond_should_notify_peers(bond); if (bond_miimon_inspect(bond)) { - read_unlock(&bond->lock); + rcu_read_unlock(); /* Race avoidance with bond_close cancel of workqueue */ if (!rtnl_trylock()) { - read_lock(&bond->lock); delay = 1; should_notify_peers = false; goto re_arm; } - read_lock(&bond->lock); - bond_miimon_commit(bond); - read_unlock(&bond->lock); rtnl_unlock(); /* might sleep, hold no other locks */ - read_lock(&bond->lock); - } + } else + rcu_read_unlock(); re_arm: if (bond->params.miimon) queue_delayed_work(bond->wq, &bond->mii_work, delay); - read_unlock(&bond->lock); - if (should_notify_peers) { if (!rtnl_trylock()) return; From 733ab63935beab905b03a329cdc544e8c171b6b9 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 13 Dec 2013 10:19:45 +0800 Subject: [PATCH 03/11] bonding: rebuild the lock use for bond_alb_monitor() The bond_alb_monitor use bond lock to protect the bond slave list, it is no effect here, we need to use RTNL or RCU to replace bond lock, the bond_alb_monitor will called 10 times one second, RTNL may loss performance here, so I replace bond lock with RCU to protect the bond slave list, also the RTNL is preserved, the logic of the monitor did not changed. Suggested-by: Nikolay Aleksandrov Suggested-by: Jay Vosburgh Suggested-by: Veaceslav Falico Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/bonding/bond_alb.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c index 7b806e330364..759ddeebe390 100644 --- a/drivers/net/bonding/bond_alb.c +++ b/drivers/net/bonding/bond_alb.c @@ -815,7 +815,7 @@ static void rlb_rebalance(struct bonding *bond) for (; hash_index != RLB_NULL_INDEX; hash_index = client_info->used_next) { client_info = &(bond_info->rx_hashtbl[hash_index]); - assigned_slave = rlb_next_rx_slave(bond); + assigned_slave = __rlb_next_rx_slave(bond); if (assigned_slave && (client_info->slave != assigned_slave)) { client_info->slave = assigned_slave; client_info->ntt = 1; @@ -1494,14 +1494,14 @@ void bond_alb_monitor(struct work_struct *work) struct list_head *iter; struct slave *slave; - read_lock(&bond->lock); - if (!bond_has_slaves(bond)) { bond_info->tx_rebalance_counter = 0; bond_info->lp_counter = 0; goto re_arm; } + rcu_read_lock(); + bond_info->tx_rebalance_counter++; bond_info->lp_counter++; @@ -1514,7 +1514,7 @@ void bond_alb_monitor(struct work_struct *work) */ read_lock(&bond->curr_slave_lock); - bond_for_each_slave(bond, slave, iter) + bond_for_each_slave_rcu(bond, slave, iter) alb_send_learning_packets(slave, slave->dev->dev_addr); read_unlock(&bond->curr_slave_lock); @@ -1527,7 +1527,7 @@ void bond_alb_monitor(struct work_struct *work) read_lock(&bond->curr_slave_lock); - bond_for_each_slave(bond, slave, iter) { + bond_for_each_slave_rcu(bond, slave, iter) { tlb_clear_slave(bond, slave, 1); if (slave == bond->curr_active_slave) { SLAVE_TLB_INFO(slave).load = @@ -1551,11 +1551,9 @@ void bond_alb_monitor(struct work_struct *work) * dev_set_promiscuity requires rtnl and * nothing else. Avoid race with bond_close. */ - read_unlock(&bond->lock); - if (!rtnl_trylock()) { - read_lock(&bond->lock); + rcu_read_unlock(); + if (!rtnl_trylock()) goto re_arm; - } bond_info->rlb_promisc_timeout_counter = 0; @@ -1567,7 +1565,7 @@ void bond_alb_monitor(struct work_struct *work) bond_info->primary_is_promisc = 0; rtnl_unlock(); - read_lock(&bond->lock); + rcu_read_lock(); } if (bond_info->rlb_rebalance) { @@ -1589,11 +1587,9 @@ void bond_alb_monitor(struct work_struct *work) } } } - + rcu_read_unlock(); re_arm: queue_delayed_work(bond->wq, &bond->alb_work, alb_delta_in_ticks); - - read_unlock(&bond->lock); } /* assumption: called before the slave is attached to the bond From 2e52f4fe3655c7a2311070c6713f7feabc75486c Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 13 Dec 2013 10:19:50 +0800 Subject: [PATCH 04/11] bonding: rebuild the lock use for bond_loadbalance_arp_mon() The bond_loadbalance_arp_mon() use the bond lock to protect the bond slave list, it is no effect, so I could use RTNL or RCU to replace it, considering the performance impact, the RCU is more better here, so the bond lock replace with the RCU. The bond_select_active_slave() need RTNL and curr_slave_lock together, but there is no RTNL lock here, so add a rtnl_rtylock. Suggested-by: Jay Vosburgh Suggested-by: Veaceslav Falico Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index b34634a96710..1781ea620d76 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2411,12 +2411,12 @@ void bond_loadbalance_arp_mon(struct work_struct *work) struct list_head *iter; int do_failover = 0; - read_lock(&bond->lock); - if (!bond_has_slaves(bond)) goto re_arm; - oldcurrent = bond->curr_active_slave; + rcu_read_lock(); + + oldcurrent = ACCESS_ONCE(bond->curr_active_slave); /* see if any of the previous devices are up now (i.e. they have * xmt and rcv traffic). the curr_active_slave does not come into * the picture unless it is null. also, slave->jiffies is not needed @@ -2425,7 +2425,7 @@ void bond_loadbalance_arp_mon(struct work_struct *work) * TODO: what about up/down delay in arp mode? it wasn't here before * so it can wait */ - bond_for_each_slave(bond, slave, iter) { + bond_for_each_slave_rcu(bond, slave, iter) { unsigned long trans_start = dev_trans_start(slave->dev); if (slave->link != BOND_LINK_UP) { @@ -2487,7 +2487,14 @@ void bond_loadbalance_arp_mon(struct work_struct *work) bond_arp_send_all(bond, slave); } + rcu_read_unlock(); + if (do_failover) { + /* the bond_select_active_slave must hold RTNL + * and curr_slave_lock for write. + */ + if (!rtnl_trylock()) + goto re_arm; block_netpoll_tx(); write_lock_bh(&bond->curr_slave_lock); @@ -2495,14 +2502,13 @@ void bond_loadbalance_arp_mon(struct work_struct *work) write_unlock_bh(&bond->curr_slave_lock); unblock_netpoll_tx(); + rtnl_unlock(); } re_arm: if (bond->params.arp_interval) queue_delayed_work(bond->wq, &bond->arp_work, msecs_to_jiffies(bond->params.arp_interval)); - - read_unlock(&bond->lock); } /* From e001bfad913bf119fb67c1e8dd2d4ec1f5d392fa Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 13 Dec 2013 10:19:55 +0800 Subject: [PATCH 05/11] bonding: create bond_first_slave_rcu() The bond_first_slave_rcu() will be used to instead of bond_first_slave() in rcu_read_lock(). According to the Jay Vosburgh's suggestion, the struct netdev_adjacent should hide from users who wanted to use it directly. so I package a new function to get the first slave of the bond. Suggested-by: Nikolay Aleksandrov Suggested-by: Jay Vosburgh Suggested-by: Veaceslav Falico Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/bonding/bonding.h | 4 ++++ include/linux/netdevice.h | 1 + net/core/dev.c | 21 +++++++++++++++++++++ 3 files changed, 26 insertions(+) diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h index 8283cbdec50a..8f0d6d0c383b 100644 --- a/drivers/net/bonding/bonding.h +++ b/drivers/net/bonding/bonding.h @@ -101,6 +101,10 @@ netdev_adjacent_get_private(bond_slave_list(bond)->prev) : \ NULL) +/* Caller must have rcu_read_lock */ +#define bond_first_slave_rcu(bond) \ + netdev_lower_get_first_private_rcu(bond->dev) + #define bond_is_first_slave(bond, pos) (pos == bond_first_slave(bond)) #define bond_is_last_slave(bond, pos) (pos == bond_last_slave(bond)) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5260d2eae2e6..2c74d20dad34 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2907,6 +2907,7 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, priv = netdev_lower_get_next_private_rcu(dev, &(iter))) void *netdev_adjacent_get_private(struct list_head *adj_list); +void *netdev_lower_get_first_private_rcu(struct net_device *dev); struct net_device *netdev_master_upper_dev_get(struct net_device *dev); struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev); diff --git a/net/core/dev.c b/net/core/dev.c index c95d664b2b42..9d4369ece679 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4543,6 +4543,27 @@ void *netdev_lower_get_next_private_rcu(struct net_device *dev, } EXPORT_SYMBOL(netdev_lower_get_next_private_rcu); +/** + * netdev_lower_get_first_private_rcu - Get the first ->private from the + * lower neighbour list, RCU + * variant + * @dev: device + * + * Gets the first netdev_adjacent->private from the dev's lower neighbour + * list. The caller must hold RCU read lock. + */ +void *netdev_lower_get_first_private_rcu(struct net_device *dev) +{ + struct netdev_adjacent *lower; + + lower = list_first_or_null_rcu(&dev->adj_list.lower, + struct netdev_adjacent, list); + if (lower) + return lower->private; + return NULL; +} +EXPORT_SYMBOL(netdev_lower_get_first_private_rcu); + /** * netdev_master_upper_dev_get_rcu - Get master upper device * @dev: device From eb9fa4b0199f62df3d174d32b4bd534df8ba4533 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 13 Dec 2013 10:20:02 +0800 Subject: [PATCH 06/11] bonding: rebuild the lock use for bond_activebackup_arp_mon() The bond_activebackup_arp_mon() use the bond lock for read to protect the slave list, it is no effect, and the RTNL is only called for bond_ab_arp_commit() and peer notify, for the performance better, use RCU to replace with the bond lock, to the bond slave list need to called in RCU, add a new bond_first_slave_rcu() to get the first slave in RCU protection. In bond_ab_arp_probe(), the bond->current_arp_slave may changd if bond release slave, just like: bond_ab_arp_probe() bond_release() cpu 0 cpu 1 ... if (bond->current_arp_slave...) ... ... bond->current_arp_slave = NULl bond->current_arp_slave->dev->name ... So the current_arp_slave need to dereference in the section. Suggested-by: Nikolay Aleksandrov Suggested-by: Jay Vosburgh Suggested-by: Veaceslav Falico Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 45 +++++++++++++++------------------ 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1781ea620d76..bad1bf949546 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -2517,7 +2517,7 @@ re_arm: * place for the slave. Returns 0 if no changes are found, >0 if changes * to link states must be committed. * - * Called with bond->lock held for read. + * Called with rcu_read_lock hold. */ static int bond_ab_arp_inspect(struct bonding *bond) { @@ -2526,7 +2526,7 @@ static int bond_ab_arp_inspect(struct bonding *bond) struct slave *slave; int commit = 0; - bond_for_each_slave(bond, slave, iter) { + bond_for_each_slave_rcu(bond, slave, iter) { slave->new_link = BOND_LINK_NOCHANGE; last_rx = slave_last_rx(bond, slave); @@ -2588,7 +2588,7 @@ static int bond_ab_arp_inspect(struct bonding *bond) * Called to commit link state changes noted by inspection step of * active-backup mode ARP monitor. * - * Called with RTNL and bond->lock for read. + * Called with RTNL hold. */ static void bond_ab_arp_commit(struct bonding *bond) { @@ -2663,19 +2663,20 @@ do_failover: /* * Send ARP probes for active-backup mode ARP monitor. * - * Called with bond->lock held for read. + * Called with rcu_read_lock hold. */ static void bond_ab_arp_probe(struct bonding *bond) { - struct slave *slave, *before = NULL, *new_slave = NULL; + struct slave *slave, *before = NULL, *new_slave = NULL, + *curr_arp_slave = rcu_dereference(bond->current_arp_slave); struct list_head *iter; bool found = false; read_lock(&bond->curr_slave_lock); - if (bond->current_arp_slave && bond->curr_active_slave) + if (curr_arp_slave && bond->curr_active_slave) pr_info("PROBE: c_arp %s && cas %s BAD\n", - bond->current_arp_slave->dev->name, + curr_arp_slave->dev->name, bond->curr_active_slave->dev->name); if (bond->curr_active_slave) { @@ -2691,15 +2692,15 @@ static void bond_ab_arp_probe(struct bonding *bond) * for becoming the curr_active_slave */ - if (!bond->current_arp_slave) { - bond->current_arp_slave = bond_first_slave(bond); - if (!bond->current_arp_slave) + if (!curr_arp_slave) { + curr_arp_slave = bond_first_slave_rcu(bond); + if (!curr_arp_slave) return; } - bond_set_slave_inactive_flags(bond->current_arp_slave); + bond_set_slave_inactive_flags(curr_arp_slave); - bond_for_each_slave(bond, slave, iter) { + bond_for_each_slave_rcu(bond, slave, iter) { if (!found && !before && IS_UP(slave->dev)) before = slave; @@ -2722,7 +2723,7 @@ static void bond_ab_arp_probe(struct bonding *bond) pr_info("%s: backup interface %s is now down.\n", bond->dev->name, slave->dev->name); } - if (slave == bond->current_arp_slave) + if (slave == curr_arp_slave) found = true; } @@ -2736,8 +2737,7 @@ static void bond_ab_arp_probe(struct bonding *bond) bond_set_slave_active_flags(new_slave); bond_arp_send_all(bond, new_slave); new_slave->jiffies = jiffies; - bond->current_arp_slave = new_slave; - + rcu_assign_pointer(bond->current_arp_slave, new_slave); } void bond_activebackup_arp_mon(struct work_struct *work) @@ -2747,43 +2747,38 @@ void bond_activebackup_arp_mon(struct work_struct *work) bool should_notify_peers = false; int delta_in_ticks; - read_lock(&bond->lock); - delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval); if (!bond_has_slaves(bond)) goto re_arm; + rcu_read_lock(); + should_notify_peers = bond_should_notify_peers(bond); if (bond_ab_arp_inspect(bond)) { - read_unlock(&bond->lock); + rcu_read_unlock(); /* Race avoidance with bond_close flush of workqueue */ if (!rtnl_trylock()) { - read_lock(&bond->lock); delta_in_ticks = 1; should_notify_peers = false; goto re_arm; } - read_lock(&bond->lock); - bond_ab_arp_commit(bond); - read_unlock(&bond->lock); rtnl_unlock(); - read_lock(&bond->lock); + rcu_read_lock(); } bond_ab_arp_probe(bond); + rcu_read_unlock(); re_arm: if (bond->params.arp_interval) queue_delayed_work(bond->wq, &bond->arp_work, delta_in_ticks); - read_unlock(&bond->lock); - if (should_notify_peers) { if (!rtnl_trylock()) return; From c8517035445834350b8d498723b68f4e81286110 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 13 Dec 2013 10:20:07 +0800 Subject: [PATCH 07/11] bonding: remove unwanted lock for bond enslave and release The bond_change_active_slave() and bond_select_active_slave() do't need bond lock anymore, so remove the unwanted bond lock for these two functions. The bond_select_active_slave() will release and acquire curr_slave_lock, so the curr_slave_lock need to protect the function. In bond enslave and bond release, the bond slave list is also protected by RTNL, so bond lock is no need to exist, remove the lock and clean the functions. Suggested-by: Jay Vosburgh Suggested-by: Veaceslav Falico Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index bad1bf949546..1b1dd01fbf72 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1589,11 +1589,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) bond_set_carrier(bond); if (USES_PRIMARY(bond->params.mode)) { - read_lock(&bond->lock); write_lock_bh(&bond->curr_slave_lock); bond_select_active_slave(bond); write_unlock_bh(&bond->curr_slave_lock); - read_unlock(&bond->lock); } pr_info("%s: enslaving %s as a%s interface with a%s link.\n", @@ -1613,19 +1611,13 @@ err_detach: bond_hw_addr_flush(bond_dev, slave_dev); vlan_vids_del_by_dev(slave_dev, bond_dev); - write_lock_bh(&bond->lock); if (bond->primary_slave == new_slave) bond->primary_slave = NULL; if (bond->curr_active_slave == new_slave) { - bond_change_active_slave(bond, NULL); - write_unlock_bh(&bond->lock); - read_lock(&bond->lock); write_lock_bh(&bond->curr_slave_lock); + bond_change_active_slave(bond, NULL); bond_select_active_slave(bond); write_unlock_bh(&bond->curr_slave_lock); - read_unlock(&bond->lock); - } else { - write_unlock_bh(&bond->lock); } slave_disable_netpoll(new_slave); @@ -1690,20 +1682,16 @@ static int __bond_release_one(struct net_device *bond_dev, } block_netpoll_tx(); - write_lock_bh(&bond->lock); slave = bond_get_slave_by_dev(bond, slave_dev); if (!slave) { /* not a slave of this bond */ pr_info("%s: %s not enslaved\n", bond_dev->name, slave_dev->name); - write_unlock_bh(&bond->lock); unblock_netpoll_tx(); return -EINVAL; } - write_unlock_bh(&bond->lock); - /* release the slave from its bond */ bond->slave_cnt--; @@ -1721,6 +1709,7 @@ static int __bond_release_one(struct net_device *bond_dev, */ bond_3ad_unbind_slave(slave); } + write_unlock_bh(&bond->lock); pr_info("%s: releasing %s interface %s\n", bond_dev->name, @@ -1743,8 +1732,11 @@ static int __bond_release_one(struct net_device *bond_dev, if (bond->primary_slave == slave) bond->primary_slave = NULL; - if (oldcurrent == slave) + if (oldcurrent == slave) { + write_lock_bh(&bond->curr_slave_lock); bond_change_active_slave(bond, NULL); + write_unlock_bh(&bond->curr_slave_lock); + } if (bond_is_lb(bond)) { /* Must be called only after the slave has been @@ -1752,9 +1744,7 @@ static int __bond_release_one(struct net_device *bond_dev, * has been cleared (if our_slave == old_current), * but before a new active slave is selected. */ - write_unlock_bh(&bond->lock); bond_alb_deinit_slave(bond, slave); - write_lock_bh(&bond->lock); } if (all) { @@ -1765,15 +1755,11 @@ static int __bond_release_one(struct net_device *bond_dev, * is no concern that another slave add/remove event * will interfere. */ - write_unlock_bh(&bond->lock); - read_lock(&bond->lock); write_lock_bh(&bond->curr_slave_lock); bond_select_active_slave(bond); write_unlock_bh(&bond->curr_slave_lock); - read_unlock(&bond->lock); - write_lock_bh(&bond->lock); } if (!bond_has_slaves(bond)) { @@ -1788,7 +1774,6 @@ static int __bond_release_one(struct net_device *bond_dev, } } - write_unlock_bh(&bond->lock); unblock_netpoll_tx(); synchronize_rcu(); From be79bd048abe9fb6aee049ea903d1e70b44d6480 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 13 Dec 2013 10:20:12 +0800 Subject: [PATCH 08/11] bonding: add RCU for bond_3ad_state_machine_handler() The bond_3ad_state_machine_handler() use the bond lock to protect the bond slave list and slave port together, but it is not enough, the bond slave list was link and unlink in RTNL, not bond lock, so I add RCU to protect the slave list from leaving. The bond lock is still used here, because when the slave has been removed from the list by the time the state machine runs, it appears to be possible for both function to manupulate the same aggregator->lag_ports by finding the aggregator via two different ports that are both members of that aggregator (i.e., port A of the agg is being unbound, and port B of the agg is runing its state machine). If I remove the bond lock, there are nothing to mutex changes to aggregator->lag_ports between bond_3ad_state_machine_handler and bond_3ad_unbind_slave, So the bond lock is the simplest way to protect aggregator->lag_ports. There was a lot of function need RCU protect, I have two choice to make the function in RCU-safe, (1) create new similar functions and make the bond slave list in RCU. (2) modify the existed functions and make them in read-side critical section, because the RCU read-side critical sections may be nested. I choose (2) because it is no need to create more similar functions. The nots in the function is still too old, clean up the nots. Suggested-by: Nikolay Aleksandrov Suggested-by: Jay Vosburgh Suggested-by: Veaceslav Falico Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/bonding/bond_3ad.c | 54 ++++++++++++++++++++------------- drivers/net/bonding/bond_main.c | 7 ++--- 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 187b1b7772ef..58c2249a3324 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -147,11 +147,12 @@ static inline struct aggregator *__get_first_agg(struct port *port) struct bonding *bond = __get_bond_by_port(port); struct slave *first_slave; - // If there's no bond for this port, or bond has no slaves + /* If there's no bond for this port, or bond has no slaves */ if (bond == NULL) return NULL; - first_slave = bond_first_slave(bond); - + rcu_read_lock(); + first_slave = bond_first_slave_rcu(bond); + rcu_read_unlock(); return first_slave ? &(SLAVE_AD_INFO(first_slave).aggregator) : NULL; } @@ -702,9 +703,13 @@ static struct aggregator *__get_active_agg(struct aggregator *aggregator) struct list_head *iter; struct slave *slave; - bond_for_each_slave(bond, slave, iter) - if (SLAVE_AD_INFO(slave).aggregator.is_active) + rcu_read_lock(); + bond_for_each_slave_rcu(bond, slave, iter) + if (SLAVE_AD_INFO(slave).aggregator.is_active) { + rcu_read_unlock(); return &(SLAVE_AD_INFO(slave).aggregator); + } + rcu_read_unlock(); return NULL; } @@ -1471,7 +1476,8 @@ static void ad_agg_selection_logic(struct aggregator *agg) active = __get_active_agg(agg); best = (active && agg_device_up(active)) ? active : NULL; - bond_for_each_slave(bond, slave, iter) { + rcu_read_lock(); + bond_for_each_slave_rcu(bond, slave, iter) { agg = &(SLAVE_AD_INFO(slave).aggregator); agg->is_active = 0; @@ -1505,7 +1511,7 @@ static void ad_agg_selection_logic(struct aggregator *agg) active->is_active = 1; } - // if there is new best aggregator, activate it + /* if there is new best aggregator, activate it */ if (best) { pr_debug("best Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n", best->aggregator_identifier, best->num_of_ports, @@ -1516,7 +1522,7 @@ static void ad_agg_selection_logic(struct aggregator *agg) best->lag_ports, best->slave, best->slave ? best->slave->dev->name : "NULL"); - bond_for_each_slave(bond, slave, iter) { + bond_for_each_slave_rcu(bond, slave, iter) { agg = &(SLAVE_AD_INFO(slave).aggregator); pr_debug("Agg=%d; P=%d; a k=%d; p k=%d; Ind=%d; Act=%d\n", @@ -1526,10 +1532,11 @@ static void ad_agg_selection_logic(struct aggregator *agg) agg->is_individual, agg->is_active); } - // check if any partner replys + /* check if any partner replys */ if (best->is_individual) { pr_warning("%s: Warning: No 802.3ad response from the link partner for any adapters in the bond\n", - best->slave ? best->slave->bond->dev->name : "NULL"); + best->slave ? + best->slave->bond->dev->name : "NULL"); } best->is_active = 1; @@ -1541,7 +1548,7 @@ static void ad_agg_selection_logic(struct aggregator *agg) best->partner_oper_aggregator_key, best->is_individual, best->is_active); - // disable the ports that were related to the former active_aggregator + /* disable the ports that were related to the former active_aggregator */ if (active) { for (port = active->lag_ports; port; port = port->next_port_in_aggregator) { @@ -1565,6 +1572,8 @@ static void ad_agg_selection_logic(struct aggregator *agg) } } + rcu_read_unlock(); + bond_3ad_set_carrier(bond); } @@ -2069,17 +2078,18 @@ void bond_3ad_state_machine_handler(struct work_struct *work) struct port *port; read_lock(&bond->lock); + rcu_read_lock(); - //check if there are any slaves + /* check if there are any slaves */ if (!bond_has_slaves(bond)) goto re_arm; - // check if agg_select_timer timer after initialize is timed out + /* check if agg_select_timer timer after initialize is timed out */ if (BOND_AD_INFO(bond).agg_select_timer && !(--BOND_AD_INFO(bond).agg_select_timer)) { - slave = bond_first_slave(bond); + slave = bond_first_slave_rcu(bond); port = slave ? &(SLAVE_AD_INFO(slave).port) : NULL; - // select the active aggregator for the bond + /* select the active aggregator for the bond */ if (port) { if (!port->slave) { pr_warning("%s: Warning: bond's first port is uninitialized\n", @@ -2093,8 +2103,8 @@ void bond_3ad_state_machine_handler(struct work_struct *work) bond_3ad_set_carrier(bond); } - // for each port run the state machines - bond_for_each_slave(bond, slave, iter) { + /* for each port run the state machines */ + bond_for_each_slave_rcu(bond, slave, iter) { port = &(SLAVE_AD_INFO(slave).port); if (!port->slave) { pr_warning("%s: Warning: Found an uninitialized port\n", @@ -2114,7 +2124,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work) ad_mux_machine(port); ad_tx_machine(port); - // turn off the BEGIN bit, since we already handled it + /* turn off the BEGIN bit, since we already handled it */ if (port->sm_vars & AD_PORT_BEGIN) port->sm_vars &= ~AD_PORT_BEGIN; @@ -2122,9 +2132,9 @@ void bond_3ad_state_machine_handler(struct work_struct *work) } re_arm: - queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks); - + rcu_read_unlock(); read_unlock(&bond->lock); + queue_delayed_work(bond->wq, &bond->ad_work, ad_delta_in_ticks); } /** @@ -2303,7 +2313,9 @@ int bond_3ad_set_carrier(struct bonding *bond) struct aggregator *active; struct slave *first_slave; - first_slave = bond_first_slave(bond); + rcu_read_lock(); + first_slave = bond_first_slave_rcu(bond); + rcu_read_unlock(); if (!first_slave) return 0; active = __get_active_agg(&(SLAVE_AD_INFO(first_slave).aggregator)); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1b1dd01fbf72..720a826eb071 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1703,12 +1703,9 @@ static int __bond_release_one(struct net_device *bond_dev, write_lock_bh(&bond->lock); /* Inform AD package of unbinding of slave. */ - if (bond->params.mode == BOND_MODE_8023AD) { - /* must be called before the slave is - * detached from the list - */ + if (bond->params.mode == BOND_MODE_8023AD) bond_3ad_unbind_slave(slave); - } + write_unlock_bh(&bond->lock); pr_info("%s: releasing %s interface %s\n", From 4e789fc1a62d4c25726ee053044399d71471fe2f Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 13 Dec 2013 10:20:17 +0800 Subject: [PATCH 09/11] bonding: remove unwanted lock for bond_option_active_slave_set() The bond_option_active_slave_set() is always called in RTNL, the RTNL could protect bond slave list, so remove the unwanted bond lock. Suggested-by: Jay Vosburgh Suggested-by: Veaceslav Falico Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/bonding/bond_options.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index dfef673d53d1..600779e5904f 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -107,7 +107,6 @@ int bond_option_active_slave_set(struct bonding *bond, } block_netpoll_tx(); - read_lock(&bond->lock); write_lock_bh(&bond->curr_slave_lock); /* check to see if we are clearing active */ @@ -142,7 +141,6 @@ int bond_option_active_slave_set(struct bonding *bond, } write_unlock_bh(&bond->curr_slave_lock); - read_unlock(&bond->lock); unblock_netpoll_tx(); return ret; } From 75ad932c182d0b3d2cab24a2e2252bb7acd42d45 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 13 Dec 2013 10:20:22 +0800 Subject: [PATCH 10/11] bonding: remove unwanted lock for bond_store_primaryxxx() The bond_select_active_slave() will not release and acquire bond lock, so it is no need to read the bond lock for them, and the bond_store_primaryxxx() is already in RTNL, so remove the unwanted lock. Suggested-by: Jay Vosburgh Suggested-by: Veaceslav Falico Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/bonding/bond_sysfs.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index dad9bea95122..6368d299d5a6 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -878,7 +878,6 @@ static ssize_t bonding_store_primary(struct device *d, if (!rtnl_trylock()) return restart_syscall(); block_netpoll_tx(); - read_lock(&bond->lock); write_lock_bh(&bond->curr_slave_lock); if (!USES_PRIMARY(bond->params.mode)) { @@ -918,7 +917,6 @@ static ssize_t bonding_store_primary(struct device *d, bond->dev->name, ifname, bond->dev->name); out: write_unlock_bh(&bond->curr_slave_lock); - read_unlock(&bond->lock); unblock_netpoll_tx(); rtnl_unlock(); @@ -966,11 +964,9 @@ static ssize_t bonding_store_primary_reselect(struct device *d, new_value); block_netpoll_tx(); - read_lock(&bond->lock); write_lock_bh(&bond->curr_slave_lock); bond_select_active_slave(bond); write_unlock_bh(&bond->curr_slave_lock); - read_unlock(&bond->lock); unblock_netpoll_tx(); out: rtnl_unlock(); From f23691095b3c2c05b3ee4ab03cb8e90cfe245ea8 Mon Sep 17 00:00:00 2001 From: dingtianhong Date: Fri, 13 Dec 2013 10:20:26 +0800 Subject: [PATCH 11/11] bonding: rebuild the bond_resend_igmp_join_requests_delayed() The bond_resend_igmp_join_requests_delayed() and bond_resend_igmp_join_requests() should be integrated, because the bond_resend_igmp_join_requests_delayed() did nothing except bond_resend_igmp_join_requests(). The bond igmp_retrans could only be changed in bond_change_active_slave and here, bond_change_active_slave will be called in RTNL and curr_slave_lock, the bond_resend_igmp_join_requests already hold RTNL, so no need to free RTNL and hold curr_slave_lock again, it may be a small optimization, so move the igmp_retrans in RTNL and remove the curr_slave_lock. Signed-off-by: Ding Tianhong Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 720a826eb071..c0456cc86610 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -591,33 +591,22 @@ static int bond_set_allmulti(struct bonding *bond, int inc) * device and retransmit an IGMP JOIN request to the current active * slave. */ -static void bond_resend_igmp_join_requests(struct bonding *bond) -{ - if (!rtnl_trylock()) { - queue_delayed_work(bond->wq, &bond->mcast_work, 1); - return; - } - call_netdevice_notifiers(NETDEV_RESEND_IGMP, bond->dev); - rtnl_unlock(); - - /* We use curr_slave_lock to protect against concurrent access to - * igmp_retrans from multiple running instances of this function and - * bond_change_active_slave - */ - write_lock_bh(&bond->curr_slave_lock); - if (bond->igmp_retrans > 1) { - bond->igmp_retrans--; - queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5); - } - write_unlock_bh(&bond->curr_slave_lock); -} - static void bond_resend_igmp_join_requests_delayed(struct work_struct *work) { struct bonding *bond = container_of(work, struct bonding, mcast_work.work); - bond_resend_igmp_join_requests(bond); + if (!rtnl_trylock()) { + queue_delayed_work(bond->wq, &bond->mcast_work, 1); + return; + } + call_netdevice_notifiers(NETDEV_RESEND_IGMP, bond->dev); + + if (bond->igmp_retrans > 1) { + bond->igmp_retrans--; + queue_delayed_work(bond->wq, &bond->mcast_work, HZ/5); + } + rtnl_unlock(); } /* Flush bond's hardware addresses from slave