From 42ce4126d8bc2e128e1f207cf79bb0623fac498f Mon Sep 17 00:00:00 2001 From: Akeem G Abodunrin Date: Fri, 8 Nov 2013 01:54:07 +0000 Subject: [PATCH 1/7] igb: Fixed Wake On LAN support This patch fixes Wake on LAN being reported as supported on some Ethernet ports, in contrary to Hardware capability. Signed-off-by: Akeem G Abodunrin Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_ethtool.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index b0f3666b1d7f..c3143da497c8 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2062,14 +2062,15 @@ static void igb_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) { struct igb_adapter *adapter = netdev_priv(netdev); - wol->supported = WAKE_UCAST | WAKE_MCAST | - WAKE_BCAST | WAKE_MAGIC | - WAKE_PHY; wol->wolopts = 0; if (!(adapter->flags & IGB_FLAG_WOL_SUPPORTED)) return; + wol->supported = WAKE_UCAST | WAKE_MCAST | + WAKE_BCAST | WAKE_MAGIC | + WAKE_PHY; + /* apply any specific unsupported masks here */ switch (adapter->hw.device_id) { default: From 6a7d64e3e09e11181a07a2e8cd6af5d6355133be Mon Sep 17 00:00:00 2001 From: yzhu1 Date: Sat, 23 Nov 2013 07:07:40 +0000 Subject: [PATCH 2/7] e1000: prevent oops when adapter is being closed and reset simultaneously This change is based on a similar change made to e1000e support in commit bb9e44d0d0f4 ("e1000e: prevent oops when adapter is being closed and reset simultaneously"). The same issue has also been observed on the older e1000 cards. Here, we have increased the RESET_COUNT value to 50 because there are too many accesses to e1000 nic on stress tests to e1000 nic, it is not enough to set RESET_COUT 25. Experimentation has shown that it is enough to set RESET_COUNT 50. Signed-off-by: yzhu1 Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000.h | 5 +++++ drivers/net/ethernet/intel/e1000/e1000_main.c | 9 +++++++++ 2 files changed, 14 insertions(+) diff --git a/drivers/net/ethernet/intel/e1000/e1000.h b/drivers/net/ethernet/intel/e1000/e1000.h index 58c147271a36..e4093d1f64cb 100644 --- a/drivers/net/ethernet/intel/e1000/e1000.h +++ b/drivers/net/ethernet/intel/e1000/e1000.h @@ -83,6 +83,11 @@ struct e1000_adapter; #define E1000_MAX_INTR 10 +/* + * Count for polling __E1000_RESET condition every 10-20msec. + */ +#define E1000_CHECK_RESET_COUNT 50 + /* TX/RX descriptor defines */ #define E1000_DEFAULT_TXD 256 #define E1000_MAX_TXD 256 diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index e38622825fa7..c0f52174e4c8 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -1440,6 +1440,10 @@ static int e1000_close(struct net_device *netdev) { struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; + int count = E1000_CHECK_RESET_COUNT; + + while (test_bit(__E1000_RESETTING, &adapter->flags) && count--) + usleep_range(10000, 20000); WARN_ON(test_bit(__E1000_RESETTING, &adapter->flags)); e1000_down(adapter); @@ -4963,6 +4967,11 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake) netif_device_detach(netdev); if (netif_running(netdev)) { + int count = E1000_CHECK_RESET_COUNT; + + while (test_bit(__E1000_RESETTING, &adapter->flags) && count--) + usleep_range(10000, 20000); + WARN_ON(test_bit(__E1000_RESETTING, &adapter->flags)); e1000_down(adapter); } From b2f963bfaebadc9117b29f806630ea3bcaec403d Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Sat, 23 Nov 2013 07:17:56 +0000 Subject: [PATCH 3/7] e1000: fix lockdep warning in e1000_reset_task The patch fixes the following lockdep warning, which is 100% reproducible on network restart: ====================================================== [ INFO: possible circular locking dependency detected ] 3.12.0+ #47 Tainted: GF ------------------------------------------------------- kworker/1:1/27 is trying to acquire lock: ((&(&adapter->watchdog_task)->work)){+.+...}, at: [] flush_work+0x0/0x70 but task is already holding lock: (&adapter->mutex){+.+...}, at: [] e1000_reset_task+0x4a/0xa0 [e1000] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&adapter->mutex){+.+...}: [] lock_acquire+0x9d/0x120 [] mutex_lock_nested+0x4c/0x390 [] e1000_watchdog+0x7d/0x5b0 [e1000] [] process_one_work+0x1d2/0x510 [] worker_thread+0x120/0x3a0 [] kthread+0xee/0x110 [] ret_from_fork+0x7c/0xb0 -> #0 ((&(&adapter->watchdog_task)->work)){+.+...}: [] __lock_acquire+0x1710/0x1810 [] lock_acquire+0x9d/0x120 [] flush_work+0x3b/0x70 [] __cancel_work_timer+0x98/0x140 [] cancel_delayed_work_sync+0x13/0x20 [] e1000_down_and_stop+0x3c/0x60 [e1000] [] e1000_down+0x131/0x220 [e1000] [] e1000_reset_task+0x52/0xa0 [e1000] [] process_one_work+0x1d2/0x510 [] worker_thread+0x120/0x3a0 [] kthread+0xee/0x110 [] ret_from_fork+0x7c/0xb0 other info that might help us debug this: Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&adapter->mutex); lock((&(&adapter->watchdog_task)->work)); lock(&adapter->mutex); lock((&(&adapter->watchdog_task)->work)); *** DEADLOCK *** 3 locks held by kworker/1:1/27: #0: (events){.+.+.+}, at: [] process_one_work+0x166/0x510 #1: ((&adapter->reset_task)){+.+...}, at: [] process_one_work+0x166/0x510 #2: (&adapter->mutex){+.+...}, at: [] e1000_reset_task+0x4a/0xa0 [e1000] stack backtrace: CPU: 1 PID: 27 Comm: kworker/1:1 Tainted: GF 3.12.0+ #47 Hardware name: System manufacturer System Product Name/P5B-VM SE, BIOS 0501 05/31/2007 Workqueue: events e1000_reset_task [e1000] ffffffff820f6000 ffff88007b9dba98 ffffffff816b54a2 0000000000000002 ffffffff820f5e50 ffff88007b9dbae8 ffffffff810ba936 ffff88007b9dbac8 ffff88007b9dbb48 ffff88007b9d8f00 ffff88007b9d8780 ffff88007b9d8f00 Call Trace: [] dump_stack+0x49/0x5f [] print_circular_bug+0x216/0x310 [] __lock_acquire+0x1710/0x1810 [] ? __flush_work+0x250/0x250 [] lock_acquire+0x9d/0x120 [] ? __flush_work+0x250/0x250 [] flush_work+0x3b/0x70 [] ? __flush_work+0x250/0x250 [] __cancel_work_timer+0x98/0x140 [] cancel_delayed_work_sync+0x13/0x20 [] e1000_down_and_stop+0x3c/0x60 [e1000] [] e1000_down+0x131/0x220 [e1000] [] e1000_reset_task+0x52/0xa0 [e1000] [] process_one_work+0x1d2/0x510 [] ? process_one_work+0x166/0x510 [] worker_thread+0x120/0x3a0 [] ? manage_workers+0x2c0/0x2c0 [] kthread+0xee/0x110 [] ? __init_kthread_worker+0x70/0x70 [] ret_from_fork+0x7c/0xb0 [] ? __init_kthread_worker+0x70/0x70 == The issue background == The problem occurs, because e1000_down(), which is called under adapter->mutex by e1000_reset_task(), tries to synchronously cancel e1000 auxiliary works (reset_task, watchdog_task, phy_info_task, fifo_stall_task), which take adapter->mutex in their handlers. So the question is what does adapter->mutex protect there? The adapter->mutex was introduced by commit 0ef4ee ("e1000: convert to private mutex from rtnl") as a replacement for rtnl_lock() taken in the asynchronous handlers. It targeted on fixing a similar lockdep warning issued when e1000_down() was called under rtnl_lock(), and it fixed it, but unfortunately it introduced the lockdep warning described above. Anyway, that said the source of this bug is that the asynchronous works were made to take rtnl_lock() some time ago, so let's look deeper and find why it was added there. The rtnl_lock() was added to asynchronous handlers by commit 338c15 ("e1000: fix occasional panic on unload") in order to prevent asynchronous handlers from execution after the module is unloaded (e1000_down() is called) as it follows from the comment to the commit: > Net drivers in general have an issue where timers fired > by mod_timer or work threads with schedule_work are running > outside of the rtnl_lock. > > With no other lock protection these routines are vulnerable > to races with driver unload or reset paths. > > The longer term solution to this might be a redesign with > safer locks being taken in the driver to guarantee no > reentrance, but for now a safe and effective fix is > to take the rtnl_lock in these routines. I'm not sure if this locking scheme fixed the problem or just made it unlikely, although I incline to the latter. Anyway, this was long time ago when e1000 auxiliary works were implemented as timers scheduling real work handlers in their routines. The e1000_down() function only canceled the timers, but left the real handlers running if they were running, which could result in work execution after module unload. Today, the e1000 driver uses sane delayed works instead of the pair timer+work to implement its delayed asynchronous handlers, and the e1000_down() synchronously cancels all the works so that the problem that commit 338c15 tried to cope with disappeared, and we don't need any locks in the handlers any more. Moreover, any locking there can potentially result in a deadlock. So, this patch reverts commits 0ef4ee and 338c15. Fixes: 0ef4eedc2e98 ("e1000: convert to private mutex from rtnl") Fixes: 338c15e470d8 ("e1000: fix occasional panic on unload") Cc: Tushar Dave Cc: Patrick McHardy Signed-off-by: Vladimir Davydov Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000.h | 2 -- drivers/net/ethernet/intel/e1000/e1000_main.c | 36 ++----------------- 2 files changed, 3 insertions(+), 35 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000.h b/drivers/net/ethernet/intel/e1000/e1000.h index e4093d1f64cb..f9313b36c887 100644 --- a/drivers/net/ethernet/intel/e1000/e1000.h +++ b/drivers/net/ethernet/intel/e1000/e1000.h @@ -317,8 +317,6 @@ struct e1000_adapter { struct delayed_work watchdog_task; struct delayed_work fifo_stall_task; struct delayed_work phy_info_task; - - struct mutex mutex; }; enum e1000_state_t { diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index c0f52174e4c8..619b0cb60f2f 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -544,21 +544,8 @@ void e1000_down(struct e1000_adapter *adapter) e1000_clean_all_rx_rings(adapter); } -static void e1000_reinit_safe(struct e1000_adapter *adapter) -{ - while (test_and_set_bit(__E1000_RESETTING, &adapter->flags)) - msleep(1); - mutex_lock(&adapter->mutex); - e1000_down(adapter); - e1000_up(adapter); - mutex_unlock(&adapter->mutex); - clear_bit(__E1000_RESETTING, &adapter->flags); -} - void e1000_reinit_locked(struct e1000_adapter *adapter) { - /* if rtnl_lock is not held the call path is bogus */ - ASSERT_RTNL(); WARN_ON(in_interrupt()); while (test_and_set_bit(__E1000_RESETTING, &adapter->flags)) msleep(1); @@ -1316,7 +1303,6 @@ static int e1000_sw_init(struct e1000_adapter *adapter) e1000_irq_disable(adapter); spin_lock_init(&adapter->stats_lock); - mutex_init(&adapter->mutex); set_bit(__E1000_DOWN, &adapter->flags); @@ -2329,11 +2315,8 @@ static void e1000_update_phy_info_task(struct work_struct *work) struct e1000_adapter *adapter = container_of(work, struct e1000_adapter, phy_info_task.work); - if (test_bit(__E1000_DOWN, &adapter->flags)) - return; - mutex_lock(&adapter->mutex); + e1000_phy_get_info(&adapter->hw, &adapter->phy_info); - mutex_unlock(&adapter->mutex); } /** @@ -2349,9 +2332,6 @@ static void e1000_82547_tx_fifo_stall_task(struct work_struct *work) struct net_device *netdev = adapter->netdev; u32 tctl; - if (test_bit(__E1000_DOWN, &adapter->flags)) - return; - mutex_lock(&adapter->mutex); if (atomic_read(&adapter->tx_fifo_stall)) { if ((er32(TDT) == er32(TDH)) && (er32(TDFT) == er32(TDFH)) && @@ -2372,7 +2352,6 @@ static void e1000_82547_tx_fifo_stall_task(struct work_struct *work) schedule_delayed_work(&adapter->fifo_stall_task, 1); } } - mutex_unlock(&adapter->mutex); } bool e1000_has_link(struct e1000_adapter *adapter) @@ -2426,10 +2405,6 @@ static void e1000_watchdog(struct work_struct *work) struct e1000_tx_ring *txdr = adapter->tx_ring; u32 link, tctl; - if (test_bit(__E1000_DOWN, &adapter->flags)) - return; - - mutex_lock(&adapter->mutex); link = e1000_has_link(adapter); if ((netif_carrier_ok(netdev)) && link) goto link_up; @@ -2520,7 +2495,7 @@ link_up: adapter->tx_timeout_count++; schedule_work(&adapter->reset_task); /* exit immediately since reset is imminent */ - goto unlock; + return; } } @@ -2548,9 +2523,6 @@ link_up: /* Reschedule the task */ if (!test_bit(__E1000_DOWN, &adapter->flags)) schedule_delayed_work(&adapter->watchdog_task, 2 * HZ); - -unlock: - mutex_unlock(&adapter->mutex); } enum latency_range { @@ -3499,10 +3471,8 @@ static void e1000_reset_task(struct work_struct *work) struct e1000_adapter *adapter = container_of(work, struct e1000_adapter, reset_task); - if (test_bit(__E1000_DOWN, &adapter->flags)) - return; e_err(drv, "Reset adapter\n"); - e1000_reinit_safe(adapter); + e1000_reinit_locked(adapter); } /** From 74a1b1ea8a30b035aaad833bbd6b9263e72acfac Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Sat, 23 Nov 2013 07:18:01 +0000 Subject: [PATCH 4/7] e1000: fix possible reset_task running after adapter down On e1000_down(), we should ensure every asynchronous work is canceled before proceeding. Since the watchdog_task can schedule other works apart from itself, it should be stopped first, but currently it is stopped after the reset_task. This can result in the following race leading to the reset_task running after the module unload: e1000_down_and_stop(): e1000_watchdog(): ---------------------- ----------------- cancel_work_sync(reset_task) schedule_work(reset_task) cancel_delayed_work_sync(watchdog_task) The patch moves cancel_delayed_work_sync(watchdog_task) at the beginning of e1000_down_and_stop() thus ensuring the race is impossible. Cc: Tushar Dave Cc: Patrick McHardy Signed-off-by: Vladimir Davydov Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000_main.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 619b0cb60f2f..46e6544ed1b7 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -494,13 +494,20 @@ static void e1000_down_and_stop(struct e1000_adapter *adapter) { set_bit(__E1000_DOWN, &adapter->flags); + cancel_delayed_work_sync(&adapter->watchdog_task); + + /* + * Since the watchdog task can reschedule other tasks, we should cancel + * it first, otherwise we can run into the situation when a work is + * still running after the adapter has been turned down. + */ + + cancel_delayed_work_sync(&adapter->phy_info_task); + cancel_delayed_work_sync(&adapter->fifo_stall_task); + /* Only kill reset task if adapter is not resetting */ if (!test_bit(__E1000_RESETTING, &adapter->flags)) cancel_work_sync(&adapter->reset_task); - - cancel_delayed_work_sync(&adapter->watchdog_task); - cancel_delayed_work_sync(&adapter->phy_info_task); - cancel_delayed_work_sync(&adapter->fifo_stall_task); } void e1000_down(struct e1000_adapter *adapter) From ae72c8d068b3440c0b45df284303794cfbc0f4f2 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Sat, 9 Nov 2013 07:11:26 +0000 Subject: [PATCH 5/7] ixgbe: ixgbe_fwd_ring_down needs to be static When compiling with -Wstrict-prototypes gcc catches a static I missed. ./ixgbe_main.c:4254: warning: no previous prototype for 'ixgbe_fwd_ring_down' Reported-by: Phillip Schmitt Signed-off-by: John Fastabend Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 0c55079ebee3..ad9d67095d52 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -4251,8 +4251,8 @@ static void ixgbe_disable_fwd_ring(struct ixgbe_fwd_adapter *vadapter, rx_ring->l2_accel_priv = NULL; } -int ixgbe_fwd_ring_down(struct net_device *vdev, - struct ixgbe_fwd_adapter *accel) +static int ixgbe_fwd_ring_down(struct net_device *vdev, + struct ixgbe_fwd_adapter *accel) { struct ixgbe_adapter *adapter = accel->real_adapter; unsigned int rxbase = accel->rx_base_queue; From 8bf1264d2f12c6fa8645d097275568c1b62c246c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 12 Nov 2013 12:13:29 +0000 Subject: [PATCH 6/7] ixgbe: turn NETIF_F_HW_L2FW_DOFFLOAD off by default NETIF_F_HW_L2FW_DOFFLOAD allows upper layer net devices such as macvlan to use queues in the hardware to directly submit and receive skbs. This creates a subtle change in the datapath though. One change being the skb may no longer use the root devices qdisc. Because users may not expect this we can't enable the feature by default unless the hardware can offload all the software functionality above it. So for now disable it by default and let users opt in. Signed-off-by: John Fastabend Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index ad9d67095d52..cc06854296a3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7986,10 +7986,9 @@ skip_sriov: NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_RXHASH | - NETIF_F_RXCSUM | - NETIF_F_HW_L2FW_DOFFLOAD; + NETIF_F_RXCSUM; - netdev->hw_features = netdev->features; + netdev->hw_features = netdev->features | NETIF_F_HW_L2FW_DOFFLOAD; switch (adapter->hw.mac.type) { case ixgbe_mac_82599EB: From 8821754704a661623083a9dedf425114199600ba Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Sat, 23 Nov 2013 03:19:19 +0000 Subject: [PATCH 7/7] ixgbe: Make ixgbe_identify_qsfp_module_generic static Correct a namespace complaint by making the function static and moving the prototype into the .c file. Signed-off-by: Mark Rustad Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c | 3 ++- drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index e4c676006be9..39217e5ff7dc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -46,6 +46,7 @@ static bool ixgbe_get_i2c_data(u32 *i2cctl); static void ixgbe_i2c_bus_clear(struct ixgbe_hw *hw); static enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id); static s32 ixgbe_get_phy_id(struct ixgbe_hw *hw); +static s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw); /** * ixgbe_identify_phy_generic - Get physical layer module @@ -1164,7 +1165,7 @@ err_read_i2c_eeprom: * * Searches for and identifies the QSFP module and assigns appropriate PHY type **/ -s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) +static s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) { struct ixgbe_adapter *adapter = hw->back; s32 status = IXGBE_ERR_PHY_ADDR_INVALID; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h index aae900a256da..fffcbdd2bf0e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h @@ -145,7 +145,6 @@ s32 ixgbe_get_phy_firmware_version_generic(struct ixgbe_hw *hw, s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw); s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw); s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw); -s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw); s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, u16 *list_offset, u16 *data_offset);