From 19d478bbe690a37489f58843dec20a456573d89f Mon Sep 17 00:00:00 2001 From: Don Skidmore Date: Fri, 7 Oct 2011 03:53:51 +0000 Subject: [PATCH 01/13] ixgbe: bump version number Bump the version string to better match pair up with the out of tree driver that contains the same functionality. Signed-off-by: Don Skidmore Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 1f936c88ec67..1519a23421af 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -56,8 +56,8 @@ char ixgbe_driver_name[] = "ixgbe"; static const char ixgbe_driver_string[] = "Intel(R) 10 Gigabit PCI Express Network Driver"; #define MAJ 3 -#define MIN 4 -#define BUILD 8 +#define MIN 6 +#define BUILD 7 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ __stringify(BUILD) "-k" const char ixgbe_driver_version[] = DRV_VERSION; From a4010afef585b7142eb605e3a6e4210c0e1b2957 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Wed, 5 Oct 2011 07:24:41 +0000 Subject: [PATCH 02/13] e1000: convert hardware management from timers to threads Thomas Gleixner (tglx) reported that e1000 was delaying for many milliseconds (using mdelay) from inside timer/interrupt context. None of these paths are performance critical and can be moved into threads/work items. This patch implements the work items and the next patch changes the mdelays to msleeps. Signed-off-by: Jesse Brandeburg CC: Thomas Gleixner CC: Tushar Dave Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000.h | 10 +- drivers/net/ethernet/intel/e1000/e1000_main.c | 129 +++++++----------- 2 files changed, 55 insertions(+), 84 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000.h b/drivers/net/ethernet/intel/e1000/e1000.h index 4ea87b19ac1a..fc6fbbda98d9 100644 --- a/drivers/net/ethernet/intel/e1000/e1000.h +++ b/drivers/net/ethernet/intel/e1000/e1000.h @@ -214,9 +214,6 @@ struct e1000_rx_ring { /* board specific private data structure */ struct e1000_adapter { - struct timer_list tx_fifo_stall_timer; - struct timer_list watchdog_timer; - struct timer_list phy_info_timer; unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; u16 mng_vlan_id; u32 bd_number; @@ -237,7 +234,6 @@ struct e1000_adapter { u16 tx_itr; u16 rx_itr; - struct work_struct reset_task; u8 fc_autoneg; /* TX */ @@ -310,8 +306,10 @@ struct e1000_adapter { bool discarding; - struct work_struct fifo_stall_task; - struct work_struct phy_info_task; + struct work_struct reset_task; + struct delayed_work watchdog_task; + struct delayed_work fifo_stall_task; + struct delayed_work phy_info_task; }; enum e1000_state_t { diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 4bbc05ad9ba1..a0c5ea0d3fd5 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -131,10 +131,8 @@ static void e1000_clean_tx_ring(struct e1000_adapter *adapter, static void e1000_clean_rx_ring(struct e1000_adapter *adapter, struct e1000_rx_ring *rx_ring); static void e1000_set_rx_mode(struct net_device *netdev); -static void e1000_update_phy_info(unsigned long data); static void e1000_update_phy_info_task(struct work_struct *work); -static void e1000_watchdog(unsigned long data); -static void e1000_82547_tx_fifo_stall(unsigned long data); +static void e1000_watchdog(struct work_struct *work); static void e1000_82547_tx_fifo_stall_task(struct work_struct *work); static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev); @@ -493,6 +491,15 @@ out: return; } +static void e1000_down_and_stop(struct e1000_adapter *adapter) +{ + set_bit(__E1000_DOWN, &adapter->flags); + cancel_work_sync(&adapter->reset_task); + cancel_delayed_work_sync(&adapter->watchdog_task); + cancel_delayed_work_sync(&adapter->phy_info_task); + cancel_delayed_work_sync(&adapter->fifo_stall_task); +} + void e1000_down(struct e1000_adapter *adapter) { struct e1000_hw *hw = &adapter->hw; @@ -522,13 +529,9 @@ void e1000_down(struct e1000_adapter *adapter) /* * Setting DOWN must be after irq_disable to prevent * a screaming interrupt. Setting DOWN also prevents - * timers and tasks from rescheduling. + * tasks from rescheduling. */ - set_bit(__E1000_DOWN, &adapter->flags); - - del_timer_sync(&adapter->tx_fifo_stall_timer); - del_timer_sync(&adapter->watchdog_timer); - del_timer_sync(&adapter->phy_info_timer); + e1000_down_and_stop(adapter); adapter->link_speed = 0; adapter->link_duplex = 0; @@ -1120,21 +1123,12 @@ static int __devinit e1000_probe(struct pci_dev *pdev, if (!is_valid_ether_addr(netdev->perm_addr)) e_err(probe, "Invalid MAC Address\n"); - init_timer(&adapter->tx_fifo_stall_timer); - adapter->tx_fifo_stall_timer.function = e1000_82547_tx_fifo_stall; - adapter->tx_fifo_stall_timer.data = (unsigned long)adapter; - init_timer(&adapter->watchdog_timer); - adapter->watchdog_timer.function = e1000_watchdog; - adapter->watchdog_timer.data = (unsigned long) adapter; - - init_timer(&adapter->phy_info_timer); - adapter->phy_info_timer.function = e1000_update_phy_info; - adapter->phy_info_timer.data = (unsigned long)adapter; - - INIT_WORK(&adapter->fifo_stall_task, e1000_82547_tx_fifo_stall_task); + INIT_DELAYED_WORK(&adapter->watchdog_task, e1000_watchdog); + INIT_DELAYED_WORK(&adapter->fifo_stall_task, + e1000_82547_tx_fifo_stall_task); + INIT_DELAYED_WORK(&adapter->phy_info_task, e1000_update_phy_info_task); INIT_WORK(&adapter->reset_task, e1000_reset_task); - INIT_WORK(&adapter->phy_info_task, e1000_update_phy_info_task); e1000_check_options(adapter); @@ -1279,13 +1273,7 @@ static void __devexit e1000_remove(struct pci_dev *pdev) struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; - set_bit(__E1000_DOWN, &adapter->flags); - del_timer_sync(&adapter->tx_fifo_stall_timer); - del_timer_sync(&adapter->watchdog_timer); - del_timer_sync(&adapter->phy_info_timer); - - cancel_work_sync(&adapter->reset_task); - + e1000_down_and_stop(adapter); e1000_release_manageability(adapter); unregister_netdev(netdev); @@ -1369,7 +1357,7 @@ static int __devinit e1000_alloc_queues(struct e1000_adapter *adapter) * The open entry point is called when a network interface is made * active by the system (IFF_UP). At this point all resources needed * for transmit and receive operations are allocated, the interrupt - * handler is registered with the OS, the watchdog timer is started, + * handler is registered with the OS, the watchdog task is started, * and the stack is notified that the interface is ready. **/ @@ -2331,37 +2319,23 @@ static void e1000_set_rx_mode(struct net_device *netdev) kfree(mcarray); } -/* Need to wait a few seconds after link up to get diagnostic information from - * the phy */ - -static void e1000_update_phy_info(unsigned long data) -{ - struct e1000_adapter *adapter = (struct e1000_adapter *)data; - schedule_work(&adapter->phy_info_task); -} - +/** + * e1000_update_phy_info_task - get phy info + * @work: work struct contained inside adapter struct + * + * Need to wait a few seconds after link up to get diagnostic information from + * the phy + */ static void e1000_update_phy_info_task(struct work_struct *work) { struct e1000_adapter *adapter = container_of(work, - struct e1000_adapter, - phy_info_task); - struct e1000_hw *hw = &adapter->hw; - + struct e1000_adapter, + phy_info_task.work); rtnl_lock(); - e1000_phy_get_info(hw, &adapter->phy_info); + e1000_phy_get_info(&adapter->hw, &adapter->phy_info); rtnl_unlock(); } -/** - * e1000_82547_tx_fifo_stall - Timer Call-back - * @data: pointer to adapter cast into an unsigned long - **/ -static void e1000_82547_tx_fifo_stall(unsigned long data) -{ - struct e1000_adapter *adapter = (struct e1000_adapter *)data; - schedule_work(&adapter->fifo_stall_task); -} - /** * e1000_82547_tx_fifo_stall_task - task to complete work * @work: work struct contained inside adapter struct @@ -2369,8 +2343,8 @@ static void e1000_82547_tx_fifo_stall(unsigned long data) static void e1000_82547_tx_fifo_stall_task(struct work_struct *work) { struct e1000_adapter *adapter = container_of(work, - struct e1000_adapter, - fifo_stall_task); + struct e1000_adapter, + fifo_stall_task.work); struct e1000_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; u32 tctl; @@ -2393,7 +2367,7 @@ static void e1000_82547_tx_fifo_stall_task(struct work_struct *work) atomic_set(&adapter->tx_fifo_stall, 0); netif_wake_queue(netdev); } else if (!test_bit(__E1000_DOWN, &adapter->flags)) { - mod_timer(&adapter->tx_fifo_stall_timer, jiffies + 1); + schedule_delayed_work(&adapter->fifo_stall_task, 1); } } rtnl_unlock(); @@ -2437,12 +2411,14 @@ bool e1000_has_link(struct e1000_adapter *adapter) } /** - * e1000_watchdog - Timer Call-back - * @data: pointer to adapter cast into an unsigned long + * e1000_watchdog - work function + * @work: work struct contained inside adapter struct **/ -static void e1000_watchdog(unsigned long data) +static void e1000_watchdog(struct work_struct *work) { - struct e1000_adapter *adapter = (struct e1000_adapter *)data; + struct e1000_adapter *adapter = container_of(work, + struct e1000_adapter, + watchdog_task.work); struct e1000_hw *hw = &adapter->hw; struct net_device *netdev = adapter->netdev; struct e1000_tx_ring *txdr = adapter->tx_ring; @@ -2493,8 +2469,8 @@ static void e1000_watchdog(unsigned long data) netif_carrier_on(netdev); if (!test_bit(__E1000_DOWN, &adapter->flags)) - mod_timer(&adapter->phy_info_timer, - round_jiffies(jiffies + 2 * HZ)); + schedule_delayed_work(&adapter->phy_info_task, + 2 * HZ); adapter->smartspeed = 0; } } else { @@ -2506,8 +2482,8 @@ static void e1000_watchdog(unsigned long data) netif_carrier_off(netdev); if (!test_bit(__E1000_DOWN, &adapter->flags)) - mod_timer(&adapter->phy_info_timer, - round_jiffies(jiffies + 2 * HZ)); + schedule_delayed_work(&adapter->phy_info_task, + 2 * HZ); } e1000_smartspeed(adapter); @@ -2563,10 +2539,9 @@ link_up: /* Force detection of hung controller every watchdog period */ adapter->detect_tx_hung = true; - /* Reset the timer */ + /* Reschedule the task */ if (!test_bit(__E1000_DOWN, &adapter->flags)) - mod_timer(&adapter->watchdog_timer, - round_jiffies(jiffies + 2 * HZ)); + schedule_delayed_work(&adapter->watchdog_task, 2 * HZ); } enum latency_range { @@ -3206,14 +3181,12 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, if (unlikely(e1000_maybe_stop_tx(netdev, tx_ring, count + 2))) return NETDEV_TX_BUSY; - if (unlikely(hw->mac_type == e1000_82547)) { - if (unlikely(e1000_82547_fifo_workaround(adapter, skb))) { - netif_stop_queue(netdev); - if (!test_bit(__E1000_DOWN, &adapter->flags)) - mod_timer(&adapter->tx_fifo_stall_timer, - jiffies + 1); - return NETDEV_TX_BUSY; - } + if (unlikely((hw->mac_type == e1000_82547) && + (e1000_82547_fifo_workaround(adapter, skb)))) { + netif_stop_queue(netdev); + if (!test_bit(__E1000_DOWN, &adapter->flags)) + schedule_delayed_work(&adapter->fifo_stall_task, 1); + return NETDEV_TX_BUSY; } if (vlan_tx_tag_present(skb)) { @@ -3283,7 +3256,7 @@ static void e1000_reset_task(struct work_struct *work) * @netdev: network interface device structure * * Returns the address of the device statistics structure. - * The statistics are actually updated from the timer callback. + * The statistics are actually updated from the watchdog. **/ static struct net_device_stats *e1000_get_stats(struct net_device *netdev) @@ -3551,7 +3524,7 @@ static irqreturn_t e1000_intr(int irq, void *data) hw->get_link_status = 1; /* guard against interrupt when we're going down */ if (!test_bit(__E1000_DOWN, &adapter->flags)) - mod_timer(&adapter->watchdog_timer, jiffies + 1); + schedule_delayed_work(&adapter->watchdog_task, 1); } /* disable interrupts, without the synchronize_irq bit */ From 4e0d8f7d97f9150bdd07f6355e5c1486967dce79 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Wed, 5 Oct 2011 07:24:46 +0000 Subject: [PATCH 03/13] e1000: convert mdelay to msleep With the previous commit, there are several functions that are only ever called from thread context, and are able to sleep with msleep instead of mdelay. Signed-off-by: Jesse Brandeburg CC: Thomas Gleixner CC: Tushar Dave Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000_hw.c | 22 +++++++++---------- drivers/net/ethernet/intel/e1000/e1000_main.c | 2 +- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c index a5a89ecb6f36..36ee76bf4cba 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_hw.c +++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c @@ -5385,7 +5385,7 @@ static s32 e1000_config_dsp_after_link_change(struct e1000_hw *hw, bool link_up) if (ret_val) return ret_val; - mdelay(20); + msleep(20); ret_val = e1000_write_phy_reg(hw, 0x0000, IGP01E1000_IEEE_FORCE_GIGA); @@ -5413,7 +5413,7 @@ static s32 e1000_config_dsp_after_link_change(struct e1000_hw *hw, bool link_up) if (ret_val) return ret_val; - mdelay(20); + msleep(20); /* Now enable the transmitter */ ret_val = @@ -5440,7 +5440,7 @@ static s32 e1000_config_dsp_after_link_change(struct e1000_hw *hw, bool link_up) if (ret_val) return ret_val; - mdelay(20); + msleep(20); ret_val = e1000_write_phy_reg(hw, 0x0000, IGP01E1000_IEEE_FORCE_GIGA); @@ -5457,7 +5457,7 @@ static s32 e1000_config_dsp_after_link_change(struct e1000_hw *hw, bool link_up) if (ret_val) return ret_val; - mdelay(20); + msleep(20); /* Now enable the transmitter */ ret_val = @@ -5750,26 +5750,26 @@ static s32 e1000_polarity_reversal_workaround(struct e1000_hw *hw) if ((mii_status_reg & ~MII_SR_LINK_STATUS) == 0) break; - mdelay(100); + msleep(100); } /* Recommended delay time after link has been lost */ - mdelay(1000); + msleep(1000); /* Now we will re-enable th transmitter on the PHY */ ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_PAGE_SELECT, 0x0019); if (ret_val) return ret_val; - mdelay(50); + msleep(50); ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFFF0); if (ret_val) return ret_val; - mdelay(50); + msleep(50); ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xFF00); if (ret_val) return ret_val; - mdelay(50); + msleep(50); ret_val = e1000_write_phy_reg(hw, M88E1000_PHY_GEN_CONTROL, 0x0000); if (ret_val) return ret_val; @@ -5794,7 +5794,7 @@ static s32 e1000_polarity_reversal_workaround(struct e1000_hw *hw) if (mii_status_reg & MII_SR_LINK_STATUS) break; - mdelay(100); + msleep(100); } return E1000_SUCCESS; } @@ -5825,6 +5825,6 @@ static s32 e1000_get_auto_rd_done(struct e1000_hw *hw) static s32 e1000_get_phy_cfg_done(struct e1000_hw *hw) { e_dbg("e1000_get_phy_cfg_done"); - mdelay(10); + msleep(10); return E1000_SUCCESS; } diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index a0c5ea0d3fd5..6d03d7672699 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -485,7 +485,7 @@ static void e1000_power_down_phy(struct e1000_adapter *adapter) e1000_read_phy_reg(hw, PHY_CTRL, &mii_reg); mii_reg |= MII_CR_POWER_DOWN; e1000_write_phy_reg(hw, PHY_CTRL, mii_reg); - mdelay(1); + msleep(1); } out: return; From 0ef4eedc2e98edd51cd106e1f6a27178622b7e57 Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Wed, 5 Oct 2011 07:24:51 +0000 Subject: [PATCH 04/13] e1000: convert to private mutex from rtnl The e1000 driver when running with lockdep could run into some possible deadlocks between the work items acquiring rtnl and the rtnl lock being acquired before work items were cancelled. Use a private mutex to make sure lock ordering isn't violated. The private mutex is only used to protect areas not generally covered by the rtnl lock already. Signed-off-by: Jesse Brandeburg CC: Thomas Gleixner CC: Tushar Dave Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000.h | 2 + drivers/net/ethernet/intel/e1000/e1000_main.c | 38 ++++++++++++++----- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000.h b/drivers/net/ethernet/intel/e1000/e1000.h index fc6fbbda98d9..1e1596990b5c 100644 --- a/drivers/net/ethernet/intel/e1000/e1000.h +++ b/drivers/net/ethernet/intel/e1000/e1000.h @@ -310,6 +310,8 @@ struct e1000_adapter { struct delayed_work watchdog_task; struct delayed_work fifo_stall_task; struct delayed_work phy_info_task; + + struct mutex mutex; }; enum e1000_state_t { diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 6d03d7672699..a42421f26678 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -546,10 +546,10 @@ static void e1000_reinit_safe(struct e1000_adapter *adapter) { while (test_and_set_bit(__E1000_RESETTING, &adapter->flags)) msleep(1); - rtnl_lock(); + mutex_lock(&adapter->mutex); e1000_down(adapter); e1000_up(adapter); - rtnl_unlock(); + mutex_unlock(&adapter->mutex); clear_bit(__E1000_RESETTING, &adapter->flags); } @@ -1317,6 +1317,7 @@ static int __devinit e1000_sw_init(struct e1000_adapter *adapter) e1000_irq_disable(adapter); spin_lock_init(&adapter->stats_lock); + mutex_init(&adapter->mutex); set_bit(__E1000_DOWN, &adapter->flags); @@ -2331,9 +2332,11 @@ static void e1000_update_phy_info_task(struct work_struct *work) struct e1000_adapter *adapter = container_of(work, struct e1000_adapter, phy_info_task.work); - rtnl_lock(); + if (test_bit(__E1000_DOWN, &adapter->flags)) + return; + mutex_lock(&adapter->mutex); e1000_phy_get_info(&adapter->hw, &adapter->phy_info); - rtnl_unlock(); + mutex_unlock(&adapter->mutex); } /** @@ -2349,7 +2352,9 @@ static void e1000_82547_tx_fifo_stall_task(struct work_struct *work) struct net_device *netdev = adapter->netdev; u32 tctl; - rtnl_lock(); + if (test_bit(__E1000_DOWN, &adapter->flags)) + return; + mutex_lock(&adapter->mutex); if (atomic_read(&adapter->tx_fifo_stall)) { if ((er32(TDT) == er32(TDH)) && (er32(TDFT) == er32(TDFH)) && @@ -2370,7 +2375,7 @@ static void e1000_82547_tx_fifo_stall_task(struct work_struct *work) schedule_delayed_work(&adapter->fifo_stall_task, 1); } } - rtnl_unlock(); + mutex_unlock(&adapter->mutex); } bool e1000_has_link(struct e1000_adapter *adapter) @@ -2424,6 +2429,10 @@ static void e1000_watchdog(struct work_struct *work) struct e1000_tx_ring *txdr = adapter->tx_ring; u32 link, tctl; + if (test_bit(__E1000_DOWN, &adapter->flags)) + return; + + mutex_lock(&adapter->mutex); link = e1000_has_link(adapter); if ((netif_carrier_ok(netdev)) && link) goto link_up; @@ -2512,8 +2521,8 @@ link_up: * (Do the reset outside of interrupt context). */ adapter->tx_timeout_count++; schedule_work(&adapter->reset_task); - /* return immediately since reset is imminent */ - return; + /* exit immediately since reset is imminent */ + goto unlock; } } @@ -2542,6 +2551,9 @@ link_up: /* Reschedule the task */ if (!test_bit(__E1000_DOWN, &adapter->flags)) schedule_delayed_work(&adapter->watchdog_task, 2 * HZ); + +unlock: + mutex_unlock(&adapter->mutex); } enum latency_range { @@ -3248,6 +3260,8 @@ static void e1000_reset_task(struct work_struct *work) struct e1000_adapter *adapter = container_of(work, struct e1000_adapter, reset_task); + if (test_bit(__E1000_DOWN, &adapter->flags)) + return; e1000_reinit_safe(adapter); } @@ -4702,6 +4716,8 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake) netif_device_detach(netdev); + mutex_lock(&adapter->mutex); + if (netif_running(netdev)) { WARN_ON(test_bit(__E1000_RESETTING, &adapter->flags)); e1000_down(adapter); @@ -4709,8 +4725,10 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake) #ifdef CONFIG_PM retval = pci_save_state(pdev); - if (retval) + if (retval) { + mutex_unlock(&adapter->mutex); return retval; + } #endif status = er32(STATUS); @@ -4765,6 +4783,8 @@ static int __e1000_shutdown(struct pci_dev *pdev, bool *enable_wake) if (netif_running(netdev)) e1000_free_irq(adapter); + mutex_unlock(&adapter->mutex); + pci_disable_device(pdev); return 0; From b64e9dd5d04561c2cee7e9d9d70bd6d45cc01e7c Mon Sep 17 00:00:00 2001 From: Bruce Allan Date: Fri, 30 Sep 2011 08:07:00 +0000 Subject: [PATCH 05/13] e1000e: bad short packets received when jumbos enabled on 82579 When short packets are received with jumbos enabled on 82579, they can be interpreted to have a receive address that does not match any configured address. This is due to a hardware bug that can be worked around by reducing the number of IPG octets added when the packet is transferred from the PHY to the MAC. Signed-off-by: Bruce Allan Tested-by: Jeff Pieper Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000e/ich8lan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c index ad34de086ee1..4f709749dbce 100644 --- a/drivers/net/ethernet/intel/e1000e/ich8lan.c +++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c @@ -1578,7 +1578,7 @@ s32 e1000_lv_jumbo_workaround_ich8lan(struct e1000_hw *hw, bool enable) ret_val = e1e_wphy(hw, PHY_REG(776, 20), data); if (ret_val) goto out; - ret_val = e1e_wphy(hw, PHY_REG(776, 23), 0xFE00); + ret_val = e1e_wphy(hw, PHY_REG(776, 23), 0xF100); if (ret_val) goto out; e1e_rphy(hw, HV_PM_CTRL, &data); From 13fde97a48b622a192ae7d0a8011248be891cdd4 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 5 Oct 2011 13:35:24 +0000 Subject: [PATCH 06/13] igb: Make Tx budget for NAPI user adjustable This change is to make the NAPI budget limits for transmit adjustable. Currently they are only set to 128, and when the changes/improvements to NAPI occur to allow for adjustability, it would be possible to tune the value for optimal performance with applications such as routing. v2: remove tie between NAPI and interrupt moderation fix work limit define name (s/IXGBE/IGB/) Update patch description to better reflect patch Signed-off-by: Alexander Duyck Signed-off-by: Jeff Kirsher Tested-by: Aaron Brown --- drivers/net/ethernet/intel/igb/igb.h | 3 + drivers/net/ethernet/intel/igb/igb_ethtool.c | 1 + drivers/net/ethernet/intel/igb/igb_main.c | 138 +++++++++++-------- 3 files changed, 88 insertions(+), 54 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index b72593785600..beab918e09ab 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -47,6 +47,7 @@ struct igb_adapter; /* TX/RX descriptor defines */ #define IGB_DEFAULT_TXD 256 +#define IGB_DEFAULT_TX_WORK 128 #define IGB_MIN_TXD 80 #define IGB_MAX_TXD 4096 @@ -177,6 +178,7 @@ struct igb_q_vector { u32 eims_value; u16 cpu; + u16 tx_work_limit; u16 itr_val; u8 set_itr; @@ -266,6 +268,7 @@ struct igb_adapter { u16 rx_itr; /* TX */ + u16 tx_work_limit; u32 tx_timeout_count; int num_tx_queues; struct igb_ring *tx_ring[16]; diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index f231d82cc6cf..a445c4fbf19e 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2011,6 +2011,7 @@ static int igb_set_coalesce(struct net_device *netdev, for (i = 0; i < adapter->num_q_vectors; i++) { struct igb_q_vector *q_vector = adapter->q_vector[i]; + q_vector->tx_work_limit = adapter->tx_work_limit; if (q_vector->rx_ring) q_vector->itr_val = adapter->rx_itr_setting; else diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 7ad25e867add..12faa99cac53 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -136,8 +136,8 @@ static irqreturn_t igb_msix_ring(int irq, void *); static void igb_update_dca(struct igb_q_vector *); static void igb_setup_dca(struct igb_adapter *); #endif /* CONFIG_IGB_DCA */ -static bool igb_clean_tx_irq(struct igb_q_vector *); static int igb_poll(struct napi_struct *, int); +static bool igb_clean_tx_irq(struct igb_q_vector *); static bool igb_clean_rx_irq(struct igb_q_vector *, int); static int igb_ioctl(struct net_device *, struct ifreq *, int cmd); static void igb_tx_timeout(struct net_device *); @@ -1120,6 +1120,7 @@ static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter, q_vector->tx_ring = adapter->tx_ring[ring_idx]; q_vector->tx_ring->q_vector = q_vector; q_vector->itr_val = adapter->tx_itr_setting; + q_vector->tx_work_limit = adapter->tx_work_limit; if (q_vector->itr_val && q_vector->itr_val <= 3) q_vector->itr_val = IGB_START_ITR; } @@ -2388,11 +2389,17 @@ static int __devinit igb_sw_init(struct igb_adapter *adapter) pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); + /* set default ring sizes */ adapter->tx_ring_count = IGB_DEFAULT_TXD; adapter->rx_ring_count = IGB_DEFAULT_RXD; + + /* set default ITR values */ adapter->rx_itr_setting = IGB_DEFAULT_ITR; adapter->tx_itr_setting = IGB_DEFAULT_ITR; + /* set default work limits */ + adapter->tx_work_limit = IGB_DEFAULT_TX_WORK; + adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; @@ -5496,7 +5503,7 @@ static int igb_poll(struct napi_struct *napi, int budget) igb_update_dca(q_vector); #endif if (q_vector->tx_ring) - clean_complete = !!igb_clean_tx_irq(q_vector); + clean_complete = igb_clean_tx_irq(q_vector); if (q_vector->rx_ring) clean_complete &= igb_clean_rx_irq(q_vector, budget); @@ -5578,64 +5585,69 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) { struct igb_adapter *adapter = q_vector->adapter; struct igb_ring *tx_ring = q_vector->tx_ring; - struct net_device *netdev = tx_ring->netdev; - struct e1000_hw *hw = &adapter->hw; - struct igb_buffer *buffer_info; - union e1000_adv_tx_desc *tx_desc, *eop_desc; + struct igb_buffer *tx_buffer; + union e1000_adv_tx_desc *tx_desc; unsigned int total_bytes = 0, total_packets = 0; - unsigned int i, eop, count = 0; - bool cleaned = false; + unsigned int budget = q_vector->tx_work_limit; + u16 i = tx_ring->next_to_clean; - i = tx_ring->next_to_clean; - eop = tx_ring->buffer_info[i].next_to_watch; - eop_desc = IGB_TX_DESC(tx_ring, eop); + if (test_bit(__IGB_DOWN, &adapter->state)) + return true; - while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) && - (count < tx_ring->count)) { - rmb(); /* read buffer_info after eop_desc status */ - for (cleaned = false; !cleaned; count++) { - tx_desc = IGB_TX_DESC(tx_ring, i); - buffer_info = &tx_ring->buffer_info[i]; - cleaned = (i == eop); + tx_buffer = &tx_ring->buffer_info[i]; + tx_desc = IGB_TX_DESC(tx_ring, i); - if (buffer_info->skb) { - total_bytes += buffer_info->bytecount; - /* gso_segs is currently only valid for tcp */ - total_packets += buffer_info->gso_segs; - igb_tx_hwtstamp(q_vector, buffer_info); + for (; budget; budget--) { + u16 eop = tx_buffer->next_to_watch; + union e1000_adv_tx_desc *eop_desc; + + eop_desc = IGB_TX_DESC(tx_ring, eop); + + /* if DD is not set pending work has not been completed */ + if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD))) + break; + + /* prevent any other reads prior to eop_desc being verified */ + rmb(); + + do { + tx_desc->wb.status = 0; + if (likely(tx_desc == eop_desc)) { + eop_desc = NULL; + + total_bytes += tx_buffer->bytecount; + total_packets += tx_buffer->gso_segs; + igb_tx_hwtstamp(q_vector, tx_buffer); } - igb_unmap_and_free_tx_resource(tx_ring, buffer_info); - tx_desc->wb.status = 0; + igb_unmap_and_free_tx_resource(tx_ring, tx_buffer); + tx_buffer++; + tx_desc++; i++; - if (i == tx_ring->count) + if (unlikely(i == tx_ring->count)) { i = 0; - } - eop = tx_ring->buffer_info[i].next_to_watch; - eop_desc = IGB_TX_DESC(tx_ring, eop); + tx_buffer = tx_ring->buffer_info; + tx_desc = IGB_TX_DESC(tx_ring, 0); + } + } while (eop_desc); } tx_ring->next_to_clean = i; - - if (unlikely(count && - netif_carrier_ok(netdev) && - igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) { - /* Make sure that anybody stopping the queue after this - * sees the new next_to_clean. - */ - smp_mb(); - if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) && - !(test_bit(__IGB_DOWN, &adapter->state))) { - netif_wake_subqueue(netdev, tx_ring->queue_index); - - u64_stats_update_begin(&tx_ring->tx_syncp); - tx_ring->tx_stats.restart_queue++; - u64_stats_update_end(&tx_ring->tx_syncp); - } - } + u64_stats_update_begin(&tx_ring->tx_syncp); + tx_ring->tx_stats.bytes += total_bytes; + tx_ring->tx_stats.packets += total_packets; + u64_stats_update_end(&tx_ring->tx_syncp); + tx_ring->total_bytes += total_bytes; + tx_ring->total_packets += total_packets; if (tx_ring->detect_tx_hung) { + struct e1000_hw *hw = &adapter->hw; + u16 eop = tx_ring->buffer_info[i].next_to_watch; + union e1000_adv_tx_desc *eop_desc; + + eop_desc = IGB_TX_DESC(tx_ring, eop); + /* Detect a transmit hang in hardware, this serializes the * check with the clearing of time_stamp and movement of i */ tx_ring->detect_tx_hung = false; @@ -5666,16 +5678,34 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) eop, jiffies, eop_desc->wb.status); - netif_stop_subqueue(netdev, tx_ring->queue_index); + netif_stop_subqueue(tx_ring->netdev, + tx_ring->queue_index); + + /* we are about to reset, no point in enabling stuff */ + return true; } } - tx_ring->total_bytes += total_bytes; - tx_ring->total_packets += total_packets; - u64_stats_update_begin(&tx_ring->tx_syncp); - tx_ring->tx_stats.bytes += total_bytes; - tx_ring->tx_stats.packets += total_packets; - u64_stats_update_end(&tx_ring->tx_syncp); - return count < tx_ring->count; + + if (unlikely(total_packets && + netif_carrier_ok(tx_ring->netdev) && + igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) { + /* Make sure that anybody stopping the queue after this + * sees the new next_to_clean. + */ + smp_mb(); + if (__netif_subqueue_stopped(tx_ring->netdev, + tx_ring->queue_index) && + !(test_bit(__IGB_DOWN, &adapter->state))) { + netif_wake_subqueue(tx_ring->netdev, + tx_ring->queue_index); + + u64_stats_update_begin(&tx_ring->tx_syncp); + tx_ring->tx_stats.restart_queue++; + u64_stats_update_end(&tx_ring->tx_syncp); + } + } + + return !!budget; } static inline void igb_rx_checksum(struct igb_ring *ring, From 0603464956e863810af60c08b4b2e8ab50363a54 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 26 Aug 2011 07:44:22 +0000 Subject: [PATCH 07/13] igb: split buffer_info into tx_buffer_info and rx_buffer_info In order to be able to improve the performance of the TX path it has been necessary to add addition info to the tx_buffer_info structure. However a side effect is that the structure has gotten larger and this in turn has also increased the size of the RX buffer info structure. In order to avoid this in the future I am splitting the single buffer_info structure into two separate ones and instead I will join them by making the buffer_info pointer in the ring a union of the two. Signed-off-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb.h | 42 +++---- drivers/net/ethernet/intel/igb/igb_ethtool.c | 15 +-- drivers/net/ethernet/intel/igb/igb_main.c | 123 ++++++++++--------- 3 files changed, 92 insertions(+), 88 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index beab918e09ab..56c68fc8bca2 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -132,27 +132,24 @@ struct vf_data_storage { /* wrapper around a pointer to a socket buffer, * so a DMA handle can be stored along with the buffer */ -struct igb_buffer { +struct igb_tx_buffer { + u16 next_to_watch; + unsigned long time_stamp; + dma_addr_t dma; + u32 length; + u32 tx_flags; + struct sk_buff *skb; + unsigned int bytecount; + u16 gso_segs; + u8 mapped_as_page; +}; + +struct igb_rx_buffer { struct sk_buff *skb; dma_addr_t dma; - union { - /* TX */ - struct { - unsigned long time_stamp; - u16 length; - u16 next_to_watch; - unsigned int bytecount; - u16 gso_segs; - u8 tx_flags; - u8 mapped_as_page; - }; - /* RX */ - struct { - struct page *page; - dma_addr_t page_dma; - u16 page_offset; - }; - }; + struct page *page; + dma_addr_t page_dma; + u32 page_offset; }; struct igb_tx_queue_stats { @@ -191,7 +188,10 @@ struct igb_ring { struct igb_q_vector *q_vector; /* backlink to q_vector */ struct net_device *netdev; /* back pointer to net_device */ struct device *dev; /* device pointer for dma mapping */ - struct igb_buffer *buffer_info; /* array of buffer info structs */ + union { /* array of buffer info structs */ + struct igb_tx_buffer *tx_buffer_info; + struct igb_rx_buffer *rx_buffer_info; + }; void *desc; /* descriptor ring memory */ unsigned long flags; /* ring specific flags */ void __iomem *tail; /* pointer to ring tail register */ @@ -377,7 +377,7 @@ extern void igb_setup_tctl(struct igb_adapter *); extern void igb_setup_rctl(struct igb_adapter *); extern netdev_tx_t igb_xmit_frame_ring(struct sk_buff *, struct igb_ring *); extern void igb_unmap_and_free_tx_resource(struct igb_ring *, - struct igb_buffer *); + struct igb_tx_buffer *); extern void igb_alloc_rx_buffers(struct igb_ring *, u16); extern void igb_update_stats(struct igb_adapter *, struct rtnl_link_stats64 *); extern bool igb_has_link(struct igb_adapter *adapter); diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index a445c4fbf19e..f227fc57eb11 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -1579,7 +1579,8 @@ static int igb_clean_test_rings(struct igb_ring *rx_ring, unsigned int size) { union e1000_adv_rx_desc *rx_desc; - struct igb_buffer *buffer_info; + struct igb_rx_buffer *rx_buffer_info; + struct igb_tx_buffer *tx_buffer_info; int rx_ntc, tx_ntc, count = 0; u32 staterr; @@ -1591,22 +1592,22 @@ static int igb_clean_test_rings(struct igb_ring *rx_ring, while (staterr & E1000_RXD_STAT_DD) { /* check rx buffer */ - buffer_info = &rx_ring->buffer_info[rx_ntc]; + rx_buffer_info = &rx_ring->rx_buffer_info[rx_ntc]; /* unmap rx buffer, will be remapped by alloc_rx_buffers */ dma_unmap_single(rx_ring->dev, - buffer_info->dma, + rx_buffer_info->dma, IGB_RX_HDR_LEN, DMA_FROM_DEVICE); - buffer_info->dma = 0; + rx_buffer_info->dma = 0; /* verify contents of skb */ - if (!igb_check_lbtest_frame(buffer_info->skb, size)) + if (!igb_check_lbtest_frame(rx_buffer_info->skb, size)) count++; /* unmap buffer on tx side */ - buffer_info = &tx_ring->buffer_info[tx_ntc]; - igb_unmap_and_free_tx_resource(tx_ring, buffer_info); + tx_buffer_info = &tx_ring->tx_buffer_info[tx_ntc]; + igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info); /* increment rx/tx next to clean counters */ rx_ntc++; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 12faa99cac53..2bdc78368b64 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -339,7 +339,6 @@ static void igb_dump(struct igb_adapter *adapter) struct igb_ring *tx_ring; union e1000_adv_tx_desc *tx_desc; struct my_u0 { u64 a; u64 b; } *u0; - struct igb_buffer *buffer_info; struct igb_ring *rx_ring; union e1000_adv_rx_desc *rx_desc; u32 staterr; @@ -376,8 +375,9 @@ static void igb_dump(struct igb_adapter *adapter) printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma ]" " leng ntw timestamp\n"); for (n = 0; n < adapter->num_tx_queues; n++) { + struct igb_tx_buffer *buffer_info; tx_ring = adapter->tx_ring[n]; - buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean]; + buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean]; printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n", n, tx_ring->next_to_use, tx_ring->next_to_clean, (u64)buffer_info->dma, @@ -413,8 +413,9 @@ static void igb_dump(struct igb_adapter *adapter) "leng ntw timestamp bi->skb\n"); for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) { + struct igb_tx_buffer *buffer_info; tx_desc = IGB_TX_DESC(tx_ring, i); - buffer_info = &tx_ring->buffer_info[i]; + buffer_info = &tx_ring->tx_buffer_info[i]; u0 = (struct my_u0 *)tx_desc; printk(KERN_INFO "T [0x%03X] %016llX %016llX %016llX" " %04X %3X %016llX %p", i, @@ -493,7 +494,8 @@ rx_ring_summary: "<-- Adv Rx Write-Back format\n"); for (i = 0; i < rx_ring->count; i++) { - buffer_info = &rx_ring->buffer_info[i]; + struct igb_rx_buffer *buffer_info; + buffer_info = &rx_ring->rx_buffer_info[i]; rx_desc = IGB_RX_DESC(rx_ring, i); u0 = (struct my_u0 *)rx_desc; staterr = le32_to_cpu(rx_desc->wb.upper.status_error); @@ -2576,9 +2578,9 @@ int igb_setup_tx_resources(struct igb_ring *tx_ring) struct device *dev = tx_ring->dev; int size; - size = sizeof(struct igb_buffer) * tx_ring->count; - tx_ring->buffer_info = vzalloc(size); - if (!tx_ring->buffer_info) + size = sizeof(struct igb_tx_buffer) * tx_ring->count; + tx_ring->tx_buffer_info = vzalloc(size); + if (!tx_ring->tx_buffer_info) goto err; /* round up to nearest 4K */ @@ -2598,7 +2600,7 @@ int igb_setup_tx_resources(struct igb_ring *tx_ring) return 0; err: - vfree(tx_ring->buffer_info); + vfree(tx_ring->tx_buffer_info); dev_err(dev, "Unable to allocate memory for the transmit descriptor ring\n"); return -ENOMEM; @@ -2719,9 +2721,9 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) struct device *dev = rx_ring->dev; int size, desc_len; - size = sizeof(struct igb_buffer) * rx_ring->count; - rx_ring->buffer_info = vzalloc(size); - if (!rx_ring->buffer_info) + size = sizeof(struct igb_rx_buffer) * rx_ring->count; + rx_ring->rx_buffer_info = vzalloc(size); + if (!rx_ring->rx_buffer_info) goto err; desc_len = sizeof(union e1000_adv_rx_desc); @@ -2744,8 +2746,8 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) return 0; err: - vfree(rx_ring->buffer_info); - rx_ring->buffer_info = NULL; + vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; dev_err(dev, "Unable to allocate memory for the receive descriptor" " ring\n"); return -ENOMEM; @@ -3107,8 +3109,8 @@ void igb_free_tx_resources(struct igb_ring *tx_ring) { igb_clean_tx_ring(tx_ring); - vfree(tx_ring->buffer_info); - tx_ring->buffer_info = NULL; + vfree(tx_ring->tx_buffer_info); + tx_ring->tx_buffer_info = NULL; /* if not set, then don't free */ if (!tx_ring->desc) @@ -3135,7 +3137,7 @@ static void igb_free_all_tx_resources(struct igb_adapter *adapter) } void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring, - struct igb_buffer *buffer_info) + struct igb_tx_buffer *buffer_info) { if (buffer_info->dma) { if (buffer_info->mapped_as_page) @@ -3166,21 +3168,21 @@ void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring, **/ static void igb_clean_tx_ring(struct igb_ring *tx_ring) { - struct igb_buffer *buffer_info; + struct igb_tx_buffer *buffer_info; unsigned long size; unsigned int i; - if (!tx_ring->buffer_info) + if (!tx_ring->tx_buffer_info) return; /* Free all the Tx ring sk_buffs */ for (i = 0; i < tx_ring->count; i++) { - buffer_info = &tx_ring->buffer_info[i]; + buffer_info = &tx_ring->tx_buffer_info[i]; igb_unmap_and_free_tx_resource(tx_ring, buffer_info); } - size = sizeof(struct igb_buffer) * tx_ring->count; - memset(tx_ring->buffer_info, 0, size); + size = sizeof(struct igb_tx_buffer) * tx_ring->count; + memset(tx_ring->tx_buffer_info, 0, size); /* Zero out the descriptor ring */ memset(tx_ring->desc, 0, tx_ring->size); @@ -3211,8 +3213,8 @@ void igb_free_rx_resources(struct igb_ring *rx_ring) { igb_clean_rx_ring(rx_ring); - vfree(rx_ring->buffer_info); - rx_ring->buffer_info = NULL; + vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; /* if not set, then don't free */ if (!rx_ring->desc) @@ -3247,12 +3249,12 @@ static void igb_clean_rx_ring(struct igb_ring *rx_ring) unsigned long size; u16 i; - if (!rx_ring->buffer_info) + if (!rx_ring->rx_buffer_info) return; /* Free all the Rx ring sk_buffs */ for (i = 0; i < rx_ring->count; i++) { - struct igb_buffer *buffer_info = &rx_ring->buffer_info[i]; + struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; if (buffer_info->dma) { dma_unmap_single(rx_ring->dev, buffer_info->dma, @@ -3279,8 +3281,8 @@ static void igb_clean_rx_ring(struct igb_ring *rx_ring) } } - size = sizeof(struct igb_buffer) * rx_ring->count; - memset(rx_ring->buffer_info, 0, size); + size = sizeof(struct igb_rx_buffer) * rx_ring->count; + memset(rx_ring->rx_buffer_info, 0, size); /* Zero out the descriptor ring */ memset(rx_ring->desc, 0, rx_ring->size); @@ -3964,7 +3966,7 @@ static inline int igb_tso(struct igb_ring *tx_ring, struct e1000_adv_tx_context_desc *context_desc; unsigned int i; int err; - struct igb_buffer *buffer_info; + struct igb_tx_buffer *buffer_info; u32 info = 0, tu_cmd = 0; u32 mss_l4len_idx; u8 l4len; @@ -3995,7 +3997,7 @@ static inline int igb_tso(struct igb_ring *tx_ring, i = tx_ring->next_to_use; - buffer_info = &tx_ring->buffer_info[i]; + buffer_info = &tx_ring->tx_buffer_info[i]; context_desc = IGB_TX_CTXTDESC(tx_ring, i); /* VLAN MACLEN IPLEN */ if (tx_flags & IGB_TX_FLAGS_VLAN) @@ -4043,14 +4045,14 @@ static inline bool igb_tx_csum(struct igb_ring *tx_ring, { struct e1000_adv_tx_context_desc *context_desc; struct device *dev = tx_ring->dev; - struct igb_buffer *buffer_info; + struct igb_tx_buffer *buffer_info; u32 info = 0, tu_cmd = 0; unsigned int i; if ((skb->ip_summed == CHECKSUM_PARTIAL) || (tx_flags & IGB_TX_FLAGS_VLAN)) { i = tx_ring->next_to_use; - buffer_info = &tx_ring->buffer_info[i]; + buffer_info = &tx_ring->tx_buffer_info[i]; context_desc = IGB_TX_CTXTDESC(tx_ring, i); if (tx_flags & IGB_TX_FLAGS_VLAN) @@ -4126,7 +4128,7 @@ static inline bool igb_tx_csum(struct igb_ring *tx_ring, static inline int igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb, unsigned int first) { - struct igb_buffer *buffer_info; + struct igb_tx_buffer *buffer_info; struct device *dev = tx_ring->dev; unsigned int hlen = skb_headlen(skb); unsigned int count = 0, i; @@ -4135,7 +4137,7 @@ static inline int igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb, i = tx_ring->next_to_use; - buffer_info = &tx_ring->buffer_info[i]; + buffer_info = &tx_ring->tx_buffer_info[i]; BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD); buffer_info->length = hlen; /* set time_stamp *before* dma to help avoid a possible race */ @@ -4155,7 +4157,7 @@ static inline int igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb, if (i == tx_ring->count) i = 0; - buffer_info = &tx_ring->buffer_info[i]; + buffer_info = &tx_ring->tx_buffer_info[i]; BUG_ON(len >= IGB_MAX_DATA_PER_TXD); buffer_info->length = len; buffer_info->time_stamp = jiffies; @@ -4168,12 +4170,12 @@ static inline int igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb, } - tx_ring->buffer_info[i].skb = skb; - tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags; + buffer_info->skb = skb; + buffer_info->tx_flags = skb_shinfo(skb)->tx_flags; /* multiply data chunks by size of headers */ - tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len; - tx_ring->buffer_info[i].gso_segs = gso_segs; - tx_ring->buffer_info[first].next_to_watch = i; + buffer_info->bytecount = ((gso_segs - 1) * hlen) + skb->len; + buffer_info->gso_segs = gso_segs; + tx_ring->tx_buffer_info[first].next_to_watch = i; return ++count; @@ -4192,7 +4194,7 @@ dma_error: if (i == 0) i = tx_ring->count; i--; - buffer_info = &tx_ring->buffer_info[i]; + buffer_info = &tx_ring->tx_buffer_info[i]; igb_unmap_and_free_tx_resource(tx_ring, buffer_info); } @@ -4204,7 +4206,7 @@ static inline void igb_tx_queue(struct igb_ring *tx_ring, u8 hdr_len) { union e1000_adv_tx_desc *tx_desc; - struct igb_buffer *buffer_info; + struct igb_tx_buffer *buffer_info; u32 olinfo_status = 0, cmd_type_len; unsigned int i = tx_ring->next_to_use; @@ -4240,7 +4242,7 @@ static inline void igb_tx_queue(struct igb_ring *tx_ring, olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT); do { - buffer_info = &tx_ring->buffer_info[i]; + buffer_info = &tx_ring->tx_buffer_info[i]; tx_desc = IGB_TX_DESC(tx_ring, i); tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma); tx_desc->read.cmd_type_len = @@ -4353,7 +4355,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, count = igb_tx_map(tx_ring, skb, first); if (!count) { dev_kfree_skb_any(skb); - tx_ring->buffer_info[first].time_stamp = 0; + tx_ring->tx_buffer_info[first].time_stamp = 0; tx_ring->next_to_use = first; return NETDEV_TX_OK; } @@ -5551,13 +5553,14 @@ static void igb_systim_to_hwtstamp(struct igb_adapter *adapter, /** * igb_tx_hwtstamp - utility function which checks for TX time stamp * @q_vector: pointer to q_vector containing needed info - * @buffer: pointer to igb_buffer structure + * @buffer: pointer to igb_tx_buffer structure * * If we were asked to do hardware stamping and such a time stamp is * available, then it must have been for this skb here because we only * allow only one such packet into the queue. */ -static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info) +static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, + struct igb_tx_buffer *buffer_info) { struct igb_adapter *adapter = q_vector->adapter; struct e1000_hw *hw = &adapter->hw; @@ -5585,7 +5588,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) { struct igb_adapter *adapter = q_vector->adapter; struct igb_ring *tx_ring = q_vector->tx_ring; - struct igb_buffer *tx_buffer; + struct igb_tx_buffer *tx_buffer; union e1000_adv_tx_desc *tx_desc; unsigned int total_bytes = 0, total_packets = 0; unsigned int budget = q_vector->tx_work_limit; @@ -5594,7 +5597,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) if (test_bit(__IGB_DOWN, &adapter->state)) return true; - tx_buffer = &tx_ring->buffer_info[i]; + tx_buffer = &tx_ring->tx_buffer_info[i]; tx_desc = IGB_TX_DESC(tx_ring, i); for (; budget; budget--) { @@ -5627,7 +5630,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) i++; if (unlikely(i == tx_ring->count)) { i = 0; - tx_buffer = tx_ring->buffer_info; + tx_buffer = tx_ring->tx_buffer_info; tx_desc = IGB_TX_DESC(tx_ring, 0); } } while (eop_desc); @@ -5643,7 +5646,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) if (tx_ring->detect_tx_hung) { struct e1000_hw *hw = &adapter->hw; - u16 eop = tx_ring->buffer_info[i].next_to_watch; + u16 eop = tx_ring->tx_buffer_info[i].next_to_watch; union e1000_adv_tx_desc *eop_desc; eop_desc = IGB_TX_DESC(tx_ring, eop); @@ -5651,8 +5654,8 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) /* Detect a transmit hang in hardware, this serializes the * check with the clearing of time_stamp and movement of i */ tx_ring->detect_tx_hung = false; - if (tx_ring->buffer_info[i].time_stamp && - time_after(jiffies, tx_ring->buffer_info[i].time_stamp + + if (tx_ring->tx_buffer_info[i].time_stamp && + time_after(jiffies, tx_ring->tx_buffer_info[i].time_stamp + (adapter->tx_timeout_factor * HZ)) && !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) { @@ -5674,7 +5677,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) readl(tx_ring->tail), tx_ring->next_to_use, tx_ring->next_to_clean, - tx_ring->buffer_info[eop].time_stamp, + tx_ring->tx_buffer_info[eop].time_stamp, eop, jiffies, eop_desc->wb.status); @@ -5802,7 +5805,7 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget) staterr = le32_to_cpu(rx_desc->wb.upper.status_error); while (staterr & E1000_RXD_STAT_DD) { - struct igb_buffer *buffer_info = &rx_ring->buffer_info[i]; + struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; struct sk_buff *skb = buffer_info->skb; union e1000_adv_rx_desc *next_rxd; @@ -5855,8 +5858,8 @@ static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget) } if (!(staterr & E1000_RXD_STAT_EOP)) { - struct igb_buffer *next_buffer; - next_buffer = &rx_ring->buffer_info[i]; + struct igb_rx_buffer *next_buffer; + next_buffer = &rx_ring->rx_buffer_info[i]; buffer_info->skb = next_buffer->skb; buffer_info->dma = next_buffer->dma; next_buffer->skb = skb; @@ -5917,7 +5920,7 @@ next_desc: } static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring, - struct igb_buffer *bi) + struct igb_rx_buffer *bi) { struct sk_buff *skb = bi->skb; dma_addr_t dma = bi->dma; @@ -5951,7 +5954,7 @@ static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring, } static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, - struct igb_buffer *bi) + struct igb_rx_buffer *bi) { struct page *page = bi->page; dma_addr_t page_dma = bi->page_dma; @@ -5990,11 +5993,11 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) { union e1000_adv_rx_desc *rx_desc; - struct igb_buffer *bi; + struct igb_rx_buffer *bi; u16 i = rx_ring->next_to_use; rx_desc = IGB_RX_DESC(rx_ring, i); - bi = &rx_ring->buffer_info[i]; + bi = &rx_ring->rx_buffer_info[i]; i -= rx_ring->count; while (cleaned_count--) { @@ -6015,7 +6018,7 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) i++; if (unlikely(!i)) { rx_desc = IGB_RX_DESC(rx_ring, 0); - bi = rx_ring->buffer_info; + bi = rx_ring->rx_buffer_info; i -= rx_ring->count; } From 7d13a7d0da74d127457cc6f88e47fd8e85960a13 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 26 Aug 2011 07:44:32 +0000 Subject: [PATCH 08/13] igb: Consolidate creation of Tx context descriptors into a single function This patch is meant to simplify the transmit path by reducing the overhead for creating a transmit context descriptor. The current implementation is split with igb_tso and igb_tx_csum doing two separate implementations on how to setup the tx_buffer_info structure and the tx_desc. By combining them it is possible to reduce code and simplify things since now only one function will create context descriptors. Signed-off-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 239 ++++++++++------------ 1 file changed, 109 insertions(+), 130 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 2bdc78368b64..a0bb81d9ef1b 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -45,6 +45,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -3960,16 +3963,39 @@ set_itr_now: #define IGB_TX_FLAGS_VLAN_MASK 0xffff0000 #define IGB_TX_FLAGS_VLAN_SHIFT 16 +void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens, + u32 type_tucmd, u32 mss_l4len_idx) +{ + struct e1000_adv_tx_context_desc *context_desc; + u16 i = tx_ring->next_to_use; + + context_desc = IGB_TX_CTXTDESC(tx_ring, i); + + i++; + tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; + + /* set bits to identify this as an advanced context descriptor */ + type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT; + + /* For 82575, context index must be unique per ring. */ + if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) + mss_l4len_idx |= tx_ring->reg_idx << 4; + + context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); + context_desc->seqnum_seed = 0; + context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); + context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); +} + static inline int igb_tso(struct igb_ring *tx_ring, struct sk_buff *skb, u32 tx_flags, u8 *hdr_len) { - struct e1000_adv_tx_context_desc *context_desc; - unsigned int i; int err; - struct igb_tx_buffer *buffer_info; - u32 info = 0, tu_cmd = 0; - u32 mss_l4len_idx; - u8 l4len; + u32 vlan_macip_lens, type_tucmd; + u32 mss_l4len_idx, l4len; + + if (!skb_is_gso(skb)) + return 0; if (skb_header_cloned(skb)) { err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); @@ -3977,8 +4003,8 @@ static inline int igb_tso(struct igb_ring *tx_ring, return err; } - l4len = tcp_hdrlen(skb); - *hdr_len += l4len; + /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ + type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP; if (skb->protocol == htons(ETH_P_IP)) { struct iphdr *iph = ip_hdr(skb); @@ -3988,6 +4014,7 @@ static inline int igb_tso(struct igb_ring *tx_ring, iph->daddr, 0, IPPROTO_TCP, 0); + type_tucmd |= E1000_ADVTXD_TUCMD_IPV4; } else if (skb_is_gso_v6(skb)) { ipv6_hdr(skb)->payload_len = 0; tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, @@ -3995,131 +4022,85 @@ static inline int igb_tso(struct igb_ring *tx_ring, 0, IPPROTO_TCP, 0); } - i = tx_ring->next_to_use; - - buffer_info = &tx_ring->tx_buffer_info[i]; - context_desc = IGB_TX_CTXTDESC(tx_ring, i); - /* VLAN MACLEN IPLEN */ - if (tx_flags & IGB_TX_FLAGS_VLAN) - info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK); - info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT); - *hdr_len += skb_network_offset(skb); - info |= skb_network_header_len(skb); - *hdr_len += skb_network_header_len(skb); - context_desc->vlan_macip_lens = cpu_to_le32(info); - - /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ - tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT); - - if (skb->protocol == htons(ETH_P_IP)) - tu_cmd |= E1000_ADVTXD_TUCMD_IPV4; - tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP; - - context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd); + l4len = tcp_hdrlen(skb); + *hdr_len = skb_transport_offset(skb) + l4len; /* MSS L4LEN IDX */ - mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT); - mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT); + mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT; + mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT; - /* For 82575, context index must be unique per ring. */ - if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) - mss_l4len_idx |= tx_ring->reg_idx << 4; + /* VLAN MACLEN IPLEN */ + vlan_macip_lens = skb_network_header_len(skb); + vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT; + vlan_macip_lens |= tx_flags & IGB_TX_FLAGS_VLAN_MASK; - context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); - context_desc->seqnum_seed = 0; + igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx); - buffer_info->time_stamp = jiffies; - buffer_info->next_to_watch = i; - buffer_info->dma = 0; - i++; - if (i == tx_ring->count) - i = 0; - - tx_ring->next_to_use = i; - - return true; + return 1; } static inline bool igb_tx_csum(struct igb_ring *tx_ring, struct sk_buff *skb, u32 tx_flags) { - struct e1000_adv_tx_context_desc *context_desc; - struct device *dev = tx_ring->dev; - struct igb_tx_buffer *buffer_info; - u32 info = 0, tu_cmd = 0; - unsigned int i; + u32 vlan_macip_lens = 0; + u32 mss_l4len_idx = 0; + u32 type_tucmd = 0; - if ((skb->ip_summed == CHECKSUM_PARTIAL) || - (tx_flags & IGB_TX_FLAGS_VLAN)) { - i = tx_ring->next_to_use; - buffer_info = &tx_ring->tx_buffer_info[i]; - context_desc = IGB_TX_CTXTDESC(tx_ring, i); - - if (tx_flags & IGB_TX_FLAGS_VLAN) - info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK); - - info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT); - if (skb->ip_summed == CHECKSUM_PARTIAL) - info |= skb_network_header_len(skb); - - context_desc->vlan_macip_lens = cpu_to_le32(info); - - tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT); - - if (skb->ip_summed == CHECKSUM_PARTIAL) { - __be16 protocol; - - if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { - const struct vlan_ethhdr *vhdr = - (const struct vlan_ethhdr*)skb->data; - - protocol = vhdr->h_vlan_encapsulated_proto; - } else { - protocol = skb->protocol; - } - - switch (protocol) { - case cpu_to_be16(ETH_P_IP): - tu_cmd |= E1000_ADVTXD_TUCMD_IPV4; - if (ip_hdr(skb)->protocol == IPPROTO_TCP) - tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP; - else if (ip_hdr(skb)->protocol == IPPROTO_SCTP) - tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP; - break; - case cpu_to_be16(ETH_P_IPV6): - /* XXX what about other V6 headers?? */ - if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) - tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP; - else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP) - tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP; - break; - default: - if (unlikely(net_ratelimit())) - dev_warn(dev, - "partial checksum but proto=%x!\n", - skb->protocol); - break; + if (skb->ip_summed != CHECKSUM_PARTIAL) { + if (!(tx_flags & IGB_TX_FLAGS_VLAN)) + return false; + } else { + u8 l4_hdr = 0; + switch (skb->protocol) { + case __constant_htons(ETH_P_IP): + vlan_macip_lens |= skb_network_header_len(skb); + type_tucmd |= E1000_ADVTXD_TUCMD_IPV4; + l4_hdr = ip_hdr(skb)->protocol; + break; + case __constant_htons(ETH_P_IPV6): + vlan_macip_lens |= skb_network_header_len(skb); + l4_hdr = ipv6_hdr(skb)->nexthdr; + break; + default: + if (unlikely(net_ratelimit())) { + dev_warn(tx_ring->dev, + "partial checksum but proto=%x!\n", + skb->protocol); } + break; } - context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd); - context_desc->seqnum_seed = 0; - if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) - context_desc->mss_l4len_idx = - cpu_to_le32(tx_ring->reg_idx << 4); - - buffer_info->time_stamp = jiffies; - buffer_info->next_to_watch = i; - buffer_info->dma = 0; - - i++; - if (i == tx_ring->count) - i = 0; - tx_ring->next_to_use = i; - - return true; + switch (l4_hdr) { + case IPPROTO_TCP: + type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP; + mss_l4len_idx = tcp_hdrlen(skb) << + E1000_ADVTXD_L4LEN_SHIFT; + break; + case IPPROTO_SCTP: + type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP; + mss_l4len_idx = sizeof(struct sctphdr) << + E1000_ADVTXD_L4LEN_SHIFT; + break; + case IPPROTO_UDP: + mss_l4len_idx = sizeof(struct udphdr) << + E1000_ADVTXD_L4LEN_SHIFT; + break; + default: + if (unlikely(net_ratelimit())) { + dev_warn(tx_ring->dev, + "partial checksum but l4 proto=%x!\n", + l4_hdr); + } + break; + } } - return false; + + vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT; + vlan_macip_lens |= tx_flags & IGB_TX_FLAGS_VLAN_MASK; + + igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx); + + return (skb->ip_summed == CHECKSUM_PARTIAL); } #define IGB_MAX_TXD_PWR 16 @@ -4140,8 +4121,6 @@ static inline int igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb, buffer_info = &tx_ring->tx_buffer_info[i]; BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD); buffer_info->length = hlen; - /* set time_stamp *before* dma to help avoid a possible race */ - buffer_info->time_stamp = jiffies; buffer_info->next_to_watch = i; buffer_info->dma = dma_map_single(dev, skb->data, hlen, DMA_TO_DEVICE); @@ -4160,7 +4139,6 @@ static inline int igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb, buffer_info = &tx_ring->tx_buffer_info[i]; BUG_ON(len >= IGB_MAX_DATA_PER_TXD); buffer_info->length = len; - buffer_info->time_stamp = jiffies; buffer_info->next_to_watch = i; buffer_info->mapped_as_page = true; buffer_info->dma = skb_frag_dma_map(dev, frag, 0, len, @@ -4176,6 +4154,7 @@ static inline int igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb, buffer_info->bytecount = ((gso_segs - 1) * hlen) + skb->len; buffer_info->gso_segs = gso_segs; tx_ring->tx_buffer_info[first].next_to_watch = i; + tx_ring->tx_buffer_info[first].time_stamp = jiffies; return ++count; @@ -4304,7 +4283,7 @@ static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size) netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, struct igb_ring *tx_ring) { - int tso = 0, count; + int tso, count; u32 tx_flags = 0; u16 first; u8 hdr_len = 0; @@ -4333,16 +4312,12 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, tx_flags |= IGB_TX_FLAGS_IPV4; first = tx_ring->next_to_use; - if (skb_is_gso(skb)) { - tso = igb_tso(tx_ring, skb, tx_flags, &hdr_len); - if (tso < 0) { - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; - } - } + tso = igb_tso(tx_ring, skb, tx_flags, &hdr_len); - if (tso) + if (tso < 0) + goto out_drop; + else if (tso) tx_flags |= IGB_TX_FLAGS_TSO; else if (igb_tx_csum(tx_ring, skb, tx_flags) && (skb->ip_summed == CHECKSUM_PARTIAL)) @@ -4366,6 +4341,10 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4); return NETDEV_TX_OK; + +out_drop: + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; } static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter, From 8542db05dbc99f603889c349e5cf8f3f81cddbf5 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 26 Aug 2011 07:44:43 +0000 Subject: [PATCH 09/13] igb: Make first and tx_buffer_info->next_to_watch into pointers This change converts two tx_buffer_info index values into pointers. The advantage to this is that we reduce unnecessary computations and in the case of next_to_watch we get an added bonus of the value being able to provide additional information as a NULL value indicates it is unset versus a 0 not having any meaning for the index value. Signed-off-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb.h | 2 +- drivers/net/ethernet/intel/igb/igb_main.c | 66 ++++++++++++----------- 2 files changed, 37 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 56c68fc8bca2..7185667bf261 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -133,7 +133,7 @@ struct vf_data_storage { /* wrapper around a pointer to a socket buffer, * so a DMA handle can be stored along with the buffer */ struct igb_tx_buffer { - u16 next_to_watch; + union e1000_adv_tx_desc *next_to_watch; unsigned long time_stamp; dma_addr_t dma; u32 length; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index a0bb81d9ef1b..edc2caeb6c16 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -381,7 +381,7 @@ static void igb_dump(struct igb_adapter *adapter) struct igb_tx_buffer *buffer_info; tx_ring = adapter->tx_ring[n]; buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean]; - printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n", + printk(KERN_INFO " %5d %5X %5X %016llX %04X %p %016llX\n", n, tx_ring->next_to_use, tx_ring->next_to_clean, (u64)buffer_info->dma, buffer_info->length, @@ -421,7 +421,7 @@ static void igb_dump(struct igb_adapter *adapter) buffer_info = &tx_ring->tx_buffer_info[i]; u0 = (struct my_u0 *)tx_desc; printk(KERN_INFO "T [0x%03X] %016llX %016llX %016llX" - " %04X %3X %016llX %p", i, + " %04X %p %016llX %p", i, le64_to_cpu(u0->a), le64_to_cpu(u0->b), (u64)buffer_info->dma, @@ -3161,7 +3161,7 @@ void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring, } buffer_info->time_stamp = 0; buffer_info->length = 0; - buffer_info->next_to_watch = 0; + buffer_info->next_to_watch = NULL; buffer_info->mapped_as_page = false; } @@ -4107,7 +4107,7 @@ static inline bool igb_tx_csum(struct igb_ring *tx_ring, #define IGB_MAX_DATA_PER_TXD (1<dev; @@ -4121,7 +4121,6 @@ static inline int igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb, buffer_info = &tx_ring->tx_buffer_info[i]; BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD); buffer_info->length = hlen; - buffer_info->next_to_watch = i; buffer_info->dma = dma_map_single(dev, skb->data, hlen, DMA_TO_DEVICE); if (dma_mapping_error(dev, buffer_info->dma)) @@ -4139,7 +4138,6 @@ static inline int igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb, buffer_info = &tx_ring->tx_buffer_info[i]; BUG_ON(len >= IGB_MAX_DATA_PER_TXD); buffer_info->length = len; - buffer_info->next_to_watch = i; buffer_info->mapped_as_page = true; buffer_info->dma = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE); @@ -4153,8 +4151,12 @@ static inline int igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb, /* multiply data chunks by size of headers */ buffer_info->bytecount = ((gso_segs - 1) * hlen) + skb->len; buffer_info->gso_segs = gso_segs; - tx_ring->tx_buffer_info[first].next_to_watch = i; - tx_ring->tx_buffer_info[first].time_stamp = jiffies; + + /* set the timestamp */ + first->time_stamp = jiffies; + + /* set next_to_watch value indicating a packet is present */ + first->next_to_watch = IGB_TX_DESC(tx_ring, i); return ++count; @@ -4165,7 +4167,6 @@ dma_error: buffer_info->dma = 0; buffer_info->time_stamp = 0; buffer_info->length = 0; - buffer_info->next_to_watch = 0; buffer_info->mapped_as_page = false; /* clear timestamp and dma mappings for remaining portion of packet */ @@ -4283,9 +4284,9 @@ static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size) netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, struct igb_ring *tx_ring) { + struct igb_tx_buffer *first; int tso, count; u32 tx_flags = 0; - u16 first; u8 hdr_len = 0; /* need: 1 descriptor per page, @@ -4311,7 +4312,8 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, if (skb->protocol == htons(ETH_P_IP)) tx_flags |= IGB_TX_FLAGS_IPV4; - first = tx_ring->next_to_use; + /* record the location of the first descriptor for this packet */ + first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; tso = igb_tso(tx_ring, skb, tx_flags, &hdr_len); @@ -4330,8 +4332,8 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, count = igb_tx_map(tx_ring, skb, first); if (!count) { dev_kfree_skb_any(skb); - tx_ring->tx_buffer_info[first].time_stamp = 0; - tx_ring->next_to_use = first; + first->time_stamp = 0; + tx_ring->next_to_use = first - tx_ring->tx_buffer_info; return NETDEV_TX_OK; } @@ -5568,29 +5570,34 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) struct igb_adapter *adapter = q_vector->adapter; struct igb_ring *tx_ring = q_vector->tx_ring; struct igb_tx_buffer *tx_buffer; - union e1000_adv_tx_desc *tx_desc; + union e1000_adv_tx_desc *tx_desc, *eop_desc; unsigned int total_bytes = 0, total_packets = 0; unsigned int budget = q_vector->tx_work_limit; - u16 i = tx_ring->next_to_clean; + unsigned int i = tx_ring->next_to_clean; if (test_bit(__IGB_DOWN, &adapter->state)) return true; tx_buffer = &tx_ring->tx_buffer_info[i]; tx_desc = IGB_TX_DESC(tx_ring, i); + i -= tx_ring->count; for (; budget; budget--) { - u16 eop = tx_buffer->next_to_watch; - union e1000_adv_tx_desc *eop_desc; + eop_desc = tx_buffer->next_to_watch; - eop_desc = IGB_TX_DESC(tx_ring, eop); + /* prevent any other reads prior to eop_desc */ + rmb(); + + /* if next_to_watch is not set then there is no work pending */ + if (!eop_desc) + break; /* if DD is not set pending work has not been completed */ if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD))) break; - /* prevent any other reads prior to eop_desc being verified */ - rmb(); + /* clear next_to_watch to prevent false hangs */ + tx_buffer->next_to_watch = NULL; do { tx_desc->wb.status = 0; @@ -5607,14 +5614,15 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) tx_buffer++; tx_desc++; i++; - if (unlikely(i == tx_ring->count)) { - i = 0; + if (unlikely(!i)) { + i -= tx_ring->count; tx_buffer = tx_ring->tx_buffer_info; tx_desc = IGB_TX_DESC(tx_ring, 0); } } while (eop_desc); } + i += tx_ring->count; tx_ring->next_to_clean = i; u64_stats_update_begin(&tx_ring->tx_syncp); tx_ring->tx_stats.bytes += total_bytes; @@ -5625,16 +5633,14 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) if (tx_ring->detect_tx_hung) { struct e1000_hw *hw = &adapter->hw; - u16 eop = tx_ring->tx_buffer_info[i].next_to_watch; - union e1000_adv_tx_desc *eop_desc; - eop_desc = IGB_TX_DESC(tx_ring, eop); + eop_desc = tx_buffer->next_to_watch; /* Detect a transmit hang in hardware, this serializes the * check with the clearing of time_stamp and movement of i */ tx_ring->detect_tx_hung = false; - if (tx_ring->tx_buffer_info[i].time_stamp && - time_after(jiffies, tx_ring->tx_buffer_info[i].time_stamp + + if (eop_desc && + time_after(jiffies, tx_buffer->time_stamp + (adapter->tx_timeout_factor * HZ)) && !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) { @@ -5648,7 +5654,7 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) " next_to_clean <%x>\n" "buffer_info[next_to_clean]\n" " time_stamp <%lx>\n" - " next_to_watch <%x>\n" + " next_to_watch <%p>\n" " jiffies <%lx>\n" " desc.status <%x>\n", tx_ring->queue_index, @@ -5656,8 +5662,8 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) readl(tx_ring->tail), tx_ring->next_to_use, tx_ring->next_to_clean, - tx_ring->tx_buffer_info[eop].time_stamp, - eop, + tx_buffer->time_stamp, + eop_desc, jiffies, eop_desc->wb.status); netif_stop_subqueue(tx_ring->netdev, From e032afc80ca16e6b62cfe5938977bf678eec0dd0 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 26 Aug 2011 07:44:48 +0000 Subject: [PATCH 10/13] igb: Create separate functions for generating cmd_type and olinfo This change is meant to improve the readability of the driver by separating out the cmd_type configuration and the olinfo configuration into their own functions. By doing this it is much easier to determine which ingredients go into setting up these to portions of the descriptor. Signed-off-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/e1000_82575.h | 2 + drivers/net/ethernet/intel/igb/igb.h | 2 +- drivers/net/ethernet/intel/igb/igb_main.c | 105 +++++++++++-------- 3 files changed, 65 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.h b/drivers/net/ethernet/intel/igb/e1000_82575.h index 786e110011a3..08a757eb6608 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.h +++ b/drivers/net/ethernet/intel/igb/e1000_82575.h @@ -130,7 +130,9 @@ union e1000_adv_tx_desc { #define E1000_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 Timestamp packet */ #define E1000_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */ #define E1000_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */ +#define E1000_ADVTXD_DCMD_EOP 0x01000000 /* End of Packet */ #define E1000_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ +#define E1000_ADVTXD_DCMD_RS 0x08000000 /* Report Status */ #define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */ #define E1000_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */ #define E1000_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */ diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 7185667bf261..160811053d0f 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -231,7 +231,7 @@ struct igb_ring { #define IGB_RING_FLAG_TX_CTX_IDX 0x00000001 /* HW requires context index */ -#define IGB_ADVTXD_DCMD (E1000_TXD_CMD_EOP | E1000_TXD_CMD_RS) +#define IGB_TXD_DCMD (E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS) #define IGB_RX_DESC(R, i) \ (&(((union e1000_adv_rx_desc *)((R)->desc))[i])) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index edc2caeb6c16..2c61ec46586f 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -4103,6 +4103,50 @@ static inline bool igb_tx_csum(struct igb_ring *tx_ring, return (skb->ip_summed == CHECKSUM_PARTIAL); } +static __le32 igb_tx_cmd_type(u32 tx_flags) +{ + /* set type for advanced descriptor with frame checksum insertion */ + __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA | + E1000_ADVTXD_DCMD_IFCS | + E1000_ADVTXD_DCMD_DEXT); + + /* set HW vlan bit if vlan is present */ + if (tx_flags & IGB_TX_FLAGS_VLAN) + cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE); + + /* set timestamp bit if present */ + if (tx_flags & IGB_TX_FLAGS_TSTAMP) + cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP); + + /* set segmentation bits for TSO */ + if (tx_flags & IGB_TX_FLAGS_TSO) + cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE); + + return cmd_type; +} + +static __le32 igb_tx_olinfo_status(u32 tx_flags, unsigned int paylen, + struct igb_ring *tx_ring) +{ + u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT; + + /* 82575 requires a unique index per ring if any offload is enabled */ + if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) && + (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)) + olinfo_status |= tx_ring->reg_idx << 4; + + /* insert L4 checksum */ + if (tx_flags & IGB_TX_FLAGS_CSUM) { + olinfo_status |= E1000_TXD_POPTS_TXSM << 8; + + /* insert IPv4 checksum */ + if (tx_flags & IGB_TX_FLAGS_IPV4) + olinfo_status |= E1000_TXD_POPTS_IXSM << 8; + } + + return cpu_to_le32(olinfo_status); +} + #define IGB_MAX_TXD_PWR 16 #define IGB_MAX_DATA_PER_TXD (1<next_to_use; - cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS | - E1000_ADVTXD_DCMD_DEXT); - - if (tx_flags & IGB_TX_FLAGS_VLAN) - cmd_type_len |= E1000_ADVTXD_DCMD_VLE; - - if (tx_flags & IGB_TX_FLAGS_TSTAMP) - cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP; - - if (tx_flags & IGB_TX_FLAGS_TSO) { - cmd_type_len |= E1000_ADVTXD_DCMD_TSE; - - /* insert tcp checksum */ - olinfo_status |= E1000_TXD_POPTS_TXSM << 8; - - /* insert ip checksum */ - if (tx_flags & IGB_TX_FLAGS_IPV4) - olinfo_status |= E1000_TXD_POPTS_IXSM << 8; - - } else if (tx_flags & IGB_TX_FLAGS_CSUM) { - olinfo_status |= E1000_TXD_POPTS_TXSM << 8; - } - - if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) && - (tx_flags & (IGB_TX_FLAGS_CSUM | - IGB_TX_FLAGS_TSO | - IGB_TX_FLAGS_VLAN))) - olinfo_status |= tx_ring->reg_idx << 4; - - olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT); + cmd_type = igb_tx_cmd_type(tx_flags); + olinfo_status = igb_tx_olinfo_status(tx_flags, + paylen - hdr_len, + tx_ring); do { buffer_info = &tx_ring->tx_buffer_info[i]; tx_desc = IGB_TX_DESC(tx_ring, i); tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma); - tx_desc->read.cmd_type_len = - cpu_to_le32(cmd_type_len | buffer_info->length); - tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); + tx_desc->read.cmd_type_len = cmd_type | + cpu_to_le32(buffer_info->length); + tx_desc->read.olinfo_status = olinfo_status; count--; i++; if (i == tx_ring->count) i = 0; } while (count > 0); - tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD); + tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_TXD_DCMD); /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, @@ -4309,21 +4327,22 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT); } - if (skb->protocol == htons(ETH_P_IP)) - tx_flags |= IGB_TX_FLAGS_IPV4; - /* record the location of the first descriptor for this packet */ first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; tso = igb_tso(tx_ring, skb, tx_flags, &hdr_len); - if (tso < 0) + if (tso < 0) { goto out_drop; - else if (tso) - tx_flags |= IGB_TX_FLAGS_TSO; - else if (igb_tx_csum(tx_ring, skb, tx_flags) && - (skb->ip_summed == CHECKSUM_PARTIAL)) + } else if (tso) { + tx_flags |= IGB_TX_FLAGS_TSO | IGB_TX_FLAGS_CSUM; + if (skb->protocol == htons(ETH_P_IP)) + tx_flags |= IGB_TX_FLAGS_IPV4; + + } else if (igb_tx_csum(tx_ring, skb, tx_flags) && + (skb->ip_summed == CHECKSUM_PARTIAL)) { tx_flags |= IGB_TX_FLAGS_CSUM; + } /* * count reflects descriptors mapped, if 0 or less then mapping error From 31f6adbb352ae118550ab51f2a5ed1023ec7eb03 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 26 Aug 2011 07:44:53 +0000 Subject: [PATCH 11/13] igb: Cleanup protocol handling in transmit path This change is meant to cleanup the protocol handling in the transmit path so that it correctly offloads software VLAN tagged frames. Signed-off-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb_main.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 2c61ec46586f..3ebeb3e51a1d 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3987,8 +3987,8 @@ void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens, context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); } -static inline int igb_tso(struct igb_ring *tx_ring, - struct sk_buff *skb, u32 tx_flags, u8 *hdr_len) +static inline int igb_tso(struct igb_ring *tx_ring, struct sk_buff *skb, + u32 tx_flags, __be16 protocol, u8 *hdr_len) { int err; u32 vlan_macip_lens, type_tucmd; @@ -4006,7 +4006,7 @@ static inline int igb_tso(struct igb_ring *tx_ring, /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP; - if (skb->protocol == htons(ETH_P_IP)) { + if (protocol == __constant_htons(ETH_P_IP)) { struct iphdr *iph = ip_hdr(skb); iph->tot_len = 0; iph->check = 0; @@ -4039,8 +4039,8 @@ static inline int igb_tso(struct igb_ring *tx_ring, return 1; } -static inline bool igb_tx_csum(struct igb_ring *tx_ring, - struct sk_buff *skb, u32 tx_flags) +static inline bool igb_tx_csum(struct igb_ring *tx_ring, struct sk_buff *skb, + u32 tx_flags, __be16 protocol) { u32 vlan_macip_lens = 0; u32 mss_l4len_idx = 0; @@ -4051,7 +4051,7 @@ static inline bool igb_tx_csum(struct igb_ring *tx_ring, return false; } else { u8 l4_hdr = 0; - switch (skb->protocol) { + switch (protocol) { case __constant_htons(ETH_P_IP): vlan_macip_lens |= skb_network_header_len(skb); type_tucmd |= E1000_ADVTXD_TUCMD_IPV4; @@ -4065,7 +4065,7 @@ static inline bool igb_tx_csum(struct igb_ring *tx_ring, if (unlikely(net_ratelimit())) { dev_warn(tx_ring->dev, "partial checksum but proto=%x!\n", - skb->protocol); + protocol); } break; } @@ -4305,6 +4305,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, struct igb_tx_buffer *first; int tso, count; u32 tx_flags = 0; + __be16 protocol = vlan_get_protocol(skb); u8 hdr_len = 0; /* need: 1 descriptor per page, @@ -4330,16 +4331,14 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, /* record the location of the first descriptor for this packet */ first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; - tso = igb_tso(tx_ring, skb, tx_flags, &hdr_len); - + tso = igb_tso(tx_ring, skb, tx_flags, protocol, &hdr_len); if (tso < 0) { goto out_drop; } else if (tso) { tx_flags |= IGB_TX_FLAGS_TSO | IGB_TX_FLAGS_CSUM; - if (skb->protocol == htons(ETH_P_IP)) + if (protocol == htons(ETH_P_IP)) tx_flags |= IGB_TX_FLAGS_IPV4; - - } else if (igb_tx_csum(tx_ring, skb, tx_flags) && + } else if (igb_tx_csum(tx_ring, skb, tx_flags, protocol) && (skb->ip_summed == CHECKSUM_PARTIAL)) { tx_flags |= IGB_TX_FLAGS_CSUM; } From 2bbfebe2db3453f9ad5a3de56b77d383b91a7829 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 26 Aug 2011 07:44:59 +0000 Subject: [PATCH 12/13] igb: Combine all flag info fields into a single tx_flags structure This change is meant to combine all of the TX flags fields into one u32 flags field so that it can be stored into the tx_buffer_info structure. This includes the time stamp flag as well as mapped_as_page flag info. Signed-off-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb.h | 9 +++++++++ drivers/net/ethernet/intel/igb/igb_main.c | 24 ++++++++--------------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 160811053d0f..b71d1863e551 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -130,6 +130,15 @@ struct vf_data_storage { #define IGB_MNG_VLAN_NONE -1 +#define IGB_TX_FLAGS_CSUM 0x00000001 +#define IGB_TX_FLAGS_VLAN 0x00000002 +#define IGB_TX_FLAGS_TSO 0x00000004 +#define IGB_TX_FLAGS_IPV4 0x00000008 +#define IGB_TX_FLAGS_TSTAMP 0x00000010 +#define IGB_TX_FLAGS_MAPPED_AS_PAGE 0x00000020 +#define IGB_TX_FLAGS_VLAN_MASK 0xffff0000 +#define IGB_TX_FLAGS_VLAN_SHIFT 16 + /* wrapper around a pointer to a socket buffer, * so a DMA handle can be stored along with the buffer */ struct igb_tx_buffer { diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 3ebeb3e51a1d..dc93d64cf165 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3143,7 +3143,7 @@ void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring, struct igb_tx_buffer *buffer_info) { if (buffer_info->dma) { - if (buffer_info->mapped_as_page) + if (buffer_info->tx_flags & IGB_TX_FLAGS_MAPPED_AS_PAGE) dma_unmap_page(tx_ring->dev, buffer_info->dma, buffer_info->length, @@ -3162,7 +3162,6 @@ void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring, buffer_info->time_stamp = 0; buffer_info->length = 0; buffer_info->next_to_watch = NULL; - buffer_info->mapped_as_page = false; } /** @@ -3955,14 +3954,6 @@ set_itr_now: } } -#define IGB_TX_FLAGS_CSUM 0x00000001 -#define IGB_TX_FLAGS_VLAN 0x00000002 -#define IGB_TX_FLAGS_TSO 0x00000004 -#define IGB_TX_FLAGS_IPV4 0x00000008 -#define IGB_TX_FLAGS_TSTAMP 0x00000010 -#define IGB_TX_FLAGS_VLAN_MASK 0xffff0000 -#define IGB_TX_FLAGS_VLAN_SHIFT 16 - void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens, u32 type_tucmd, u32 mss_l4len_idx) { @@ -4151,7 +4142,7 @@ static __le32 igb_tx_olinfo_status(u32 tx_flags, unsigned int paylen, #define IGB_MAX_DATA_PER_TXD (1<dev; @@ -4165,11 +4156,14 @@ static inline int igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb, buffer_info = &tx_ring->tx_buffer_info[i]; BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD); buffer_info->length = hlen; + buffer_info->tx_flags = tx_flags; buffer_info->dma = dma_map_single(dev, skb->data, hlen, DMA_TO_DEVICE); if (dma_mapping_error(dev, buffer_info->dma)) goto dma_error; + tx_flags |= IGB_TX_FLAGS_MAPPED_AS_PAGE; + for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) { struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f]; unsigned int len = frag->size; @@ -4182,7 +4176,7 @@ static inline int igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb, buffer_info = &tx_ring->tx_buffer_info[i]; BUG_ON(len >= IGB_MAX_DATA_PER_TXD); buffer_info->length = len; - buffer_info->mapped_as_page = true; + buffer_info->tx_flags = tx_flags; buffer_info->dma = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE); if (dma_mapping_error(dev, buffer_info->dma)) @@ -4191,7 +4185,6 @@ static inline int igb_tx_map(struct igb_ring *tx_ring, struct sk_buff *skb, } buffer_info->skb = skb; - buffer_info->tx_flags = skb_shinfo(skb)->tx_flags; /* multiply data chunks by size of headers */ buffer_info->bytecount = ((gso_segs - 1) * hlen) + skb->len; buffer_info->gso_segs = gso_segs; @@ -4211,7 +4204,6 @@ dma_error: buffer_info->dma = 0; buffer_info->time_stamp = 0; buffer_info->length = 0; - buffer_info->mapped_as_page = false; /* clear timestamp and dma mappings for remaining portion of packet */ while (count--) { @@ -4347,7 +4339,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, * count reflects descriptors mapped, if 0 or less then mapping error * has occurred and we need to rewind the descriptor queue */ - count = igb_tx_map(tx_ring, skb, first); + count = igb_tx_map(tx_ring, skb, first, tx_flags); if (!count) { dev_kfree_skb_any(skb); first->time_stamp = 0; @@ -5567,7 +5559,7 @@ static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, u64 regval; /* if skb does not support hw timestamp or TX stamp not valid exit */ - if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) || + if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) || !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID)) return; From ebe42d169bd0b4c3e2e355374d07ba7d51744601 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 26 Aug 2011 07:45:09 +0000 Subject: [PATCH 13/13] igb: consolidate creation of Tx buffer info and data descriptor This change will combine the writes of tx_buffer_info and the Tx data descriptors into a single function. The advantage of this is that we can avoid needless memory reads from the buffer info struct and speed things up by keeping the accesses to the local registers. Signed-off-by: Alexander Duyck Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/igb/igb.h | 8 +- drivers/net/ethernet/intel/igb/igb_main.c | 318 ++++++++++++---------- 2 files changed, 184 insertions(+), 142 deletions(-) diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index b71d1863e551..77793a9debcc 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -135,7 +135,6 @@ struct vf_data_storage { #define IGB_TX_FLAGS_TSO 0x00000004 #define IGB_TX_FLAGS_IPV4 0x00000008 #define IGB_TX_FLAGS_TSTAMP 0x00000010 -#define IGB_TX_FLAGS_MAPPED_AS_PAGE 0x00000020 #define IGB_TX_FLAGS_VLAN_MASK 0xffff0000 #define IGB_TX_FLAGS_VLAN_SHIFT 16 @@ -144,13 +143,12 @@ struct vf_data_storage { struct igb_tx_buffer { union e1000_adv_tx_desc *next_to_watch; unsigned long time_stamp; - dma_addr_t dma; - u32 length; - u32 tx_flags; struct sk_buff *skb; unsigned int bytecount; u16 gso_segs; - u8 mapped_as_page; + dma_addr_t dma; + u32 length; + u32 tx_flags; }; struct igb_rx_buffer { diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index dc93d64cf165..862dd7c0cc70 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -3139,29 +3139,26 @@ static void igb_free_all_tx_resources(struct igb_adapter *adapter) igb_free_tx_resources(adapter->tx_ring[i]); } -void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring, - struct igb_tx_buffer *buffer_info) +void igb_unmap_and_free_tx_resource(struct igb_ring *ring, + struct igb_tx_buffer *tx_buffer) { - if (buffer_info->dma) { - if (buffer_info->tx_flags & IGB_TX_FLAGS_MAPPED_AS_PAGE) - dma_unmap_page(tx_ring->dev, - buffer_info->dma, - buffer_info->length, - DMA_TO_DEVICE); - else - dma_unmap_single(tx_ring->dev, - buffer_info->dma, - buffer_info->length, - DMA_TO_DEVICE); - buffer_info->dma = 0; + if (tx_buffer->skb) { + dev_kfree_skb_any(tx_buffer->skb); + if (tx_buffer->dma) + dma_unmap_single(ring->dev, + tx_buffer->dma, + tx_buffer->length, + DMA_TO_DEVICE); + } else if (tx_buffer->dma) { + dma_unmap_page(ring->dev, + tx_buffer->dma, + tx_buffer->length, + DMA_TO_DEVICE); } - if (buffer_info->skb) { - dev_kfree_skb_any(buffer_info->skb); - buffer_info->skb = NULL; - } - buffer_info->time_stamp = 0; - buffer_info->length = 0; - buffer_info->next_to_watch = NULL; + tx_buffer->next_to_watch = NULL; + tx_buffer->skb = NULL; + tx_buffer->dma = 0; + /* buffer_info must be completely set up in the transmit path */ } /** @@ -4138,124 +4135,153 @@ static __le32 igb_tx_olinfo_status(u32 tx_flags, unsigned int paylen, return cpu_to_le32(olinfo_status); } -#define IGB_MAX_TXD_PWR 16 -#define IGB_MAX_DATA_PER_TXD (1<dev; - unsigned int hlen = skb_headlen(skb); - unsigned int count = 0, i; - unsigned int f; - u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1; + struct igb_tx_buffer *tx_buffer_info; + union e1000_adv_tx_desc *tx_desc; + dma_addr_t dma; + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + unsigned int data_len = skb->data_len; + unsigned int size = skb_headlen(skb); + unsigned int paylen = skb->len - hdr_len; + __le32 cmd_type; + u16 i = tx_ring->next_to_use; + u16 gso_segs; - i = tx_ring->next_to_use; + if (tx_flags & IGB_TX_FLAGS_TSO) + gso_segs = skb_shinfo(skb)->gso_segs; + else + gso_segs = 1; - buffer_info = &tx_ring->tx_buffer_info[i]; - BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD); - buffer_info->length = hlen; - buffer_info->tx_flags = tx_flags; - buffer_info->dma = dma_map_single(dev, skb->data, hlen, - DMA_TO_DEVICE); - if (dma_mapping_error(dev, buffer_info->dma)) + /* multiply data chunks by size of headers */ + first->bytecount = paylen + (gso_segs * hdr_len); + first->gso_segs = gso_segs; + first->skb = skb; + + tx_desc = IGB_TX_DESC(tx_ring, i); + + tx_desc->read.olinfo_status = + igb_tx_olinfo_status(tx_flags, paylen, tx_ring); + + cmd_type = igb_tx_cmd_type(tx_flags); + + dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); + if (dma_mapping_error(tx_ring->dev, dma)) goto dma_error; - tx_flags |= IGB_TX_FLAGS_MAPPED_AS_PAGE; + /* record length, and DMA address */ + first->length = size; + first->dma = dma; + first->tx_flags = tx_flags; + tx_desc->read.buffer_addr = cpu_to_le64(dma); - for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f]; - unsigned int len = frag->size; + for (;;) { + while (unlikely(size > IGB_MAX_DATA_PER_TXD)) { + tx_desc->read.cmd_type_len = + cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD); + + i++; + tx_desc++; + if (i == tx_ring->count) { + tx_desc = IGB_TX_DESC(tx_ring, 0); + i = 0; + } + + dma += IGB_MAX_DATA_PER_TXD; + size -= IGB_MAX_DATA_PER_TXD; + + tx_desc->read.olinfo_status = 0; + tx_desc->read.buffer_addr = cpu_to_le64(dma); + } + + if (likely(!data_len)) + break; + + tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size); - count++; i++; - if (i == tx_ring->count) + tx_desc++; + if (i == tx_ring->count) { + tx_desc = IGB_TX_DESC(tx_ring, 0); i = 0; + } - buffer_info = &tx_ring->tx_buffer_info[i]; - BUG_ON(len >= IGB_MAX_DATA_PER_TXD); - buffer_info->length = len; - buffer_info->tx_flags = tx_flags; - buffer_info->dma = skb_frag_dma_map(dev, frag, 0, len, - DMA_TO_DEVICE); - if (dma_mapping_error(dev, buffer_info->dma)) + size = frag->size; + data_len -= size; + + dma = skb_frag_dma_map(tx_ring->dev, frag, 0, + size, DMA_TO_DEVICE); + if (dma_mapping_error(tx_ring->dev, dma)) goto dma_error; + tx_buffer_info = &tx_ring->tx_buffer_info[i]; + tx_buffer_info->length = size; + tx_buffer_info->dma = dma; + + tx_desc->read.olinfo_status = 0; + tx_desc->read.buffer_addr = cpu_to_le64(dma); + + frag++; } - buffer_info->skb = skb; - /* multiply data chunks by size of headers */ - buffer_info->bytecount = ((gso_segs - 1) * hlen) + skb->len; - buffer_info->gso_segs = gso_segs; + /* write last descriptor with RS and EOP bits */ + cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD); + tx_desc->read.cmd_type_len = cmd_type; /* set the timestamp */ first->time_stamp = jiffies; - /* set next_to_watch value indicating a packet is present */ - first->next_to_watch = IGB_TX_DESC(tx_ring, i); - - return ++count; - -dma_error: - dev_err(dev, "TX DMA map failed\n"); - - /* clear timestamp and dma mappings for failed buffer_info mapping */ - buffer_info->dma = 0; - buffer_info->time_stamp = 0; - buffer_info->length = 0; - - /* clear timestamp and dma mappings for remaining portion of packet */ - while (count--) { - if (i == 0) - i = tx_ring->count; - i--; - buffer_info = &tx_ring->tx_buffer_info[i]; - igb_unmap_and_free_tx_resource(tx_ring, buffer_info); - } - - return 0; -} - -static inline void igb_tx_queue(struct igb_ring *tx_ring, - u32 tx_flags, int count, u32 paylen, - u8 hdr_len) -{ - union e1000_adv_tx_desc *tx_desc; - struct igb_tx_buffer *buffer_info; - __le32 olinfo_status, cmd_type; - unsigned int i = tx_ring->next_to_use; - - cmd_type = igb_tx_cmd_type(tx_flags); - olinfo_status = igb_tx_olinfo_status(tx_flags, - paylen - hdr_len, - tx_ring); - - do { - buffer_info = &tx_ring->tx_buffer_info[i]; - tx_desc = IGB_TX_DESC(tx_ring, i); - tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma); - tx_desc->read.cmd_type_len = cmd_type | - cpu_to_le32(buffer_info->length); - tx_desc->read.olinfo_status = olinfo_status; - count--; - i++; - if (i == tx_ring->count) - i = 0; - } while (count > 0); - - tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_TXD_DCMD); - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. (Only - * applicable for weak-ordered memory model archs, - * such as IA-64). */ + /* + * Force memory writes to complete before letting h/w know there + * are new descriptors to fetch. (Only applicable for weak-ordered + * memory model archs, such as IA-64). + * + * We also need this memory barrier to make certain all of the + * status bits have been updated before next_to_watch is written. + */ wmb(); + /* set next_to_watch value indicating a packet is present */ + first->next_to_watch = tx_desc; + + i++; + if (i == tx_ring->count) + i = 0; + tx_ring->next_to_use = i; + writel(i, tx_ring->tail); + /* we need this if more than one processor can write to our tail * at a time, it syncronizes IO on IA64/Altix systems */ mmiowb(); + + return; + +dma_error: + dev_err(tx_ring->dev, "TX DMA map failed\n"); + + /* clear dma mappings for failed tx_buffer_info map */ + for (;;) { + tx_buffer_info = &tx_ring->tx_buffer_info[i]; + igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info); + if (tx_buffer_info == first) + break; + if (i == 0) + i = tx_ring->count; + i--; + } + + tx_ring->next_to_use = i; } static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size) @@ -4295,7 +4321,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, struct igb_ring *tx_ring) { struct igb_tx_buffer *first; - int tso, count; + int tso; u32 tx_flags = 0; __be16 protocol = vlan_get_protocol(skb); u8 hdr_len = 0; @@ -4335,19 +4361,7 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, tx_flags |= IGB_TX_FLAGS_CSUM; } - /* - * count reflects descriptors mapped, if 0 or less then mapping error - * has occurred and we need to rewind the descriptor queue - */ - count = igb_tx_map(tx_ring, skb, first, tx_flags); - if (!count) { - dev_kfree_skb_any(skb); - first->time_stamp = 0; - tx_ring->next_to_use = first - tx_ring->tx_buffer_info; - return NETDEV_TX_OK; - } - - igb_tx_queue(tx_ring, tx_flags, count, skb->len, hdr_len); + igb_tx_map(tx_ring, skb, first, tx_flags, hdr_len); /* Make sure there is space in the ring for the next send. */ igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4); @@ -5609,17 +5623,26 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) /* clear next_to_watch to prevent false hangs */ tx_buffer->next_to_watch = NULL; - do { - tx_desc->wb.status = 0; - if (likely(tx_desc == eop_desc)) { - eop_desc = NULL; + /* update the statistics for this packet */ + total_bytes += tx_buffer->bytecount; + total_packets += tx_buffer->gso_segs; - total_bytes += tx_buffer->bytecount; - total_packets += tx_buffer->gso_segs; - igb_tx_hwtstamp(q_vector, tx_buffer); - } + /* retrieve hardware timestamp */ + igb_tx_hwtstamp(q_vector, tx_buffer); - igb_unmap_and_free_tx_resource(tx_ring, tx_buffer); + /* free the skb */ + dev_kfree_skb_any(tx_buffer->skb); + tx_buffer->skb = NULL; + + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + tx_buffer->dma, + tx_buffer->length, + DMA_TO_DEVICE); + + /* clear last DMA location and unmap remaining buffers */ + while (tx_desc != eop_desc) { + tx_buffer->dma = 0; tx_buffer++; tx_desc++; @@ -5629,7 +5652,28 @@ static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) tx_buffer = tx_ring->tx_buffer_info; tx_desc = IGB_TX_DESC(tx_ring, 0); } - } while (eop_desc); + + /* unmap any remaining paged data */ + if (tx_buffer->dma) { + dma_unmap_page(tx_ring->dev, + tx_buffer->dma, + tx_buffer->length, + DMA_TO_DEVICE); + } + } + + /* clear last DMA location */ + tx_buffer->dma = 0; + + /* move us one more past the eop_desc for start of next pkt */ + tx_buffer++; + tx_desc++; + i++; + if (unlikely(!i)) { + i -= tx_ring->count; + tx_buffer = tx_ring->tx_buffer_info; + tx_desc = IGB_TX_DESC(tx_ring, 0); + } } i += tx_ring->count;