From d5bc77a223b0e9b9dfb002048d2b34a79e7d0b48 Mon Sep 17 00:00:00 2001 From: Dean Nelson Date: Fri, 16 Sep 2011 16:52:54 +0000 Subject: [PATCH 1/8] e1000: don't enable dma receives until after dma address has been setup Doing an 'ifconfig ethN down' followed by an 'ifconfig ethN up' on a qemu-kvm guest system configured with two e1000 NICs can result in an 'unable to handle kernel paging request at 0000000100000000' or 'bad page map in process ...' or something similar. These result from a 4096-byte page being corrupted with the following two-word pattern (16-bytes) repeated throughout the entire page: 0x0000000000000000 0x0000000100000000 There can be other bits set as well. What is a constant is that the 2nd word has the 32nd bit set. So one could see: : 0x0000000000000000 0x0000000100000000 0x0000000000000000 0x0000000172adc067 <<< bad pte 0x800000006ec60067 0x0000000700000040 0x0000000000000000 0x0000000100000000 : Which came from from a process' page table I dumped out when the marked line was seen as bad by print_bad_pte(). The repeating pattern represents the e1000's two-word receive descriptor: struct e1000_rx_desc { __le64 buffer_addr; /* Address of the descriptor's data buffer */ __le16 length; /* Length of data DMAed into data buffer */ __le16 csum; /* Packet checksum */ u8 status; /* Descriptor status */ u8 errors; /* Descriptor Errors */ __le16 special; }; And the 32nd bit of the 2nd word maps to the 'u8 status' member, and corresponds to E1000_RXD_STAT_DD which indicates the descriptor is done. The corruption appears to result from the following... . An 'ifconfig ethN down' gets us into e1000_close(), which through a number of subfunctions results in: 1. E1000_RCTL_EN being cleared in RCTL register. [e1000_down()] 2. dma_free_coherent() being called. [e1000_free_rx_resources()] . An 'ifconfig ethN up' gets us into e1000_open(), which through a number of subfunctions results in: 1. dma_alloc_coherent() being called. [e1000_setup_rx_resources()] 2. E1000_RCTL_EN being set in RCTL register. [e1000_setup_rctl()] 3. E1000_RCTL_EN being cleared in RCTL register. [e1000_configure_rx()] 4. RDLEN, RDBAH and RDBAL registers being set to reflect the dma page allocated in step 1. [e1000_configure_rx()] 5. E1000_RCTL_EN being set in RCTL register. [e1000_configure_rx()] During the 'ifconfig ethN up' there is a window opened, starting in step 2 where the receives are enabled up until they are disabled in step 3, in which the address of the receive descriptor dma page known by the NIC is still the previous one which was freed during the 'ifconfig ethN down'. If this memory has been reallocated for some other use and the NIC feels so inclined, it will write to that former dma page with predictably unpleasant results. I realize that in the guest, we're dealing with an e1000 NIC that is software emulated by qemu-kvm. The problem doesn't appear to occur on bare-metal. Andy suspects that this is because in the emulator link-up is essentially instant and traffic can start flowing immediately. Whereas on bare-metal, link-up usually seems to take at least a few milliseconds. And this might be enough to prevent traffic from flowing into the device inside the window where E1000_RCTL_EN is set. So perhaps a modification needs to be made to the qemu-kvm e1000 NIC emulator to delay the link-up. But in defense of the emulator, it seems like a bad idea to enable dma operations before the address of the memory to be involved has been made known. The following patch no longer enables receives in e1000_setup_rctl() but leaves them however they were. It only enables receives in e1000_configure_rx(), and only after the dma address has been made known to the hardware. There are two places where e1000_setup_rctl() gets called. The one in e1000_configure() is followed immediately by a call to e1000_configure_rx(), so there's really no change functionally (except for the removal of the problem window. The other is in __e1000_shutdown() and is not followed by a call to e1000_configure_rx(), so there is a change functionally. But consider... . An 'ifconfig ethN down' (just as described above). . A 'suspend' of the system, which (I'm assuming) will find its way into e1000_suspend() which calls __e1000_shutdown() resulting in: 1. E1000_RCTL_EN being set in RCTL register. [e1000_setup_rctl()] And again we've re-opened the problem window for some unknown amount of time. Signed-off-by: Andy Gospodarek Signed-off-by: Dean Nelson Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/e1000/e1000_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 27f586afcc34..4bbc05ad9ba1 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -1814,8 +1814,8 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter) rctl &= ~(3 << E1000_RCTL_MO_SHIFT); - rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | - E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF | + rctl |= E1000_RCTL_BAM | E1000_RCTL_LBM_NO | + E1000_RCTL_RDMTS_HALF | (hw->mc_filter_type << E1000_RCTL_MO_SHIFT); if (hw->tbi_compatibility_on == 1) @@ -1917,7 +1917,7 @@ static void e1000_configure_rx(struct e1000_adapter *adapter) } /* Enable Receives */ - ew32(RCTL, rctl); + ew32(RCTL, rctl | E1000_RCTL_EN); } /** From dd1ed3b7bfed15f6162f63840941e9cf4f3611a1 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Sat, 27 Aug 2011 02:06:25 +0000 Subject: [PATCH 2/8] ixgbevf: Fix broken trunk vlan Changes to clean up the vlan rx path broke trunk vlan. Trunk vlans in a VF driver are those set using: "ip link set vf " Signed-off-by: Greg Rose CC: Jiri Pirko Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index d72905b77aba..4930c4605493 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -293,12 +293,10 @@ static void ixgbevf_receive_skb(struct ixgbevf_q_vector *q_vector, { struct ixgbevf_adapter *adapter = q_vector->adapter; bool is_vlan = (status & IXGBE_RXD_STAT_VP); + u16 tag = le16_to_cpu(rx_desc->wb.upper.vlan); - if (is_vlan) { - u16 tag = le16_to_cpu(rx_desc->wb.upper.vlan); - + if (is_vlan && test_bit(tag, adapter->active_vlans)) __vlan_hwaccel_put_tag(skb, tag); - } if (!(adapter->flags & IXGBE_FLAG_IN_NETPOLL)) napi_gro_receive(&q_vector->napi, skb); From d5bf4f67a6b414628dc95b9c4891525296c09a29 Mon Sep 17 00:00:00 2001 From: Emil Tantilov Date: Wed, 31 Aug 2011 00:01:16 +0000 Subject: [PATCH 3/8] ixgbe: Cleanup q_vector interrupt throttle rate logic This patch is meant to help cleanup the interrupt throttle rate logic by storing the interrupt throttle rate as a value in microseconds instead of interrupts per second. The advantage to this approach is that the value can now be stored in an 16 bit field and doesn't require as much math to flip the value back and forth since the hardware already used microseconds when setting the rate. Signed-off-by: Emil Tantilov Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 25 +-- .../net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 153 ++++++------------ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 77 ++++----- 3 files changed, 96 insertions(+), 159 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 1f4a4caeb00e..38940d72991d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -301,26 +301,29 @@ struct ixgbe_ring_container { */ struct ixgbe_q_vector { struct ixgbe_adapter *adapter; - unsigned int v_idx; /* index of q_vector within array, also used for - * finding the bit in EICR and friends that - * represents the vector for this ring */ #ifdef CONFIG_IXGBE_DCA int cpu; /* CPU for DCA */ #endif - struct napi_struct napi; + u16 v_idx; /* index of q_vector within array, also used for + * finding the bit in EICR and friends that + * represents the vector for this ring */ + u16 itr; /* Interrupt throttle rate written to EITR */ struct ixgbe_ring_container rx, tx; - u32 eitr; + + struct napi_struct napi; cpumask_var_t affinity_mask; char name[IFNAMSIZ + 9]; }; -/* Helper macros to switch between ints/sec and what the register uses. - * And yes, it's the same math going both ways. The lowest value - * supported by all of the ixgbe hardware is 8. +/* + * microsecond values for various ITR rates shifted by 2 to fit itr register + * with the first 3 bits reserved 0 */ -#define EITR_INTS_PER_SEC_TO_REG(_eitr) \ - ((_eitr) ? (1000000000 / ((_eitr) * 256)) : 8) -#define EITR_REG_TO_INTS_PER_SEC EITR_INTS_PER_SEC_TO_REG +#define IXGBE_MIN_RSC_ITR 24 +#define IXGBE_100K_ITR 40 +#define IXGBE_20K_ITR 200 +#define IXGBE_10K_ITR 400 +#define IXGBE_8K_ITR 500 static inline u16 ixgbe_desc_unused(struct ixgbe_ring *ring) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index debcf5f350c7..ae9fba5d3036 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -2026,39 +2026,20 @@ static int ixgbe_get_coalesce(struct net_device *netdev, ec->tx_max_coalesced_frames_irq = adapter->tx_work_limit; /* only valid if in constant ITR mode */ - switch (adapter->rx_itr_setting) { - case 0: - /* throttling disabled */ - ec->rx_coalesce_usecs = 0; - break; - case 1: - /* dynamic ITR mode */ - ec->rx_coalesce_usecs = 1; - break; - default: - /* fixed interrupt rate mode */ - ec->rx_coalesce_usecs = 1000000/adapter->rx_eitr_param; - break; - } + if (adapter->rx_itr_setting <= 1) + ec->rx_coalesce_usecs = adapter->rx_itr_setting; + else + ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2; /* if in mixed tx/rx queues per vector mode, report only rx settings */ if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count) return 0; /* only valid if in constant ITR mode */ - switch (adapter->tx_itr_setting) { - case 0: - /* throttling disabled */ - ec->tx_coalesce_usecs = 0; - break; - case 1: - /* dynamic ITR mode */ - ec->tx_coalesce_usecs = 1; - break; - default: - ec->tx_coalesce_usecs = 1000000/adapter->tx_eitr_param; - break; - } + if (adapter->tx_itr_setting <= 1) + ec->tx_coalesce_usecs = adapter->tx_itr_setting; + else + ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2; return 0; } @@ -2077,10 +2058,9 @@ static bool ixgbe_update_rsc(struct ixgbe_adapter *adapter, /* if interrupt rate is too high then disable RSC */ if (ec->rx_coalesce_usecs != 1 && - ec->rx_coalesce_usecs <= 1000000/IXGBE_MAX_RSC_INT_RATE) { + ec->rx_coalesce_usecs <= (IXGBE_MIN_RSC_ITR >> 2)) { if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) { - e_info(probe, "rx-usecs set too low, " - "disabling RSC\n"); + e_info(probe, "rx-usecs set too low, disabling RSC\n"); adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED; return true; } @@ -2088,8 +2068,7 @@ static bool ixgbe_update_rsc(struct ixgbe_adapter *adapter, /* check the feature flag value and enable RSC if necessary */ if ((netdev->features & NETIF_F_LRO) && !(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) { - e_info(probe, "rx-usecs set to %d, " - "re-enabling RSC\n", + e_info(probe, "rx-usecs set to %d, re-enabling RSC\n", ec->rx_coalesce_usecs); adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED; return true; @@ -2104,97 +2083,59 @@ static int ixgbe_set_coalesce(struct net_device *netdev, struct ixgbe_adapter *adapter = netdev_priv(netdev); struct ixgbe_q_vector *q_vector; int i; + int num_vectors; + u16 tx_itr_param, rx_itr_param; bool need_reset = false; /* don't accept tx specific changes if we've got mixed RxTx vectors */ if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count - && ec->tx_coalesce_usecs) + && ec->tx_coalesce_usecs) return -EINVAL; if (ec->tx_max_coalesced_frames_irq) adapter->tx_work_limit = ec->tx_max_coalesced_frames_irq; - if (ec->rx_coalesce_usecs > 1) { - /* check the limits */ - if ((1000000/ec->rx_coalesce_usecs > IXGBE_MAX_INT_RATE) || - (1000000/ec->rx_coalesce_usecs < IXGBE_MIN_INT_RATE)) - return -EINVAL; + if ((ec->rx_coalesce_usecs > (IXGBE_MAX_EITR >> 2)) || + (ec->tx_coalesce_usecs > (IXGBE_MAX_EITR >> 2))) + return -EINVAL; - /* check the old value and enable RSC if necessary */ - need_reset = ixgbe_update_rsc(adapter, ec); + /* check the old value and enable RSC if necessary */ + need_reset = ixgbe_update_rsc(adapter, ec); - /* store the value in ints/second */ - adapter->rx_eitr_param = 1000000/ec->rx_coalesce_usecs; + if (ec->rx_coalesce_usecs > 1) + adapter->rx_itr_setting = ec->rx_coalesce_usecs << 2; + else + adapter->rx_itr_setting = ec->rx_coalesce_usecs; - /* static value of interrupt rate */ - adapter->rx_itr_setting = adapter->rx_eitr_param; - /* clear the lower bit as its used for dynamic state */ - adapter->rx_itr_setting &= ~1; - } else if (ec->rx_coalesce_usecs == 1) { - /* check the old value and enable RSC if necessary */ - need_reset = ixgbe_update_rsc(adapter, ec); + if (adapter->rx_itr_setting == 1) + rx_itr_param = IXGBE_20K_ITR; + else + rx_itr_param = adapter->rx_itr_setting; - /* 1 means dynamic mode */ - adapter->rx_eitr_param = 20000; - adapter->rx_itr_setting = 1; - } else { - /* check the old value and enable RSC if necessary */ - need_reset = ixgbe_update_rsc(adapter, ec); - /* - * any other value means disable eitr, which is best - * served by setting the interrupt rate very high - */ - adapter->rx_eitr_param = IXGBE_MAX_INT_RATE; - adapter->rx_itr_setting = 0; - } + if (ec->tx_coalesce_usecs > 1) + adapter->tx_itr_setting = ec->tx_coalesce_usecs << 2; + else + adapter->tx_itr_setting = ec->tx_coalesce_usecs; - if (ec->tx_coalesce_usecs > 1) { - /* - * don't have to worry about max_int as above because - * tx vectors don't do hardware RSC (an rx function) - */ - /* check the limits */ - if ((1000000/ec->tx_coalesce_usecs > IXGBE_MAX_INT_RATE) || - (1000000/ec->tx_coalesce_usecs < IXGBE_MIN_INT_RATE)) - return -EINVAL; + if (adapter->tx_itr_setting == 1) + tx_itr_param = IXGBE_10K_ITR; + else + tx_itr_param = adapter->tx_itr_setting; - /* store the value in ints/second */ - adapter->tx_eitr_param = 1000000/ec->tx_coalesce_usecs; + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) + num_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; + else + num_vectors = 1; - /* static value of interrupt rate */ - adapter->tx_itr_setting = adapter->tx_eitr_param; - - /* clear the lower bit as its used for dynamic state */ - adapter->tx_itr_setting &= ~1; - } else if (ec->tx_coalesce_usecs == 1) { - /* 1 means dynamic mode */ - adapter->tx_eitr_param = 10000; - adapter->tx_itr_setting = 1; - } else { - adapter->tx_eitr_param = IXGBE_MAX_INT_RATE; - adapter->tx_itr_setting = 0; - } - - /* MSI/MSIx Interrupt Mode */ - if (adapter->flags & - (IXGBE_FLAG_MSIX_ENABLED | IXGBE_FLAG_MSI_ENABLED)) { - int num_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; - for (i = 0; i < num_vectors; i++) { - q_vector = adapter->q_vector[i]; - if (q_vector->tx.count && !q_vector->rx.count) - /* tx only */ - q_vector->eitr = adapter->tx_eitr_param; - else - /* rx only or mixed */ - q_vector->eitr = adapter->rx_eitr_param; - q_vector->tx.work_limit = adapter->tx_work_limit; - ixgbe_write_eitr(q_vector); - } - /* Legacy Interrupt Mode */ - } else { - q_vector = adapter->q_vector[0]; - q_vector->eitr = adapter->rx_eitr_param; + for (i = 0; i < num_vectors; i++) { + q_vector = adapter->q_vector[i]; q_vector->tx.work_limit = adapter->tx_work_limit; + if (q_vector->tx.count && !q_vector->rx.count) + /* tx only */ + q_vector->itr = tx_itr_param; + else + /* rx only or mixed */ + q_vector->itr = rx_itr_param; ixgbe_write_eitr(q_vector); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index c26ea9437fed..3594b09f4993 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1500,12 +1500,19 @@ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter) for (ring = q_vector->tx.ring; ring != NULL; ring = ring->next) ixgbe_set_ivar(adapter, 1, ring->reg_idx, v_idx); - if (q_vector->tx.ring && !q_vector->rx.ring) - /* tx only */ - q_vector->eitr = adapter->tx_eitr_param; - else if (q_vector->rx.ring) - /* rx or mixed */ - q_vector->eitr = adapter->rx_eitr_param; + if (q_vector->tx.ring && !q_vector->rx.ring) { + /* tx only vector */ + if (adapter->tx_itr_setting == 1) + q_vector->itr = IXGBE_10K_ITR; + else + q_vector->itr = adapter->tx_itr_setting; + } else { + /* rx or rx/tx vector */ + if (adapter->rx_itr_setting == 1) + q_vector->itr = IXGBE_20K_ITR; + else + q_vector->itr = adapter->rx_itr_setting; + } ixgbe_write_eitr(q_vector); } @@ -1519,7 +1526,6 @@ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter) case ixgbe_mac_X540: ixgbe_set_ivar(adapter, -1, 1, v_idx); break; - default: break; } @@ -1527,12 +1533,10 @@ static void ixgbe_configure_msix(struct ixgbe_adapter *adapter) /* set up to autoclear timer, and the vectors */ mask = IXGBE_EIMS_ENABLE_MASK; - if (adapter->num_vfs) - mask &= ~(IXGBE_EIMS_OTHER | - IXGBE_EIMS_MAILBOX | - IXGBE_EIMS_LSC); - else - mask &= ~(IXGBE_EIMS_OTHER | IXGBE_EIMS_LSC); + mask &= ~(IXGBE_EIMS_OTHER | + IXGBE_EIMS_MAILBOX | + IXGBE_EIMS_LSC); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIAC, mask); } @@ -1577,7 +1581,7 @@ static void ixgbe_update_itr(struct ixgbe_q_vector *q_vector, * 100-1249MB/s bulk (8000 ints/s) */ /* what was last interrupt timeslice? */ - timepassed_us = 1000000/q_vector->eitr; + timepassed_us = q_vector->itr >> 2; bytes_perint = bytes / timepassed_us; /* bytes/usec */ switch (itr_setting) { @@ -1618,7 +1622,7 @@ void ixgbe_write_eitr(struct ixgbe_q_vector *q_vector) struct ixgbe_adapter *adapter = q_vector->adapter; struct ixgbe_hw *hw = &adapter->hw; int v_idx = q_vector->v_idx; - u32 itr_reg = EITR_INTS_PER_SEC_TO_REG(q_vector->eitr); + u32 itr_reg = q_vector->itr; switch (adapter->hw.mac.type) { case ixgbe_mac_82598EB: @@ -1627,15 +1631,6 @@ void ixgbe_write_eitr(struct ixgbe_q_vector *q_vector) break; case ixgbe_mac_82599EB: case ixgbe_mac_X540: - /* - * 82599 and X540 can support a value of zero, so allow it for - * max interrupt rate, but there is an errata where it can - * not be zero with RSC - */ - if (itr_reg == 8 && - !(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) - itr_reg = 0; - /* * set the WDIS bit to not clear the timer bits and cause an * immediate assertion of the interrupt @@ -1650,7 +1645,7 @@ void ixgbe_write_eitr(struct ixgbe_q_vector *q_vector) static void ixgbe_set_itr(struct ixgbe_q_vector *q_vector) { - u32 new_itr = q_vector->eitr; + u32 new_itr = q_vector->itr; u8 current_itr; ixgbe_update_itr(q_vector, &q_vector->tx); @@ -1661,24 +1656,25 @@ static void ixgbe_set_itr(struct ixgbe_q_vector *q_vector) switch (current_itr) { /* counts and packets in update_itr are dependent on these numbers */ case lowest_latency: - new_itr = 100000; + new_itr = IXGBE_100K_ITR; break; case low_latency: - new_itr = 20000; /* aka hwitr = ~200 */ + new_itr = IXGBE_20K_ITR; break; case bulk_latency: - new_itr = 8000; + new_itr = IXGBE_8K_ITR; break; default: break; } - if (new_itr != q_vector->eitr) { + if (new_itr != q_vector->itr) { /* do an exponential smoothing */ - new_itr = ((q_vector->eitr * 9) + new_itr)/10; + new_itr = (10 * new_itr * q_vector->itr) / + ((9 * new_itr) + q_vector->itr); /* save the algorithm value here */ - q_vector->eitr = new_itr; + q_vector->itr = new_itr & IXGBE_MAX_EITR; ixgbe_write_eitr(q_vector); } @@ -2301,10 +2297,15 @@ static inline void ixgbe_irq_disable(struct ixgbe_adapter *adapter) **/ static void ixgbe_configure_msi_and_legacy(struct ixgbe_adapter *adapter) { - struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_q_vector *q_vector = adapter->q_vector[0]; - IXGBE_WRITE_REG(hw, IXGBE_EITR(0), - EITR_INTS_PER_SEC_TO_REG(adapter->rx_eitr_param)); + /* rx/tx vector */ + if (adapter->rx_itr_setting == 1) + q_vector->itr = IXGBE_20K_ITR; + else + q_vector->itr = adapter->rx_itr_setting; + + ixgbe_write_eitr(q_vector); ixgbe_set_ivar(adapter, 0, 0, 0); ixgbe_set_ivar(adapter, 1, 0, 0); @@ -4613,12 +4614,6 @@ static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter) if (!alloc_cpumask_var(&q_vector->affinity_mask, GFP_KERNEL)) goto err_out; cpumask_set_cpu(v_idx, q_vector->affinity_mask); - - if (q_vector->tx.count && !q_vector->rx.count) - q_vector->eitr = adapter->tx_eitr_param; - else - q_vector->eitr = adapter->rx_eitr_param; - netif_napi_add(adapter->netdev, &q_vector->napi, ixgbe_poll, 64); adapter->q_vector[v_idx] = q_vector; @@ -4864,9 +4859,7 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) /* enable itr by default in dynamic mode */ adapter->rx_itr_setting = 1; - adapter->rx_eitr_param = 20000; adapter->tx_itr_setting = 1; - adapter->tx_eitr_param = 10000; /* set defaults for eitr in MegaBytes */ adapter->eitr_low = 10; From 934c18cc5a2318f525a187e77a46d559d3b8cb44 Mon Sep 17 00:00:00 2001 From: Vasu Dev Date: Thu, 18 Aug 2011 06:20:07 +0000 Subject: [PATCH 4/8] ixgbe: disable LLI for FCoE Disable LLI for FCoE since regular interrupt and their moderation rate works slightly better for FCoE also. Signed-off-by: Vasu Dev Tested-by: Ross Brattain Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c index cae766d28b03..323f4529992d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_fcoe.c @@ -661,9 +661,7 @@ void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter) IXGBE_ETQS_QUEUE_EN | (fcoe_q << IXGBE_ETQS_RX_QUEUE_SHIFT)); - IXGBE_WRITE_REG(hw, IXGBE_FCRXCTRL, - IXGBE_FCRXCTRL_FCOELLI | - IXGBE_FCRXCTRL_FCCRCBO | + IXGBE_WRITE_REG(hw, IXGBE_FCRXCTRL, IXGBE_FCRXCTRL_FCCRCBO | (FC_FCOE_VER << IXGBE_FCRXCTRL_FCOEVER_SHIFT)); return; From 9da712d2ede7e3e3a0da180351505310ee271773 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Tue, 23 Aug 2011 03:14:22 +0000 Subject: [PATCH 5/8] ixgbe: update {P}FC thresholds to account for X540 and loopback Revise high and low threshold marks wrt flow control to account for the X540 devices and latency introduced by the loopback switch. Without this it was in theory possible to drop frames on a supposedly lossless link with X540 or SR-IOV enabled. Previously we used a magic number in a define to calculate the threshold values. This made it difficult to sort out exactly which latencies were or were not being accounted for. Here I was overly explicit and tried to used #define names that would be recognizable after reading the IEEE 802.1Qbb specification. Signed-off-by: John Fastabend Tested-by: Ross Brattain Signed-off-by: Jeff Kirsher --- .../net/ethernet/intel/ixgbe/ixgbe_82598.c | 8 +- .../net/ethernet/intel/ixgbe/ixgbe_common.c | 12 +- drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h | 1 - .../ethernet/intel/ixgbe/ixgbe_dcb_82598.c | 9 +- .../ethernet/intel/ixgbe/ixgbe_dcb_82599.c | 8 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 128 +++++++++++++++++- drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 62 ++++++++- 7 files changed, 190 insertions(+), 38 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c index b816a624a6ce..fa079bbab89a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c @@ -358,7 +358,6 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw, s32 packetbuf_num) u32 fctrl_reg; u32 rmcs_reg; u32 reg; - u32 rx_pba_size; u32 link_speed = 0; bool link_up; @@ -461,16 +460,13 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw, s32 packetbuf_num) /* Set up and enable Rx high/low water mark thresholds, enable XON. */ if (hw->fc.current_mode & ixgbe_fc_tx_pause) { - rx_pba_size = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(packetbuf_num)); - rx_pba_size >>= IXGBE_RXPBSIZE_SHIFT; - - reg = (rx_pba_size - hw->fc.low_water) << 6; + reg = hw->fc.low_water << 6; if (hw->fc.send_xon) reg |= IXGBE_FCRTL_XONE; IXGBE_WRITE_REG(hw, IXGBE_FCRTL(packetbuf_num), reg); - reg = (rx_pba_size - hw->fc.high_water) << 6; + reg = hw->fc.high_water[packetbuf_num] << 6; reg |= IXGBE_FCRTH_FCEN; IXGBE_WRITE_REG(hw, IXGBE_FCRTH(packetbuf_num), reg); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 84ed9ef7288d..59cd54cfdc1f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -1932,7 +1932,6 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw, s32 packetbuf_num) s32 ret_val = 0; u32 mflcn_reg, fccfg_reg; u32 reg; - u32 rx_pba_size; u32 fcrtl, fcrth; #ifdef CONFIG_DCB @@ -2012,11 +2011,8 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw, s32 packetbuf_num) IXGBE_WRITE_REG(hw, IXGBE_MFLCN, mflcn_reg); IXGBE_WRITE_REG(hw, IXGBE_FCCFG, fccfg_reg); - rx_pba_size = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(packetbuf_num)); - rx_pba_size >>= IXGBE_RXPBSIZE_SHIFT; - - fcrth = (rx_pba_size - hw->fc.high_water) << 10; - fcrtl = (rx_pba_size - hw->fc.low_water) << 10; + fcrth = hw->fc.high_water[packetbuf_num] << 10; + fcrtl = hw->fc.low_water << 10; if (hw->fc.current_mode & ixgbe_fc_tx_pause) { fcrth |= IXGBE_FCRTH_FCEN; @@ -2293,7 +2289,9 @@ static s32 ixgbe_setup_fc(struct ixgbe_hw *hw, s32 packetbuf_num) * Validate the water mark configuration. Zero water marks are invalid * because it causes the controller to just blast out fc packets. */ - if (!hw->fc.low_water || !hw->fc.high_water || !hw->fc.pause_time) { + if (!hw->fc.low_water || + !hw->fc.high_water[packetbuf_num] || + !hw->fc.pause_time) { hw_dbg(hw, "Invalid water mark configuration\n"); ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS; goto out; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h index 0a68aa7f5d18..df095a9bbe2b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb.h @@ -36,7 +36,6 @@ #define IXGBE_MAX_PACKET_BUFFERS 8 #define MAX_USER_PRIORITY 8 -#define MAX_TRAFFIC_CLASS 8 #define MAX_BW_GROUP 8 #define BW_PERCENT 100 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c index 2288c3cac010..fcd0e479721f 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82598.c @@ -191,7 +191,7 @@ s32 ixgbe_dcb_config_tx_data_arbiter_82598(struct ixgbe_hw *hw, */ s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw, u8 pfc_en) { - u32 reg, rx_pba_size; + u32 reg; u8 i; if (pfc_en) { @@ -222,9 +222,8 @@ s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw, u8 pfc_en) */ for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { int enabled = pfc_en & (1 << i); - rx_pba_size = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i)); - rx_pba_size >>= IXGBE_RXPBSIZE_SHIFT; - reg = (rx_pba_size - hw->fc.low_water) << 10; + + reg = hw->fc.low_water << 10; if (enabled == pfc_enabled_tx || enabled == pfc_enabled_full) @@ -232,7 +231,7 @@ s32 ixgbe_dcb_config_pfc_82598(struct ixgbe_hw *hw, u8 pfc_en) IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), reg); - reg = (rx_pba_size - hw->fc.high_water) << 10; + reg = hw->fc.high_water[i] << 10; if (enabled == pfc_enabled_tx || enabled == pfc_enabled_full) reg |= IXGBE_FCRTH_FCEN; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c index d64fb872978e..02f6724bf48e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_dcb_82599.c @@ -210,21 +210,19 @@ s32 ixgbe_dcb_config_tx_data_arbiter_82599(struct ixgbe_hw *hw, */ s32 ixgbe_dcb_config_pfc_82599(struct ixgbe_hw *hw, u8 pfc_en) { - u32 i, reg, rx_pba_size; + u32 i, reg; /* Configure PFC Tx thresholds per TC */ for (i = 0; i < MAX_TRAFFIC_CLASS; i++) { int enabled = pfc_en & (1 << i); - rx_pba_size = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i)); - rx_pba_size >>= IXGBE_RXPBSIZE_SHIFT; - reg = (rx_pba_size - hw->fc.low_water) << 10; + reg = hw->fc.low_water << 10; if (enabled) reg |= IXGBE_FCRTL_XONE; IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), reg); - reg = (rx_pba_size - hw->fc.high_water) << 10; + reg = hw->fc.high_water[i] << 10; if (enabled) reg |= IXGBE_FCRTH_FCEN; IXGBE_WRITE_REG(hw, IXGBE_FCRTH_82599(i), reg); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 3594b09f4993..ba703d30f3a9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3351,8 +3351,127 @@ static void ixgbe_configure_dcb(struct ixgbe_adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_RQTC, reg); } } +#endif + +/* Additional bittime to account for IXGBE framing */ +#define IXGBE_ETH_FRAMING 20 + +/* + * ixgbe_hpbthresh - calculate high water mark for flow control + * + * @adapter: board private structure to calculate for + * @pb - packet buffer to calculate + */ +static int ixgbe_hpbthresh(struct ixgbe_adapter *adapter, int pb) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct net_device *dev = adapter->netdev; + int link, tc, kb, marker; + u32 dv_id, rx_pba; + + /* Calculate max LAN frame size */ + tc = link = dev->mtu + ETH_HLEN + ETH_FCS_LEN + IXGBE_ETH_FRAMING; + +#ifdef IXGBE_FCOE + /* FCoE traffic class uses FCOE jumbo frames */ + if (dev->features & NETIF_F_FCOE_MTU) { + int fcoe_pb = 0; + +#ifdef CONFIG_IXGBE_DCB + fcoe_pb = netdev_get_prio_tc_map(dev, adapter->fcoe.up); #endif + if (fcoe_pb == pb && tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) + tc = IXGBE_FCOE_JUMBO_FRAME_SIZE; + } +#endif + + /* Calculate delay value for device */ + switch (hw->mac.type) { + case ixgbe_mac_X540: + dv_id = IXGBE_DV_X540(link, tc); + break; + default: + dv_id = IXGBE_DV(link, tc); + break; + } + + /* Loopback switch introduces additional latency */ + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) + dv_id += IXGBE_B2BT(tc); + + /* Delay value is calculated in bit times convert to KB */ + kb = IXGBE_BT2KB(dv_id); + rx_pba = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(pb)) >> 10; + + marker = rx_pba - kb; + + /* It is possible that the packet buffer is not large enough + * to provide required headroom. In this case throw an error + * to user and a do the best we can. + */ + if (marker < 0) { + e_warn(drv, "Packet Buffer(%i) can not provide enough" + "headroom to support flow control." + "Decrease MTU or number of traffic classes\n", pb); + marker = tc + 1; + } + + return marker; +} + +/* + * ixgbe_lpbthresh - calculate low water mark for for flow control + * + * @adapter: board private structure to calculate for + * @pb - packet buffer to calculate + */ +static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct net_device *dev = adapter->netdev; + int tc; + u32 dv_id; + + /* Calculate max LAN frame size */ + tc = dev->mtu + ETH_HLEN + ETH_FCS_LEN; + + /* Calculate delay value for device */ + switch (hw->mac.type) { + case ixgbe_mac_X540: + dv_id = IXGBE_LOW_DV_X540(tc); + break; + default: + dv_id = IXGBE_LOW_DV(tc); + break; + } + + /* Delay value is calculated in bit times convert to KB */ + return IXGBE_BT2KB(dv_id); +} + +/* + * ixgbe_pbthresh_setup - calculate and setup high low water marks + */ +static void ixgbe_pbthresh_setup(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int num_tc = netdev_get_num_tc(adapter->netdev); + int i; + + if (!num_tc) + num_tc = 1; + + hw->fc.low_water = ixgbe_lpbthresh(adapter); + + for (i = 0; i < num_tc; i++) { + hw->fc.high_water[i] = ixgbe_hpbthresh(adapter, i); + + /* Low water marks must not be larger than high water marks */ + if (hw->fc.low_water > hw->fc.high_water[i]) + hw->fc.low_water = 0; + } +} static void ixgbe_configure_pb(struct ixgbe_adapter *adapter) { @@ -3367,6 +3486,7 @@ static void ixgbe_configure_pb(struct ixgbe_adapter *adapter) hdrm = 0; hw->mac.ops.set_rxpba(hw, tc, hdrm, PBA_STRATEGY_EQUAL); + ixgbe_pbthresh_setup(adapter); } static void ixgbe_fdir_filter_restore(struct ixgbe_adapter *adapter) @@ -4769,13 +4889,11 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; struct pci_dev *pdev = adapter->pdev; - struct net_device *dev = adapter->netdev; unsigned int rss; #ifdef CONFIG_IXGBE_DCB int j; struct tc_configuration *tc; #endif - int max_frame = dev->mtu + ETH_HLEN + ETH_FCS_LEN; /* PCI config space info */ @@ -4851,8 +4969,7 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) #ifdef CONFIG_DCB adapter->last_lfc_mode = hw->fc.current_mode; #endif - hw->fc.high_water = FC_HIGH_WATER(max_frame); - hw->fc.low_water = FC_LOW_WATER(max_frame); + ixgbe_pbthresh_setup(adapter); hw->fc.pause_time = IXGBE_DEFAULT_FCPAUSE; hw->fc.send_xon = true; hw->fc.disable_fc_autoneg = false; @@ -5119,9 +5236,6 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu) /* must set new MTU before calling down or up */ netdev->mtu = new_mtu; - hw->fc.high_water = FC_HIGH_WATER(max_frame); - hw->fc.low_water = FC_LOW_WATER(max_frame); - if (netif_running(netdev)) ixgbe_reinit_locked(adapter); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 9a03341e5261..16dd461d4af3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -404,6 +404,7 @@ #define IXGBE_WUPL_LENGTH_MASK 0xFFFF /* DCB registers */ +#define MAX_TRAFFIC_CLASS 8 #define IXGBE_RMCS 0x03D00 #define IXGBE_DPMCS 0x07F40 #define IXGBE_PDPMCS 0x0CD00 @@ -2323,13 +2324,60 @@ typedef u32 ixgbe_physical_layer; #define IXGBE_PHYSICAL_LAYER_10GBASE_XAUI 0x1000 #define IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA 0x2000 -/* Flow Control Macros */ -#define PAUSE_RTT 8 -#define PAUSE_MTU(MTU) ((MTU + 1024 - 1) / 1024) +/* Flow Control Data Sheet defined values + * Calculation and defines taken from 802.1bb Annex O + */ -#define FC_HIGH_WATER(MTU) ((((PAUSE_RTT + PAUSE_MTU(MTU)) * 144) + 99) / 100 +\ - PAUSE_MTU(MTU)) -#define FC_LOW_WATER(MTU) (2 * (2 * PAUSE_MTU(MTU) + PAUSE_RTT)) +/* BitTimes (BT) conversion */ +#define IXGBE_BT2KB(BT) ((BT + 1023) / (8 * 1024)) +#define IXGBE_B2BT(BT) (BT * 8) + +/* Calculate Delay to respond to PFC */ +#define IXGBE_PFC_D 672 + +/* Calculate Cable Delay */ +#define IXGBE_CABLE_DC 5556 /* Delay Copper */ +#define IXGBE_CABLE_DO 5000 /* Delay Optical */ + +/* Calculate Interface Delay X540 */ +#define IXGBE_PHY_DC 25600 /* Delay 10G BASET */ +#define IXGBE_MAC_DC 8192 /* Delay Copper XAUI interface */ +#define IXGBE_XAUI_DC (2 * 2048) /* Delay Copper Phy */ + +#define IXGBE_ID_X540 (IXGBE_MAC_DC + IXGBE_XAUI_DC + IXGBE_PHY_DC) + +/* Calculate Interface Delay 82598, 82599 */ +#define IXGBE_PHY_D 12800 +#define IXGBE_MAC_D 4096 +#define IXGBE_XAUI_D (2 * 1024) + +#define IXGBE_ID (IXGBE_MAC_D + IXGBE_XAUI_D + IXGBE_PHY_D) + +/* Calculate Delay incurred from higher layer */ +#define IXGBE_HD 6144 + +/* Calculate PCI Bus delay for low thresholds */ +#define IXGBE_PCI_DELAY 10000 + +/* Calculate X540 delay value in bit times */ +#define IXGBE_FILL_RATE (36 / 25) + +#define IXGBE_DV_X540(LINK, TC) (IXGBE_FILL_RATE * \ + (IXGBE_B2BT(LINK) + IXGBE_PFC_D + \ + (2 * IXGBE_CABLE_DC) + \ + (2 * IXGBE_ID_X540) + \ + IXGBE_HD + IXGBE_B2BT(TC))) + +/* Calculate 82599, 82598 delay value in bit times */ +#define IXGBE_DV(LINK, TC) (IXGBE_FILL_RATE * \ + (IXGBE_B2BT(LINK) + IXGBE_PFC_D + \ + (2 * IXGBE_CABLE_DC) + (2 * IXGBE_ID) + \ + IXGBE_HD + IXGBE_B2BT(TC))) + +/* Calculate low threshold delay values */ +#define IXGBE_LOW_DV_X540(TC) (2 * IXGBE_B2BT(TC) + \ + (IXGBE_FILL_RATE * IXGBE_PCI_DELAY)) +#define IXGBE_LOW_DV(TC) (2 * IXGBE_LOW_DV_X540(TC)) /* Software ATR hash keys */ #define IXGBE_ATR_BUCKET_HASH_KEY 0x3DAD14E2 @@ -2548,7 +2596,7 @@ struct ixgbe_bus_info { /* Flow control parameters */ struct ixgbe_fc_info { - u32 high_water; /* Flow Control High-water */ + u32 high_water[MAX_TRAFFIC_CLASS]; /* Flow Control High-water */ u32 low_water; /* Flow Control Low-water */ u16 pause_time; /* Flow Control Pause timer */ bool send_xon; /* Flow control send XON */ From 4f51bf702395ab45aa68e6b702df2728cc7fe344 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Sat, 20 Aug 2011 04:49:45 +0000 Subject: [PATCH 6/8] ixgbe add thermal sensor support for x540 hardware Add code to enable thermal sensors for the x540 hardware, as well as a thermal interrupt check which will exit with a critical message of a thermal overheat is detected. Intent of code allows other mac types to be added with different configuration in the future. Fixed in this version is the addition of setting the temp_sensor capable flag which was previously only set for a specific mac. Signed-off-by: Jacob Keller Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 66 ++++++++++++++----- drivers/net/ethernet/intel/ixgbe/ixgbe_type.h | 2 + 2 files changed, 50 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index ba703d30f3a9..79636eaeb74d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1751,6 +1751,39 @@ static void ixgbe_check_fan_failure(struct ixgbe_adapter *adapter, u32 eicr) } } +static void ixgbe_check_overtemp_event(struct ixgbe_adapter *adapter, u32 eicr) +{ + if (!(adapter->flags2 & IXGBE_FLAG2_TEMP_SENSOR_CAPABLE)) + return; + + switch (adapter->hw.mac.type) { + case ixgbe_mac_82599EB: + /* + * Need to check link state so complete overtemp check + * on service task + */ + if (((eicr & IXGBE_EICR_GPI_SDP0) || (eicr & IXGBE_EICR_LSC)) && + (!test_bit(__IXGBE_DOWN, &adapter->state))) { + adapter->interrupt_event = eicr; + adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_EVENT; + ixgbe_service_event_schedule(adapter); + return; + } + return; + case ixgbe_mac_X540: + if (!(eicr & IXGBE_EICR_TS)) + return; + break; + default: + return; + } + + e_crit(drv, + "Network adapter has been stopped because it has over heated. " + "Restart the computer. If the problem persists, " + "power off the system and replace the adapter\n"); +} + static void ixgbe_check_sfp_event(struct ixgbe_adapter *adapter, u32 eicr) { struct ixgbe_hw *hw = &adapter->hw; @@ -1854,7 +1887,16 @@ static inline void ixgbe_irq_enable(struct ixgbe_adapter *adapter, bool queues, mask &= ~IXGBE_EIMS_LSC; if (adapter->flags2 & IXGBE_FLAG2_TEMP_SENSOR_CAPABLE) - mask |= IXGBE_EIMS_GPI_SDP0; + switch (adapter->hw.mac.type) { + case ixgbe_mac_82599EB: + mask |= IXGBE_EIMS_GPI_SDP0; + break; + case ixgbe_mac_X540: + mask |= IXGBE_EIMS_TS; + break; + default: + break; + } if (adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE) mask |= IXGBE_EIMS_GPI_SDP1; switch (adapter->hw.mac.type) { @@ -1924,14 +1966,7 @@ static irqreturn_t ixgbe_msix_other(int irq, void *data) } } ixgbe_check_sfp_event(adapter, eicr); - if ((adapter->flags2 & IXGBE_FLAG2_TEMP_SENSOR_CAPABLE) && - ((eicr & IXGBE_EICR_GPI_SDP0) || (eicr & IXGBE_EICR_LSC))) { - if (!test_bit(__IXGBE_DOWN, &adapter->state)) { - adapter->interrupt_event = eicr; - adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_EVENT; - ixgbe_service_event_schedule(adapter); - } - } + ixgbe_check_overtemp_event(adapter, eicr); break; default: break; @@ -2140,15 +2175,9 @@ static irqreturn_t ixgbe_intr(int irq, void *data) switch (hw->mac.type) { case ixgbe_mac_82599EB: + case ixgbe_mac_X540: ixgbe_check_sfp_event(adapter, eicr); - if ((adapter->flags2 & IXGBE_FLAG2_TEMP_SENSOR_CAPABLE) && - ((eicr & IXGBE_EICR_GPI_SDP0) || (eicr & IXGBE_EICR_LSC))) { - if (!test_bit(__IXGBE_DOWN, &adapter->state)) { - adapter->interrupt_event = eicr; - adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_EVENT; - ixgbe_service_event_schedule(adapter); - } - } + ixgbe_check_overtemp_event(adapter, eicr); break; default: break; @@ -4913,8 +4942,9 @@ static int __devinit ixgbe_sw_init(struct ixgbe_adapter *adapter) adapter->flags |= IXGBE_FLAG_FAN_FAIL_CAPABLE; adapter->max_msix_q_vectors = MAX_MSIX_Q_VECTORS_82598; break; - case ixgbe_mac_82599EB: case ixgbe_mac_X540: + adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE; + case ixgbe_mac_82599EB: adapter->max_msix_q_vectors = MAX_MSIX_Q_VECTORS_82599; adapter->flags2 |= IXGBE_FLAG2_RSC_CAPABLE; adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 16dd461d4af3..838847cd8c6a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -1280,6 +1280,7 @@ enum { #define IXGBE_EICR_LSC 0x00100000 /* Link Status Change */ #define IXGBE_EICR_LINKSEC 0x00200000 /* PN Threshold */ #define IXGBE_EICR_MNG 0x00400000 /* Manageability Event Interrupt */ +#define IXGBE_EICR_TS 0x00800000 /* Thermal Sensor Event */ #define IXGBE_EICR_GPI_SDP0 0x01000000 /* Gen Purpose Interrupt on SDP0 */ #define IXGBE_EICR_GPI_SDP1 0x02000000 /* Gen Purpose Interrupt on SDP1 */ #define IXGBE_EICR_GPI_SDP2 0x04000000 /* Gen Purpose Interrupt on SDP2 */ @@ -1314,6 +1315,7 @@ enum { #define IXGBE_EIMS_MAILBOX IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */ #define IXGBE_EIMS_LSC IXGBE_EICR_LSC /* Link Status Change */ #define IXGBE_EIMS_MNG IXGBE_EICR_MNG /* MNG Event Interrupt */ +#define IXGBE_EIMS_TS IXGBE_EICR_TS /* Thermel Sensor Event */ #define IXGBE_EIMS_GPI_SDP0 IXGBE_EICR_GPI_SDP0 /* SDP0 Gen Purpose Int */ #define IXGBE_EIMS_GPI_SDP1 IXGBE_EICR_GPI_SDP1 /* SDP1 Gen Purpose Int */ #define IXGBE_EIMS_GPI_SDP2 IXGBE_EICR_GPI_SDP2 /* SDP2 Gen Purpose Int */ From f3df98ec9e8ed127456a601f99619c88e9d6017f Mon Sep 17 00:00:00 2001 From: Don Skidmore Date: Wed, 17 Aug 2011 10:15:21 +0000 Subject: [PATCH 7/8] ixgbe: cleanup ixgbe_setup_gpie() for X540 The X540 thermal sensor interrupt isn't a General Purpose Interrupt so doesn't need to be enabled in ixgbe_setup_gpie(). Likewise X540 doesn't use the SDP0 for thermal sensor so it doesn't need to be enabled for any device other than 82599. Signed-off-by: Don Skidmore Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 79636eaeb74d..3f5c5a4291a6 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -3680,8 +3680,18 @@ static void ixgbe_setup_gpie(struct ixgbe_adapter *adapter) } /* Enable Thermal over heat sensor interrupt */ - if (adapter->flags2 & IXGBE_FLAG2_TEMP_SENSOR_CAPABLE) - gpie |= IXGBE_SDP0_GPIEN; + if (adapter->flags2 & IXGBE_FLAG2_TEMP_SENSOR_CAPABLE) { + switch (adapter->hw.mac.type) { + case ixgbe_mac_82599EB: + gpie |= IXGBE_SDP0_GPIEN; + break; + case ixgbe_mac_X540: + gpie |= IXGBE_EIMS_TS; + break; + default: + break; + } + } /* Enable fan failure interrupt */ if (adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE) From 0ccb974df5ac5f721491c1f07154450168b6fd0a Mon Sep 17 00:00:00 2001 From: Don Skidmore Date: Thu, 4 Aug 2011 02:07:48 +0000 Subject: [PATCH 8/8] ixgbe: add ECC warning for legacy interrupts Noticed that the legacy Interrupt handler didn't have the same ECC warning as did the MSI. So this patch adds it. Signed-off-by: Don Skidmore Tested-by: Phil Schmitt Signed-off-by: Jeff Kirsher --- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 3f5c5a4291a6..9cd44adcea14 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -2175,8 +2175,12 @@ static irqreturn_t ixgbe_intr(int irq, void *data) switch (hw->mac.type) { case ixgbe_mac_82599EB: - case ixgbe_mac_X540: ixgbe_check_sfp_event(adapter, eicr); + /* Fall through */ + case ixgbe_mac_X540: + if (eicr & IXGBE_EICR_ECC) + e_info(link, "Received unrecoverable ECC err, please " + "reboot\n"); ixgbe_check_overtemp_event(adapter, eicr); break; default: