linux/drivers/net/e1000e/netdev.c

5252 lines
142 KiB
C
Raw Normal View History

/*******************************************************************************
Intel PRO/1000 Linux driver
Copyright(c) 1999 - 2008 Intel Corporation.
This program is free software; you can redistribute it and/or modify it
under the terms and conditions of the GNU General Public License,
version 2, as published by the Free Software Foundation.
This program is distributed in the hope it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
The full GNU General Public License is included in this distribution in
the file called "COPYING".
Contact Information:
Linux NICS <linux.nics@intel.com>
e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
*******************************************************************************/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/delay.h>
#include <linux/netdevice.h>
#include <linux/tcp.h>
#include <linux/ipv6.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/pm_qos_params.h>
#include "e1000.h"
#define DRV_VERSION "0.3.3.3-k6"
char e1000e_driver_name[] = "e1000e";
const char e1000e_driver_version[] = DRV_VERSION;
static const struct e1000_info *e1000_info_tbl[] = {
[board_82571] = &e1000_82571_info,
[board_82572] = &e1000_82572_info,
[board_82573] = &e1000_82573_info,
[board_82574] = &e1000_82574_info,
[board_80003es2lan] = &e1000_es2_info,
[board_ich8lan] = &e1000_ich8_info,
[board_ich9lan] = &e1000_ich9_info,
[board_ich10lan] = &e1000_ich10_info,
};
#ifdef DEBUG
/**
* e1000_get_hw_dev_name - return device name string
* used by hardware layer to print debugging information
**/
char *e1000e_get_hw_dev_name(struct e1000_hw *hw)
{
return hw->adapter->netdev->name;
}
#endif
/**
* e1000_desc_unused - calculate if we have unused descriptors
**/
static int e1000_desc_unused(struct e1000_ring *ring)
{
if (ring->next_to_clean > ring->next_to_use)
return ring->next_to_clean - ring->next_to_use - 1;
return ring->count + ring->next_to_clean - ring->next_to_use - 1;
}
/**
* e1000_receive_skb - helper function to handle Rx indications
* @adapter: board private structure
* @status: descriptor status field as written by hardware
* @vlan: descriptor vlan field as written by hardware (no le/be conversion)
* @skb: pointer to sk_buff to be indicated to stack
**/
static void e1000_receive_skb(struct e1000_adapter *adapter,
struct net_device *netdev,
struct sk_buff *skb,
u8 status, __le16 vlan)
{
skb->protocol = eth_type_trans(skb, netdev);
if (adapter->vlgrp && (status & E1000_RXD_STAT_VP))
vlan_hwaccel_receive_skb(skb, adapter->vlgrp,
le16_to_cpu(vlan));
else
netif_receive_skb(skb);
}
/**
* e1000_rx_checksum - Receive Checksum Offload for 82543
* @adapter: board private structure
* @status_err: receive descriptor status and error fields
* @csum: receive descriptor csum field
* @sk_buff: socket buffer with received data
**/
static void e1000_rx_checksum(struct e1000_adapter *adapter, u32 status_err,
u32 csum, struct sk_buff *skb)
{
u16 status = (u16)status_err;
u8 errors = (u8)(status_err >> 24);
skb->ip_summed = CHECKSUM_NONE;
/* Ignore Checksum bit is set */
if (status & E1000_RXD_STAT_IXSM)
return;
/* TCP/UDP checksum error bit is set */
if (errors & E1000_RXD_ERR_TCPE) {
/* let the stack verify checksum errors */
adapter->hw_csum_err++;
return;
}
/* TCP/UDP Checksum has not been calculated */
if (!(status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)))
return;
/* It must be a TCP or UDP packet with a valid checksum */
if (status & E1000_RXD_STAT_TCPCS) {
/* TCP checksum is good */
skb->ip_summed = CHECKSUM_UNNECESSARY;
} else {
/*
* IP fragment with UDP payload
* Hardware complements the payload checksum, so we undo it
* and then put the value in host order for further stack use.
*/
__sum16 sum = (__force __sum16)htons(csum);
skb->csum = csum_unfold(~sum);
skb->ip_summed = CHECKSUM_COMPLETE;
}
adapter->hw_csum_good++;
}
/**
* e1000_alloc_rx_buffers - Replace used receive buffers; legacy & extended
* @adapter: address of board private structure
**/
static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
int cleaned_count)
{
struct net_device *netdev = adapter->netdev;
struct pci_dev *pdev = adapter->pdev;
struct e1000_ring *rx_ring = adapter->rx_ring;
struct e1000_rx_desc *rx_desc;
struct e1000_buffer *buffer_info;
struct sk_buff *skb;
unsigned int i;
unsigned int bufsz = adapter->rx_buffer_len + NET_IP_ALIGN;
i = rx_ring->next_to_use;
buffer_info = &rx_ring->buffer_info[i];
while (cleaned_count--) {
skb = buffer_info->skb;
if (skb) {
skb_trim(skb, 0);
goto map_skb;
}
skb = netdev_alloc_skb(netdev, bufsz);
if (!skb) {
/* Better luck next round */
adapter->alloc_rx_buff_failed++;
break;
}
/*
* Make buffer alignment 2 beyond a 16 byte boundary
* this will result in a 16 byte aligned IP header after
* the 14 byte MAC header is removed
*/
skb_reserve(skb, NET_IP_ALIGN);
buffer_info->skb = skb;
map_skb:
buffer_info->dma = pci_map_single(pdev, skb->data,
adapter->rx_buffer_len,
PCI_DMA_FROMDEVICE);
dma-mapping: add the device argument to dma_mapping_error() Add per-device dma_mapping_ops support for CONFIG_X86_64 as POWER architecture does: This enables us to cleanly fix the Calgary IOMMU issue that some devices are not behind the IOMMU (http://lkml.org/lkml/2008/5/8/423). I think that per-device dma_mapping_ops support would be also helpful for KVM people to support PCI passthrough but Andi thinks that this makes it difficult to support the PCI passthrough (see the above thread). So I CC'ed this to KVM camp. Comments are appreciated. A pointer to dma_mapping_ops to struct dev_archdata is added. If the pointer is non NULL, DMA operations in asm/dma-mapping.h use it. If it's NULL, the system-wide dma_ops pointer is used as before. If it's useful for KVM people, I plan to implement a mechanism to register a hook called when a new pci (or dma capable) device is created (it works with hot plugging). It enables IOMMUs to set up an appropriate dma_mapping_ops per device. The major obstacle is that dma_mapping_error doesn't take a pointer to the device unlike other DMA operations. So x86 can't have dma_mapping_ops per device. Note all the POWER IOMMUs use the same dma_mapping_error function so this is not a problem for POWER but x86 IOMMUs use different dma_mapping_error functions. The first patch adds the device argument to dma_mapping_error. The patch is trivial but large since it touches lots of drivers and dma-mapping.h in all the architecture. This patch: dma_mapping_error() doesn't take a pointer to the device unlike other DMA operations. So we can't have dma_mapping_ops per device. Note that POWER already has dma_mapping_ops per device but all the POWER IOMMUs use the same dma_mapping_error function. x86 IOMMUs use device argument. [akpm@linux-foundation.org: fix sge] [akpm@linux-foundation.org: fix svc_rdma] [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: fix bnx2x] [akpm@linux-foundation.org: fix s2io] [akpm@linux-foundation.org: fix pasemi_mac] [akpm@linux-foundation.org: fix sdhci] [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: fix sparc] [akpm@linux-foundation.org: fix ibmvscsi] Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> Cc: Muli Ben-Yehuda <muli@il.ibm.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: Avi Kivity <avi@qumranet.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-25 19:44:49 -07:00
if (pci_dma_mapping_error(pdev, buffer_info->dma)) {
dev_err(&pdev->dev, "RX DMA map failed\n");
adapter->rx_dma_failed++;
break;
}
rx_desc = E1000_RX_DESC(*rx_ring, i);
rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
i++;
if (i == rx_ring->count)
i = 0;
buffer_info = &rx_ring->buffer_info[i];
}
if (rx_ring->next_to_use != i) {
rx_ring->next_to_use = i;
if (i-- == 0)
i = (rx_ring->count - 1);
/*
* Force memory writes to complete before letting h/w
* know there are new descriptors to fetch. (Only
* applicable for weak-ordered memory model archs,
* such as IA-64).
*/
wmb();
writel(i, adapter->hw.hw_addr + rx_ring->tail);
}
}
/**
* e1000_alloc_rx_buffers_ps - Replace used receive buffers; packet split
* @adapter: address of board private structure
**/
static void e1000_alloc_rx_buffers_ps(struct e1000_adapter *adapter,
int cleaned_count)
{
struct net_device *netdev = adapter->netdev;
struct pci_dev *pdev = adapter->pdev;
union e1000_rx_desc_packet_split *rx_desc;
struct e1000_ring *rx_ring = adapter->rx_ring;
struct e1000_buffer *buffer_info;
struct e1000_ps_page *ps_page;
struct sk_buff *skb;
unsigned int i, j;
i = rx_ring->next_to_use;
buffer_info = &rx_ring->buffer_info[i];
while (cleaned_count--) {
rx_desc = E1000_RX_DESC_PS(*rx_ring, i);
for (j = 0; j < PS_PAGE_BUFFERS; j++) {
ps_page = &buffer_info->ps_pages[j];
if (j >= adapter->rx_ps_pages) {
/* all unused desc entries get hw null ptr */
rx_desc->read.buffer_addr[j+1] = ~cpu_to_le64(0);
continue;
}
if (!ps_page->page) {
ps_page->page = alloc_page(GFP_ATOMIC);
if (!ps_page->page) {
adapter->alloc_rx_buff_failed++;
goto no_buffers;
}
ps_page->dma = pci_map_page(pdev,
ps_page->page,
0, PAGE_SIZE,
PCI_DMA_FROMDEVICE);
dma-mapping: add the device argument to dma_mapping_error() Add per-device dma_mapping_ops support for CONFIG_X86_64 as POWER architecture does: This enables us to cleanly fix the Calgary IOMMU issue that some devices are not behind the IOMMU (http://lkml.org/lkml/2008/5/8/423). I think that per-device dma_mapping_ops support would be also helpful for KVM people to support PCI passthrough but Andi thinks that this makes it difficult to support the PCI passthrough (see the above thread). So I CC'ed this to KVM camp. Comments are appreciated. A pointer to dma_mapping_ops to struct dev_archdata is added. If the pointer is non NULL, DMA operations in asm/dma-mapping.h use it. If it's NULL, the system-wide dma_ops pointer is used as before. If it's useful for KVM people, I plan to implement a mechanism to register a hook called when a new pci (or dma capable) device is created (it works with hot plugging). It enables IOMMUs to set up an appropriate dma_mapping_ops per device. The major obstacle is that dma_mapping_error doesn't take a pointer to the device unlike other DMA operations. So x86 can't have dma_mapping_ops per device. Note all the POWER IOMMUs use the same dma_mapping_error function so this is not a problem for POWER but x86 IOMMUs use different dma_mapping_error functions. The first patch adds the device argument to dma_mapping_error. The patch is trivial but large since it touches lots of drivers and dma-mapping.h in all the architecture. This patch: dma_mapping_error() doesn't take a pointer to the device unlike other DMA operations. So we can't have dma_mapping_ops per device. Note that POWER already has dma_mapping_ops per device but all the POWER IOMMUs use the same dma_mapping_error function. x86 IOMMUs use device argument. [akpm@linux-foundation.org: fix sge] [akpm@linux-foundation.org: fix svc_rdma] [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: fix bnx2x] [akpm@linux-foundation.org: fix s2io] [akpm@linux-foundation.org: fix pasemi_mac] [akpm@linux-foundation.org: fix sdhci] [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: fix sparc] [akpm@linux-foundation.org: fix ibmvscsi] Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> Cc: Muli Ben-Yehuda <muli@il.ibm.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: Avi Kivity <avi@qumranet.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-25 19:44:49 -07:00
if (pci_dma_mapping_error(pdev, ps_page->dma)) {
dev_err(&adapter->pdev->dev,
"RX DMA page map failed\n");
adapter->rx_dma_failed++;
goto no_buffers;
}
}
/*
* Refresh the desc even if buffer_addrs
* didn't change because each write-back
* erases this info.
*/
rx_desc->read.buffer_addr[j+1] =
cpu_to_le64(ps_page->dma);
}
skb = netdev_alloc_skb(netdev,
adapter->rx_ps_bsize0 + NET_IP_ALIGN);
if (!skb) {
adapter->alloc_rx_buff_failed++;
break;
}
/*
* Make buffer alignment 2 beyond a 16 byte boundary
* this will result in a 16 byte aligned IP header after
* the 14 byte MAC header is removed
*/
skb_reserve(skb, NET_IP_ALIGN);
buffer_info->skb = skb;
buffer_info->dma = pci_map_single(pdev, skb->data,
adapter->rx_ps_bsize0,
PCI_DMA_FROMDEVICE);
dma-mapping: add the device argument to dma_mapping_error() Add per-device dma_mapping_ops support for CONFIG_X86_64 as POWER architecture does: This enables us to cleanly fix the Calgary IOMMU issue that some devices are not behind the IOMMU (http://lkml.org/lkml/2008/5/8/423). I think that per-device dma_mapping_ops support would be also helpful for KVM people to support PCI passthrough but Andi thinks that this makes it difficult to support the PCI passthrough (see the above thread). So I CC'ed this to KVM camp. Comments are appreciated. A pointer to dma_mapping_ops to struct dev_archdata is added. If the pointer is non NULL, DMA operations in asm/dma-mapping.h use it. If it's NULL, the system-wide dma_ops pointer is used as before. If it's useful for KVM people, I plan to implement a mechanism to register a hook called when a new pci (or dma capable) device is created (it works with hot plugging). It enables IOMMUs to set up an appropriate dma_mapping_ops per device. The major obstacle is that dma_mapping_error doesn't take a pointer to the device unlike other DMA operations. So x86 can't have dma_mapping_ops per device. Note all the POWER IOMMUs use the same dma_mapping_error function so this is not a problem for POWER but x86 IOMMUs use different dma_mapping_error functions. The first patch adds the device argument to dma_mapping_error. The patch is trivial but large since it touches lots of drivers and dma-mapping.h in all the architecture. This patch: dma_mapping_error() doesn't take a pointer to the device unlike other DMA operations. So we can't have dma_mapping_ops per device. Note that POWER already has dma_mapping_ops per device but all the POWER IOMMUs use the same dma_mapping_error function. x86 IOMMUs use device argument. [akpm@linux-foundation.org: fix sge] [akpm@linux-foundation.org: fix svc_rdma] [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: fix bnx2x] [akpm@linux-foundation.org: fix s2io] [akpm@linux-foundation.org: fix pasemi_mac] [akpm@linux-foundation.org: fix sdhci] [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: fix sparc] [akpm@linux-foundation.org: fix ibmvscsi] Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> Cc: Muli Ben-Yehuda <muli@il.ibm.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: Avi Kivity <avi@qumranet.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-25 19:44:49 -07:00
if (pci_dma_mapping_error(pdev, buffer_info->dma)) {
dev_err(&pdev->dev, "RX DMA map failed\n");
adapter->rx_dma_failed++;
/* cleanup skb */
dev_kfree_skb_any(skb);
buffer_info->skb = NULL;
break;
}
rx_desc->read.buffer_addr[0] = cpu_to_le64(buffer_info->dma);
i++;
if (i == rx_ring->count)
i = 0;
buffer_info = &rx_ring->buffer_info[i];
}
no_buffers:
if (rx_ring->next_to_use != i) {
rx_ring->next_to_use = i;
if (!(i--))
i = (rx_ring->count - 1);
/*
* Force memory writes to complete before letting h/w
* know there are new descriptors to fetch. (Only
* applicable for weak-ordered memory model archs,
* such as IA-64).
*/
wmb();
/*
* Hardware increments by 16 bytes, but packet split
* descriptors are 32 bytes...so we increment tail
* twice as much.
*/
writel(i<<1, adapter->hw.hw_addr + rx_ring->tail);
}
}
/**
* e1000_alloc_jumbo_rx_buffers - Replace used jumbo receive buffers
* @adapter: address of board private structure
* @rx_ring: pointer to receive ring structure
* @cleaned_count: number of buffers to allocate this pass
**/
static void e1000_alloc_jumbo_rx_buffers(struct e1000_adapter *adapter,
int cleaned_count)
{
struct net_device *netdev = adapter->netdev;
struct pci_dev *pdev = adapter->pdev;
struct e1000_rx_desc *rx_desc;
struct e1000_ring *rx_ring = adapter->rx_ring;
struct e1000_buffer *buffer_info;
struct sk_buff *skb;
unsigned int i;
unsigned int bufsz = 256 -
16 /* for skb_reserve */ -
NET_IP_ALIGN;
i = rx_ring->next_to_use;
buffer_info = &rx_ring->buffer_info[i];
while (cleaned_count--) {
skb = buffer_info->skb;
if (skb) {
skb_trim(skb, 0);
goto check_page;
}
skb = netdev_alloc_skb(netdev, bufsz);
if (unlikely(!skb)) {
/* Better luck next round */
adapter->alloc_rx_buff_failed++;
break;
}
/* Make buffer alignment 2 beyond a 16 byte boundary
* this will result in a 16 byte aligned IP header after
* the 14 byte MAC header is removed
*/
skb_reserve(skb, NET_IP_ALIGN);
buffer_info->skb = skb;
check_page:
/* allocate a new page if necessary */
if (!buffer_info->page) {
buffer_info->page = alloc_page(GFP_ATOMIC);
if (unlikely(!buffer_info->page)) {
adapter->alloc_rx_buff_failed++;
break;
}
}
if (!buffer_info->dma)
buffer_info->dma = pci_map_page(pdev,
buffer_info->page, 0,
PAGE_SIZE,
PCI_DMA_FROMDEVICE);
rx_desc = E1000_RX_DESC(*rx_ring, i);
rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
if (unlikely(++i == rx_ring->count))
i = 0;
buffer_info = &rx_ring->buffer_info[i];
}
if (likely(rx_ring->next_to_use != i)) {
rx_ring->next_to_use = i;
if (unlikely(i-- == 0))
i = (rx_ring->count - 1);
/* Force memory writes to complete before letting h/w
* know there are new descriptors to fetch. (Only
* applicable for weak-ordered memory model archs,
* such as IA-64). */
wmb();
writel(i, adapter->hw.hw_addr + rx_ring->tail);
}
}
/**
* e1000_clean_rx_irq - Send received data up the network stack; legacy
* @adapter: board private structure
*
* the return value indicates whether actual cleaning was done, there
* is no guarantee that everything was cleaned
**/
static bool e1000_clean_rx_irq(struct e1000_adapter *adapter,
int *work_done, int work_to_do)
{
struct net_device *netdev = adapter->netdev;
struct pci_dev *pdev = adapter->pdev;
struct e1000_ring *rx_ring = adapter->rx_ring;
struct e1000_rx_desc *rx_desc, *next_rxd;
struct e1000_buffer *buffer_info, *next_buffer;
u32 length;
unsigned int i;
int cleaned_count = 0;
bool cleaned = 0;
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
i = rx_ring->next_to_clean;
rx_desc = E1000_RX_DESC(*rx_ring, i);
buffer_info = &rx_ring->buffer_info[i];
while (rx_desc->status & E1000_RXD_STAT_DD) {
struct sk_buff *skb;
u8 status;
if (*work_done >= work_to_do)
break;
(*work_done)++;
status = rx_desc->status;
skb = buffer_info->skb;
buffer_info->skb = NULL;
prefetch(skb->data - NET_IP_ALIGN);
i++;
if (i == rx_ring->count)
i = 0;
next_rxd = E1000_RX_DESC(*rx_ring, i);
prefetch(next_rxd);
next_buffer = &rx_ring->buffer_info[i];
cleaned = 1;
cleaned_count++;
pci_unmap_single(pdev,
buffer_info->dma,
adapter->rx_buffer_len,
PCI_DMA_FROMDEVICE);
buffer_info->dma = 0;
length = le16_to_cpu(rx_desc->length);
/* !EOP means multiple descriptors were used to store a single
* packet, also make sure the frame isn't just CRC only */
if (!(status & E1000_RXD_STAT_EOP) || (length <= 4)) {
/* All receives must fit into a single buffer */
e_dbg("%s: Receive packet consumed multiple buffers\n",
netdev->name);
/* recycle */
buffer_info->skb = skb;
goto next_desc;
}
if (rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK) {
/* recycle */
buffer_info->skb = skb;
goto next_desc;
}
/* adjust length to remove Ethernet CRC */
if (!(adapter->flags2 & FLAG2_CRC_STRIPPING))
length -= 4;
total_rx_bytes += length;
total_rx_packets++;
/*
* code added for copybreak, this should improve
* performance for small packets with large amounts
* of reassembly being done in the stack
*/
if (length < copybreak) {
struct sk_buff *new_skb =
netdev_alloc_skb(netdev, length + NET_IP_ALIGN);
if (new_skb) {
skb_reserve(new_skb, NET_IP_ALIGN);
skb_copy_to_linear_data_offset(new_skb,
-NET_IP_ALIGN,
(skb->data -
NET_IP_ALIGN),
(length +
NET_IP_ALIGN));
/* save the skb in buffer_info as good */
buffer_info->skb = skb;
skb = new_skb;
}
/* else just continue with the old one */
}
/* end copybreak code */
skb_put(skb, length);
/* Receive Checksum Offload */
e1000_rx_checksum(adapter,
(u32)(status) |
((u32)(rx_desc->errors) << 24),
le16_to_cpu(rx_desc->csum), skb);
e1000_receive_skb(adapter, netdev, skb,status,rx_desc->special);
next_desc:
rx_desc->status = 0;
/* return some buffers to hardware, one at a time is too slow */
if (cleaned_count >= E1000_RX_BUFFER_WRITE) {
adapter->alloc_rx_buf(adapter, cleaned_count);
cleaned_count = 0;
}
/* use prefetched values */
rx_desc = next_rxd;
buffer_info = next_buffer;
}
rx_ring->next_to_clean = i;
cleaned_count = e1000_desc_unused(rx_ring);
if (cleaned_count)
adapter->alloc_rx_buf(adapter, cleaned_count);
adapter->total_rx_bytes += total_rx_bytes;
adapter->total_rx_packets += total_rx_packets;
adapter->net_stats.rx_bytes += total_rx_bytes;
adapter->net_stats.rx_packets += total_rx_packets;
return cleaned;
}
static void e1000_put_txbuf(struct e1000_adapter *adapter,
struct e1000_buffer *buffer_info)
{
if (buffer_info->dma) {
pci_unmap_page(adapter->pdev, buffer_info->dma,
buffer_info->length, PCI_DMA_TODEVICE);
buffer_info->dma = 0;
}
if (buffer_info->skb) {
dev_kfree_skb_any(buffer_info->skb);
buffer_info->skb = NULL;
}
}
static void e1000_print_tx_hang(struct e1000_adapter *adapter)
{
struct e1000_ring *tx_ring = adapter->tx_ring;
unsigned int i = tx_ring->next_to_clean;
unsigned int eop = tx_ring->buffer_info[i].next_to_watch;
struct e1000_tx_desc *eop_desc = E1000_TX_DESC(*tx_ring, eop);
/* detected Tx unit hang */
e_err("Detected Tx Unit Hang:\n"
" TDH <%x>\n"
" TDT <%x>\n"
" next_to_use <%x>\n"
" next_to_clean <%x>\n"
"buffer_info[next_to_clean]:\n"
" time_stamp <%lx>\n"
" next_to_watch <%x>\n"
" jiffies <%lx>\n"
" next_to_watch.status <%x>\n",
readl(adapter->hw.hw_addr + tx_ring->head),
readl(adapter->hw.hw_addr + tx_ring->tail),
tx_ring->next_to_use,
tx_ring->next_to_clean,
tx_ring->buffer_info[eop].time_stamp,
eop,
jiffies,
eop_desc->upper.fields.status);
}
/**
* e1000_clean_tx_irq - Reclaim resources after transmit completes
* @adapter: board private structure
*
* the return value indicates whether actual cleaning was done, there
* is no guarantee that everything was cleaned
**/
static bool e1000_clean_tx_irq(struct e1000_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
struct e1000_hw *hw = &adapter->hw;
struct e1000_ring *tx_ring = adapter->tx_ring;
struct e1000_tx_desc *tx_desc, *eop_desc;
struct e1000_buffer *buffer_info;
unsigned int i, eop;
unsigned int count = 0;
bool cleaned = 0;
unsigned int total_tx_bytes = 0, total_tx_packets = 0;
i = tx_ring->next_to_clean;
eop = tx_ring->buffer_info[i].next_to_watch;
eop_desc = E1000_TX_DESC(*tx_ring, eop);
while (eop_desc->upper.data & cpu_to_le32(E1000_TXD_STAT_DD)) {
for (cleaned = 0; !cleaned; ) {
tx_desc = E1000_TX_DESC(*tx_ring, i);
buffer_info = &tx_ring->buffer_info[i];
cleaned = (i == eop);
if (cleaned) {
struct sk_buff *skb = buffer_info->skb;
unsigned int segs, bytecount;
segs = skb_shinfo(skb)->gso_segs ?: 1;
/* multiply data chunks by size of headers */
bytecount = ((segs - 1) * skb_headlen(skb)) +
skb->len;
total_tx_packets += segs;
total_tx_bytes += bytecount;
}
e1000_put_txbuf(adapter, buffer_info);
tx_desc->upper.data = 0;
i++;
if (i == tx_ring->count)
i = 0;
}
eop = tx_ring->buffer_info[i].next_to_watch;
eop_desc = E1000_TX_DESC(*tx_ring, eop);
#define E1000_TX_WEIGHT 64
/* weight of a sort for tx, to avoid endless transmit cleanup */
if (count++ == E1000_TX_WEIGHT)
break;
}
tx_ring->next_to_clean = i;
#define TX_WAKE_THRESHOLD 32
if (cleaned && netif_carrier_ok(netdev) &&
e1000_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD) {
/* Make sure that anybody stopping the queue after this
* sees the new next_to_clean.
*/
smp_mb();
if (netif_queue_stopped(netdev) &&
!(test_bit(__E1000_DOWN, &adapter->state))) {
netif_wake_queue(netdev);
++adapter->restart_queue;
}
}
if (adapter->detect_tx_hung) {
/*
* Detect a transmit hang in hardware, this serializes the
* check with the clearing of time_stamp and movement of i
*/
adapter->detect_tx_hung = 0;
if (tx_ring->buffer_info[eop].dma &&
time_after(jiffies, tx_ring->buffer_info[eop].time_stamp
+ (adapter->tx_timeout_factor * HZ))
&& !(er32(STATUS) & E1000_STATUS_TXOFF)) {
e1000_print_tx_hang(adapter);
netif_stop_queue(netdev);
}
}
adapter->total_tx_bytes += total_tx_bytes;
adapter->total_tx_packets += total_tx_packets;
adapter->net_stats.tx_bytes += total_tx_bytes;
adapter->net_stats.tx_packets += total_tx_packets;
return cleaned;
}
/**
* e1000_clean_rx_irq_ps - Send received data up the network stack; packet split
* @adapter: board private structure
*
* the return value indicates whether actual cleaning was done, there
* is no guarantee that everything was cleaned
**/
static bool e1000_clean_rx_irq_ps(struct e1000_adapter *adapter,
int *work_done, int work_to_do)
{
union e1000_rx_desc_packet_split *rx_desc, *next_rxd;
struct net_device *netdev = adapter->netdev;
struct pci_dev *pdev = adapter->pdev;
struct e1000_ring *rx_ring = adapter->rx_ring;
struct e1000_buffer *buffer_info, *next_buffer;
struct e1000_ps_page *ps_page;
struct sk_buff *skb;
unsigned int i, j;
u32 length, staterr;
int cleaned_count = 0;
bool cleaned = 0;
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
i = rx_ring->next_to_clean;
rx_desc = E1000_RX_DESC_PS(*rx_ring, i);
staterr = le32_to_cpu(rx_desc->wb.middle.status_error);
buffer_info = &rx_ring->buffer_info[i];
while (staterr & E1000_RXD_STAT_DD) {
if (*work_done >= work_to_do)
break;
(*work_done)++;
skb = buffer_info->skb;
/* in the packet split case this is header only */
prefetch(skb->data - NET_IP_ALIGN);
i++;
if (i == rx_ring->count)
i = 0;
next_rxd = E1000_RX_DESC_PS(*rx_ring, i);
prefetch(next_rxd);
next_buffer = &rx_ring->buffer_info[i];
cleaned = 1;
cleaned_count++;
pci_unmap_single(pdev, buffer_info->dma,
adapter->rx_ps_bsize0,
PCI_DMA_FROMDEVICE);
buffer_info->dma = 0;
if (!(staterr & E1000_RXD_STAT_EOP)) {
e_dbg("%s: Packet Split buffers didn't pick up the "
"full packet\n", netdev->name);
dev_kfree_skb_irq(skb);
goto next_desc;
}
if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
dev_kfree_skb_irq(skb);
goto next_desc;
}
length = le16_to_cpu(rx_desc->wb.middle.length0);
if (!length) {
e_dbg("%s: Last part of the packet spanning multiple "
"descriptors\n", netdev->name);
dev_kfree_skb_irq(skb);
goto next_desc;
}
/* Good Receive */
skb_put(skb, length);
{
/*
* this looks ugly, but it seems compiler issues make it
* more efficient than reusing j
*/
int l1 = le16_to_cpu(rx_desc->wb.upper.length[0]);
/*
* page alloc/put takes too long and effects small packet
* throughput, so unsplit small packets and save the alloc/put
* only valid in softirq (napi) context to call kmap_*
*/
if (l1 && (l1 <= copybreak) &&
((length + l1) <= adapter->rx_ps_bsize0)) {
u8 *vaddr;
ps_page = &buffer_info->ps_pages[0];
/*
* there is no documentation about how to call
* kmap_atomic, so we can't hold the mapping
* very long
*/
pci_dma_sync_single_for_cpu(pdev, ps_page->dma,
PAGE_SIZE, PCI_DMA_FROMDEVICE);
vaddr = kmap_atomic(ps_page->page, KM_SKB_DATA_SOFTIRQ);
memcpy(skb_tail_pointer(skb), vaddr, l1);
kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
pci_dma_sync_single_for_device(pdev, ps_page->dma,
PAGE_SIZE, PCI_DMA_FROMDEVICE);
/* remove the CRC */
if (!(adapter->flags2 & FLAG2_CRC_STRIPPING))
l1 -= 4;
skb_put(skb, l1);
goto copydone;
} /* if */
}
for (j = 0; j < PS_PAGE_BUFFERS; j++) {
length = le16_to_cpu(rx_desc->wb.upper.length[j]);
if (!length)
break;
ps_page = &buffer_info->ps_pages[j];
pci_unmap_page(pdev, ps_page->dma, PAGE_SIZE,
PCI_DMA_FROMDEVICE);
ps_page->dma = 0;
skb_fill_page_desc(skb, j, ps_page->page, 0, length);
ps_page->page = NULL;
skb->len += length;
skb->data_len += length;
skb->truesize += length;
}
/* strip the ethernet crc, problem is we're using pages now so
* this whole operation can get a little cpu intensive
*/
if (!(adapter->flags2 & FLAG2_CRC_STRIPPING))
pskb_trim(skb, skb->len - 4);
copydone:
total_rx_bytes += skb->len;
total_rx_packets++;
e1000_rx_checksum(adapter, staterr, le16_to_cpu(
rx_desc->wb.lower.hi_dword.csum_ip.csum), skb);
if (rx_desc->wb.upper.header_status &
cpu_to_le16(E1000_RXDPS_HDRSTAT_HDRSP))
adapter->rx_hdr_split++;
e1000_receive_skb(adapter, netdev, skb,
staterr, rx_desc->wb.middle.vlan);
next_desc:
rx_desc->wb.middle.status_error &= cpu_to_le32(~0xFF);
buffer_info->skb = NULL;
/* return some buffers to hardware, one at a time is too slow */
if (cleaned_count >= E1000_RX_BUFFER_WRITE) {
adapter->alloc_rx_buf(adapter, cleaned_count);
cleaned_count = 0;
}
/* use prefetched values */
rx_desc = next_rxd;
buffer_info = next_buffer;
staterr = le32_to_cpu(rx_desc->wb.middle.status_error);
}
rx_ring->next_to_clean = i;
cleaned_count = e1000_desc_unused(rx_ring);
if (cleaned_count)
adapter->alloc_rx_buf(adapter, cleaned_count);
adapter->total_rx_bytes += total_rx_bytes;
adapter->total_rx_packets += total_rx_packets;
adapter->net_stats.rx_bytes += total_rx_bytes;
adapter->net_stats.rx_packets += total_rx_packets;
return cleaned;
}
/**
* e1000_consume_page - helper function
**/
static void e1000_consume_page(struct e1000_buffer *bi, struct sk_buff *skb,
u16 length)
{
bi->page = NULL;
skb->len += length;
skb->data_len += length;
skb->truesize += length;
}
/**
* e1000_clean_jumbo_rx_irq - Send received data up the network stack; legacy
* @adapter: board private structure
*
* the return value indicates whether actual cleaning was done, there
* is no guarantee that everything was cleaned
**/
static bool e1000_clean_jumbo_rx_irq(struct e1000_adapter *adapter,
int *work_done, int work_to_do)
{
struct net_device *netdev = adapter->netdev;
struct pci_dev *pdev = adapter->pdev;
struct e1000_ring *rx_ring = adapter->rx_ring;
struct e1000_rx_desc *rx_desc, *next_rxd;
struct e1000_buffer *buffer_info, *next_buffer;
u32 length;
unsigned int i;
int cleaned_count = 0;
bool cleaned = false;
unsigned int total_rx_bytes=0, total_rx_packets=0;
i = rx_ring->next_to_clean;
rx_desc = E1000_RX_DESC(*rx_ring, i);
buffer_info = &rx_ring->buffer_info[i];
while (rx_desc->status & E1000_RXD_STAT_DD) {
struct sk_buff *skb;
u8 status;
if (*work_done >= work_to_do)
break;
(*work_done)++;
status = rx_desc->status;
skb = buffer_info->skb;
buffer_info->skb = NULL;
++i;
if (i == rx_ring->count)
i = 0;
next_rxd = E1000_RX_DESC(*rx_ring, i);
prefetch(next_rxd);
next_buffer = &rx_ring->buffer_info[i];
cleaned = true;
cleaned_count++;
pci_unmap_page(pdev, buffer_info->dma, PAGE_SIZE,
PCI_DMA_FROMDEVICE);
buffer_info->dma = 0;
length = le16_to_cpu(rx_desc->length);
/* errors is only valid for DD + EOP descriptors */
if (unlikely((status & E1000_RXD_STAT_EOP) &&
(rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK))) {
/* recycle both page and skb */
buffer_info->skb = skb;
/* an error means any chain goes out the window
* too */
if (rx_ring->rx_skb_top)
dev_kfree_skb(rx_ring->rx_skb_top);
rx_ring->rx_skb_top = NULL;
goto next_desc;
}
#define rxtop rx_ring->rx_skb_top
if (!(status & E1000_RXD_STAT_EOP)) {
/* this descriptor is only the beginning (or middle) */
if (!rxtop) {
/* this is the beginning of a chain */
rxtop = skb;
skb_fill_page_desc(rxtop, 0, buffer_info->page,
0, length);
} else {
/* this is the middle of a chain */
skb_fill_page_desc(rxtop,
skb_shinfo(rxtop)->nr_frags,
buffer_info->page, 0, length);
/* re-use the skb, only consumed the page */
buffer_info->skb = skb;
}
e1000_consume_page(buffer_info, rxtop, length);
goto next_desc;
} else {
if (rxtop) {
/* end of the chain */
skb_fill_page_desc(rxtop,
skb_shinfo(rxtop)->nr_frags,
buffer_info->page, 0, length);
/* re-use the current skb, we only consumed the
* page */
buffer_info->skb = skb;
skb = rxtop;
rxtop = NULL;
e1000_consume_page(buffer_info, skb, length);
} else {
/* no chain, got EOP, this buf is the packet
* copybreak to save the put_page/alloc_page */
if (length <= copybreak &&
skb_tailroom(skb) >= length) {
u8 *vaddr;
vaddr = kmap_atomic(buffer_info->page,
KM_SKB_DATA_SOFTIRQ);
memcpy(skb_tail_pointer(skb), vaddr,
length);
kunmap_atomic(vaddr,
KM_SKB_DATA_SOFTIRQ);
/* re-use the page, so don't erase
* buffer_info->page */
skb_put(skb, length);
} else {
skb_fill_page_desc(skb, 0,
buffer_info->page, 0,
length);
e1000_consume_page(buffer_info, skb,
length);
}
}
}
/* Receive Checksum Offload XXX recompute due to CRC strip? */
e1000_rx_checksum(adapter,
(u32)(status) |
((u32)(rx_desc->errors) << 24),
le16_to_cpu(rx_desc->csum), skb);
/* probably a little skewed due to removing CRC */
total_rx_bytes += skb->len;
total_rx_packets++;
/* eth type trans needs skb->data to point to something */
if (!pskb_may_pull(skb, ETH_HLEN)) {
e_err("pskb_may_pull failed.\n");
dev_kfree_skb(skb);
goto next_desc;
}
e1000_receive_skb(adapter, netdev, skb, status,
rx_desc->special);
next_desc:
rx_desc->status = 0;
/* return some buffers to hardware, one at a time is too slow */
if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) {
adapter->alloc_rx_buf(adapter, cleaned_count);
cleaned_count = 0;
}
/* use prefetched values */
rx_desc = next_rxd;
buffer_info = next_buffer;
}
rx_ring->next_to_clean = i;
cleaned_count = e1000_desc_unused(rx_ring);
if (cleaned_count)
adapter->alloc_rx_buf(adapter, cleaned_count);
adapter->total_rx_bytes += total_rx_bytes;
adapter->total_rx_packets += total_rx_packets;
adapter->net_stats.rx_bytes += total_rx_bytes;
adapter->net_stats.rx_packets += total_rx_packets;
return cleaned;
}
/**
* e1000_clean_rx_ring - Free Rx Buffers per Queue
* @adapter: board private structure
**/
static void e1000_clean_rx_ring(struct e1000_adapter *adapter)
{
struct e1000_ring *rx_ring = adapter->rx_ring;
struct e1000_buffer *buffer_info;
struct e1000_ps_page *ps_page;
struct pci_dev *pdev = adapter->pdev;
unsigned int i, j;
/* Free all the Rx ring sk_buffs */
for (i = 0; i < rx_ring->count; i++) {
buffer_info = &rx_ring->buffer_info[i];
if (buffer_info->dma) {
if (adapter->clean_rx == e1000_clean_rx_irq)
pci_unmap_single(pdev, buffer_info->dma,
adapter->rx_buffer_len,
PCI_DMA_FROMDEVICE);
else if (adapter->clean_rx == e1000_clean_jumbo_rx_irq)
pci_unmap_page(pdev, buffer_info->dma,
PAGE_SIZE,
PCI_DMA_FROMDEVICE);
else if (adapter->clean_rx == e1000_clean_rx_irq_ps)
pci_unmap_single(pdev, buffer_info->dma,
adapter->rx_ps_bsize0,
PCI_DMA_FROMDEVICE);
buffer_info->dma = 0;
}
if (buffer_info->page) {
put_page(buffer_info->page);
buffer_info->page = NULL;
}
if (buffer_info->skb) {
dev_kfree_skb(buffer_info->skb);
buffer_info->skb = NULL;
}
for (j = 0; j < PS_PAGE_BUFFERS; j++) {
ps_page = &buffer_info->ps_pages[j];
if (!ps_page->page)
break;
pci_unmap_page(pdev, ps_page->dma, PAGE_SIZE,
PCI_DMA_FROMDEVICE);
ps_page->dma = 0;
put_page(ps_page->page);
ps_page->page = NULL;
}
}
/* there also may be some cached data from a chained receive */
if (rx_ring->rx_skb_top) {
dev_kfree_skb(rx_ring->rx_skb_top);
rx_ring->rx_skb_top = NULL;
}
/* Zero out the descriptor ring */
memset(rx_ring->desc, 0, rx_ring->size);
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
writel(0, adapter->hw.hw_addr + rx_ring->head);
writel(0, adapter->hw.hw_addr + rx_ring->tail);
}
static void e1000e_downshift_workaround(struct work_struct *work)
{
struct e1000_adapter *adapter = container_of(work,
struct e1000_adapter, downshift_task);
e1000e_gig_downshift_workaround_ich8lan(&adapter->hw);
}
/**
* e1000_intr_msi - Interrupt Handler
* @irq: interrupt number
* @data: pointer to a network interface device structure
**/
static irqreturn_t e1000_intr_msi(int irq, void *data)
{
struct net_device *netdev = data;
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
u32 icr = er32(ICR);
/*
* read ICR disables interrupts using IAM
*/
if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
hw->mac.get_link_status = 1;
/*
* ICH8 workaround-- Call gig speed drop workaround on cable
* disconnect (LSC) before accessing any PHY registers
*/
if ((adapter->flags & FLAG_LSC_GIG_SPEED_DROP) &&
(!(er32(STATUS) & E1000_STATUS_LU)))
schedule_work(&adapter->downshift_task);
/*
* 80003ES2LAN workaround-- For packet buffer work-around on
* link down event; disable receives here in the ISR and reset
* adapter in watchdog
*/
if (netif_carrier_ok(netdev) &&
adapter->flags & FLAG_RX_NEEDS_RESTART) {
/* disable receives */
u32 rctl = er32(RCTL);
ew32(RCTL, rctl & ~E1000_RCTL_EN);
adapter->flags |= FLAG_RX_RESTART_NOW;
}
/* guard against interrupt when we're going down */
if (!test_bit(__E1000_DOWN, &adapter->state))
mod_timer(&adapter->watchdog_timer, jiffies + 1);
}
if (netif_rx_schedule_prep(netdev, &adapter->napi)) {
adapter->total_tx_bytes = 0;
adapter->total_tx_packets = 0;
adapter->total_rx_bytes = 0;
adapter->total_rx_packets = 0;
__netif_rx_schedule(netdev, &adapter->napi);
}
return IRQ_HANDLED;
}
/**
* e1000_intr - Interrupt Handler
* @irq: interrupt number
* @data: pointer to a network interface device structure
**/
static irqreturn_t e1000_intr(int irq, void *data)
{
struct net_device *netdev = data;
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
u32 rctl, icr = er32(ICR);
if (!icr)
return IRQ_NONE; /* Not our interrupt */
/*
* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
* not set, then the adapter didn't send an interrupt
*/
if (!(icr & E1000_ICR_INT_ASSERTED))
return IRQ_NONE;
/*
* Interrupt Auto-Mask...upon reading ICR,
* interrupts are masked. No need for the
* IMC write
*/
if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
hw->mac.get_link_status = 1;
/*
* ICH8 workaround-- Call gig speed drop workaround on cable
* disconnect (LSC) before accessing any PHY registers
*/
if ((adapter->flags & FLAG_LSC_GIG_SPEED_DROP) &&
(!(er32(STATUS) & E1000_STATUS_LU)))
schedule_work(&adapter->downshift_task);
/*
* 80003ES2LAN workaround--
* For packet buffer work-around on link down event;
* disable receives here in the ISR and
* reset adapter in watchdog
*/
if (netif_carrier_ok(netdev) &&
(adapter->flags & FLAG_RX_NEEDS_RESTART)) {
/* disable receives */
rctl = er32(RCTL);
ew32(RCTL, rctl & ~E1000_RCTL_EN);
adapter->flags |= FLAG_RX_RESTART_NOW;
}
/* guard against interrupt when we're going down */
if (!test_bit(__E1000_DOWN, &adapter->state))
mod_timer(&adapter->watchdog_timer, jiffies + 1);
}
if (netif_rx_schedule_prep(netdev, &adapter->napi)) {
adapter->total_tx_bytes = 0;
adapter->total_tx_packets = 0;
adapter->total_rx_bytes = 0;
adapter->total_rx_packets = 0;
__netif_rx_schedule(netdev, &adapter->napi);
}
return IRQ_HANDLED;
}
static irqreturn_t e1000_msix_other(int irq, void *data)
{
struct net_device *netdev = data;
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
u32 icr = er32(ICR);
if (!(icr & E1000_ICR_INT_ASSERTED)) {
ew32(IMS, E1000_IMS_OTHER);
return IRQ_NONE;
}
if (icr & adapter->eiac_mask)
ew32(ICS, (icr & adapter->eiac_mask));
if (icr & E1000_ICR_OTHER) {
if (!(icr & E1000_ICR_LSC))
goto no_link_interrupt;
hw->mac.get_link_status = 1;
/* guard against interrupt when we're going down */
if (!test_bit(__E1000_DOWN, &adapter->state))
mod_timer(&adapter->watchdog_timer, jiffies + 1);
}
no_link_interrupt:
ew32(IMS, E1000_IMS_LSC | E1000_IMS_OTHER);
return IRQ_HANDLED;
}
static irqreturn_t e1000_intr_msix_tx(int irq, void *data)
{
struct net_device *netdev = data;
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
struct e1000_ring *tx_ring = adapter->tx_ring;
adapter->total_tx_bytes = 0;
adapter->total_tx_packets = 0;
if (!e1000_clean_tx_irq(adapter))
/* Ring was not completely cleaned, so fire another interrupt */
ew32(ICS, tx_ring->ims_val);
return IRQ_HANDLED;
}
static irqreturn_t e1000_intr_msix_rx(int irq, void *data)
{
struct net_device *netdev = data;
struct e1000_adapter *adapter = netdev_priv(netdev);
/* Write the ITR value calculated at the end of the
* previous interrupt.
*/
if (adapter->rx_ring->set_itr) {
writel(1000000000 / (adapter->rx_ring->itr_val * 256),
adapter->hw.hw_addr + adapter->rx_ring->itr_register);
adapter->rx_ring->set_itr = 0;
}
if (netif_rx_schedule_prep(netdev, &adapter->napi)) {
adapter->total_rx_bytes = 0;
adapter->total_rx_packets = 0;
__netif_rx_schedule(netdev, &adapter->napi);
}
return IRQ_HANDLED;
}
/**
* e1000_configure_msix - Configure MSI-X hardware
*
* e1000_configure_msix sets up the hardware to properly
* generate MSI-X interrupts.
**/
static void e1000_configure_msix(struct e1000_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
struct e1000_ring *rx_ring = adapter->rx_ring;
struct e1000_ring *tx_ring = adapter->tx_ring;
int vector = 0;
u32 ctrl_ext, ivar = 0;
adapter->eiac_mask = 0;
/* Workaround issue with spurious interrupts on 82574 in MSI-X mode */
if (hw->mac.type == e1000_82574) {
u32 rfctl = er32(RFCTL);
rfctl |= E1000_RFCTL_ACK_DIS;
ew32(RFCTL, rfctl);
}
#define E1000_IVAR_INT_ALLOC_VALID 0x8
/* Configure Rx vector */
rx_ring->ims_val = E1000_IMS_RXQ0;
adapter->eiac_mask |= rx_ring->ims_val;
if (rx_ring->itr_val)
writel(1000000000 / (rx_ring->itr_val * 256),
hw->hw_addr + rx_ring->itr_register);
else
writel(1, hw->hw_addr + rx_ring->itr_register);
ivar = E1000_IVAR_INT_ALLOC_VALID | vector;
/* Configure Tx vector */
tx_ring->ims_val = E1000_IMS_TXQ0;
vector++;
if (tx_ring->itr_val)
writel(1000000000 / (tx_ring->itr_val * 256),
hw->hw_addr + tx_ring->itr_register);
else
writel(1, hw->hw_addr + tx_ring->itr_register);
adapter->eiac_mask |= tx_ring->ims_val;
ivar |= ((E1000_IVAR_INT_ALLOC_VALID | vector) << 8);
/* set vector for Other Causes, e.g. link changes */
vector++;
ivar |= ((E1000_IVAR_INT_ALLOC_VALID | vector) << 16);
if (rx_ring->itr_val)
writel(1000000000 / (rx_ring->itr_val * 256),
hw->hw_addr + E1000_EITR_82574(vector));
else
writel(1, hw->hw_addr + E1000_EITR_82574(vector));
/* Cause Tx interrupts on every write back */
ivar |= (1 << 31);
ew32(IVAR, ivar);
/* enable MSI-X PBA support */
ctrl_ext = er32(CTRL_EXT);
ctrl_ext |= E1000_CTRL_EXT_PBA_CLR;
/* Auto-Mask Other interrupts upon ICR read */
#define E1000_EIAC_MASK_82574 0x01F00000
ew32(IAM, ~E1000_EIAC_MASK_82574 | E1000_IMS_OTHER);
ctrl_ext |= E1000_CTRL_EXT_EIAME;
ew32(CTRL_EXT, ctrl_ext);
e1e_flush();
}
void e1000e_reset_interrupt_capability(struct e1000_adapter *adapter)
{
if (adapter->msix_entries) {
pci_disable_msix(adapter->pdev);
kfree(adapter->msix_entries);
adapter->msix_entries = NULL;
} else if (adapter->flags & FLAG_MSI_ENABLED) {
pci_disable_msi(adapter->pdev);
adapter->flags &= ~FLAG_MSI_ENABLED;
}
return;
}
/**
* e1000e_set_interrupt_capability - set MSI or MSI-X if supported
*
* Attempt to configure interrupts using the best available
* capabilities of the hardware and kernel.
**/
void e1000e_set_interrupt_capability(struct e1000_adapter *adapter)
{
int err;
int numvecs, i;
switch (adapter->int_mode) {
case E1000E_INT_MODE_MSIX:
if (adapter->flags & FLAG_HAS_MSIX) {
numvecs = 3; /* RxQ0, TxQ0 and other */
adapter->msix_entries = kcalloc(numvecs,
sizeof(struct msix_entry),
GFP_KERNEL);
if (adapter->msix_entries) {
for (i = 0; i < numvecs; i++)
adapter->msix_entries[i].entry = i;
err = pci_enable_msix(adapter->pdev,
adapter->msix_entries,
numvecs);
if (err == 0)
return;
}
/* MSI-X failed, so fall through and try MSI */
e_err("Failed to initialize MSI-X interrupts. "
"Falling back to MSI interrupts.\n");
e1000e_reset_interrupt_capability(adapter);
}
adapter->int_mode = E1000E_INT_MODE_MSI;
/* Fall through */
case E1000E_INT_MODE_MSI:
if (!pci_enable_msi(adapter->pdev)) {
adapter->flags |= FLAG_MSI_ENABLED;
} else {
adapter->int_mode = E1000E_INT_MODE_LEGACY;
e_err("Failed to initialize MSI interrupts. Falling "
"back to legacy interrupts.\n");
}
/* Fall through */
case E1000E_INT_MODE_LEGACY:
/* Don't do anything; this is the system default */
break;
}
return;
}
/**
* e1000_request_msix - Initialize MSI-X interrupts
*
* e1000_request_msix allocates MSI-X vectors and requests interrupts from the
* kernel.
**/
static int e1000_request_msix(struct e1000_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
int err = 0, vector = 0;
if (strlen(netdev->name) < (IFNAMSIZ - 5))
sprintf(adapter->rx_ring->name, "%s-rx0", netdev->name);
else
memcpy(adapter->rx_ring->name, netdev->name, IFNAMSIZ);
err = request_irq(adapter->msix_entries[vector].vector,
&e1000_intr_msix_rx, 0, adapter->rx_ring->name,
netdev);
if (err)
goto out;
adapter->rx_ring->itr_register = E1000_EITR_82574(vector);
adapter->rx_ring->itr_val = adapter->itr;
vector++;
if (strlen(netdev->name) < (IFNAMSIZ - 5))
sprintf(adapter->tx_ring->name, "%s-tx0", netdev->name);
else
memcpy(adapter->tx_ring->name, netdev->name, IFNAMSIZ);
err = request_irq(adapter->msix_entries[vector].vector,
&e1000_intr_msix_tx, 0, adapter->tx_ring->name,
netdev);
if (err)
goto out;
adapter->tx_ring->itr_register = E1000_EITR_82574(vector);
adapter->tx_ring->itr_val = adapter->itr;
vector++;
err = request_irq(adapter->msix_entries[vector].vector,
&e1000_msix_other, 0, netdev->name, netdev);
if (err)
goto out;
e1000_configure_msix(adapter);
return 0;
out:
return err;
}
/**
* e1000_request_irq - initialize interrupts
*
* Attempts to configure interrupts using the best available
* capabilities of the hardware and kernel.
**/
static int e1000_request_irq(struct e1000_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
int err;
if (adapter->msix_entries) {
err = e1000_request_msix(adapter);
if (!err)
return err;
/* fall back to MSI */
e1000e_reset_interrupt_capability(adapter);
adapter->int_mode = E1000E_INT_MODE_MSI;
e1000e_set_interrupt_capability(adapter);
}
if (adapter->flags & FLAG_MSI_ENABLED) {
err = request_irq(adapter->pdev->irq, &e1000_intr_msi, 0,
netdev->name, netdev);
if (!err)
return err;
/* fall back to legacy interrupt */
e1000e_reset_interrupt_capability(adapter);
adapter->int_mode = E1000E_INT_MODE_LEGACY;
}
err = request_irq(adapter->pdev->irq, &e1000_intr, IRQF_SHARED,
netdev->name, netdev);
if (err)
e_err("Unable to allocate interrupt, Error: %d\n", err);
return err;
}
static void e1000_free_irq(struct e1000_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
if (adapter->msix_entries) {
int vector = 0;
free_irq(adapter->msix_entries[vector].vector, netdev);
vector++;
free_irq(adapter->msix_entries[vector].vector, netdev);
vector++;
/* Other Causes interrupt vector */
free_irq(adapter->msix_entries[vector].vector, netdev);
return;
}
free_irq(adapter->pdev->irq, netdev);
}
/**
* e1000_irq_disable - Mask off interrupt generation on the NIC
**/
static void e1000_irq_disable(struct e1000_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
ew32(IMC, ~0);
if (adapter->msix_entries)
ew32(EIAC_82574, 0);
e1e_flush();
synchronize_irq(adapter->pdev->irq);
}
/**
* e1000_irq_enable - Enable default interrupt generation settings
**/
static void e1000_irq_enable(struct e1000_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
if (adapter->msix_entries) {
ew32(EIAC_82574, adapter->eiac_mask & E1000_EIAC_MASK_82574);
ew32(IMS, adapter->eiac_mask | E1000_IMS_OTHER | E1000_IMS_LSC);
} else {
ew32(IMS, IMS_ENABLE_MASK);
}
e1e_flush();
}
/**
* e1000_get_hw_control - get control of the h/w from f/w
* @adapter: address of board private structure
*
* e1000_get_hw_control sets {CTRL_EXT|SWSM}:DRV_LOAD bit.
* For ASF and Pass Through versions of f/w this means that
* the driver is loaded. For AMT version (only with 82573)
* of the f/w this means that the network i/f is open.
**/
static void e1000_get_hw_control(struct e1000_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
u32 ctrl_ext;
u32 swsm;
/* Let firmware know the driver has taken over */
if (adapter->flags & FLAG_HAS_SWSM_ON_LOAD) {
swsm = er32(SWSM);
ew32(SWSM, swsm | E1000_SWSM_DRV_LOAD);
} else if (adapter->flags & FLAG_HAS_CTRLEXT_ON_LOAD) {
ctrl_ext = er32(CTRL_EXT);
ew32(CTRL_EXT, ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}
}
/**
* e1000_release_hw_control - release control of the h/w to f/w
* @adapter: address of board private structure
*
* e1000_release_hw_control resets {CTRL_EXT|SWSM}:DRV_LOAD bit.
* For ASF and Pass Through versions of f/w this means that the
* driver is no longer loaded. For AMT version (only with 82573) i
* of the f/w this means that the network i/f is closed.
*
**/
static void e1000_release_hw_control(struct e1000_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
u32 ctrl_ext;
u32 swsm;
/* Let firmware taken over control of h/w */
if (adapter->flags & FLAG_HAS_SWSM_ON_LOAD) {
swsm = er32(SWSM);
ew32(SWSM, swsm & ~E1000_SWSM_DRV_LOAD);
} else if (adapter->flags & FLAG_HAS_CTRLEXT_ON_LOAD) {
ctrl_ext = er32(CTRL_EXT);
ew32(CTRL_EXT, ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}
}
/**
* @e1000_alloc_ring - allocate memory for a ring structure
**/
static int e1000_alloc_ring_dma(struct e1000_adapter *adapter,
struct e1000_ring *ring)
{
struct pci_dev *pdev = adapter->pdev;
ring->desc = dma_alloc_coherent(&pdev->dev, ring->size, &ring->dma,
GFP_KERNEL);
if (!ring->desc)
return -ENOMEM;
return 0;
}
/**
* e1000e_setup_tx_resources - allocate Tx resources (Descriptors)
* @adapter: board private structure
*
* Return 0 on success, negative on failure
**/
int e1000e_setup_tx_resources(struct e1000_adapter *adapter)
{
struct e1000_ring *tx_ring = adapter->tx_ring;
int err = -ENOMEM, size;
size = sizeof(struct e1000_buffer) * tx_ring->count;
tx_ring->buffer_info = vmalloc(size);
if (!tx_ring->buffer_info)
goto err;
memset(tx_ring->buffer_info, 0, size);
/* round up to nearest 4K */
tx_ring->size = tx_ring->count * sizeof(struct e1000_tx_desc);
tx_ring->size = ALIGN(tx_ring->size, 4096);
err = e1000_alloc_ring_dma(adapter, tx_ring);
if (err)
goto err;
tx_ring->next_to_use = 0;
tx_ring->next_to_clean = 0;
spin_lock_init(&adapter->tx_queue_lock);
return 0;
err:
vfree(tx_ring->buffer_info);
e_err("Unable to allocate memory for the transmit descriptor ring\n");
return err;
}
/**
* e1000e_setup_rx_resources - allocate Rx resources (Descriptors)
* @adapter: board private structure
*
* Returns 0 on success, negative on failure
**/
int e1000e_setup_rx_resources(struct e1000_adapter *adapter)
{
struct e1000_ring *rx_ring = adapter->rx_ring;
struct e1000_buffer *buffer_info;
int i, size, desc_len, err = -ENOMEM;
size = sizeof(struct e1000_buffer) * rx_ring->count;
rx_ring->buffer_info = vmalloc(size);
if (!rx_ring->buffer_info)
goto err;
memset(rx_ring->buffer_info, 0, size);
for (i = 0; i < rx_ring->count; i++) {
buffer_info = &rx_ring->buffer_info[i];
buffer_info->ps_pages = kcalloc(PS_PAGE_BUFFERS,
sizeof(struct e1000_ps_page),
GFP_KERNEL);
if (!buffer_info->ps_pages)
goto err_pages;
}
desc_len = sizeof(union e1000_rx_desc_packet_split);
/* Round up to nearest 4K */
rx_ring->size = rx_ring->count * desc_len;
rx_ring->size = ALIGN(rx_ring->size, 4096);
err = e1000_alloc_ring_dma(adapter, rx_ring);
if (err)
goto err_pages;
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
rx_ring->rx_skb_top = NULL;
return 0;
err_pages:
for (i = 0; i < rx_ring->count; i++) {
buffer_info = &rx_ring->buffer_info[i];
kfree(buffer_info->ps_pages);
}
err:
vfree(rx_ring->buffer_info);
e_err("Unable to allocate memory for the transmit descriptor ring\n");
return err;
}
/**
* e1000_clean_tx_ring - Free Tx Buffers
* @adapter: board private structure
**/
static void e1000_clean_tx_ring(struct e1000_adapter *adapter)
{
struct e1000_ring *tx_ring = adapter->tx_ring;
struct e1000_buffer *buffer_info;
unsigned long size;
unsigned int i;
for (i = 0; i < tx_ring->count; i++) {
buffer_info = &tx_ring->buffer_info[i];
e1000_put_txbuf(adapter, buffer_info);
}
size = sizeof(struct e1000_buffer) * tx_ring->count;
memset(tx_ring->buffer_info, 0, size);
memset(tx_ring->desc, 0, tx_ring->size);
tx_ring->next_to_use = 0;
tx_ring->next_to_clean = 0;
writel(0, adapter->hw.hw_addr + tx_ring->head);
writel(0, adapter->hw.hw_addr + tx_ring->tail);
}
/**
* e1000e_free_tx_resources - Free Tx Resources per Queue
* @adapter: board private structure
*
* Free all transmit software resources
**/
void e1000e_free_tx_resources(struct e1000_adapter *adapter)
{
struct pci_dev *pdev = adapter->pdev;
struct e1000_ring *tx_ring = adapter->tx_ring;
e1000_clean_tx_ring(adapter);
vfree(tx_ring->buffer_info);
tx_ring->buffer_info = NULL;
dma_free_coherent(&pdev->dev, tx_ring->size, tx_ring->desc,
tx_ring->dma);
tx_ring->desc = NULL;
}
/**
* e1000e_free_rx_resources - Free Rx Resources
* @adapter: board private structure
*
* Free all receive software resources
**/
void e1000e_free_rx_resources(struct e1000_adapter *adapter)
{
struct pci_dev *pdev = adapter->pdev;
struct e1000_ring *rx_ring = adapter->rx_ring;
int i;
e1000_clean_rx_ring(adapter);
for (i = 0; i < rx_ring->count; i++) {
kfree(rx_ring->buffer_info[i].ps_pages);
}
vfree(rx_ring->buffer_info);
rx_ring->buffer_info = NULL;
dma_free_coherent(&pdev->dev, rx_ring->size, rx_ring->desc,
rx_ring->dma);
rx_ring->desc = NULL;
}
/**
* e1000_update_itr - update the dynamic ITR value based on statistics
* @adapter: pointer to adapter
* @itr_setting: current adapter->itr
* @packets: the number of packets during this measurement interval
* @bytes: the number of bytes during this measurement interval
*
* Stores a new ITR value based on packets and byte
* counts during the last interrupt. The advantage of per interrupt
* computation is faster updates and more accurate ITR for the current
* traffic pattern. Constants in this function were computed
* based on theoretical maximum wire speed and thresholds were set based
* on testing data as well as attempting to minimize response time
* while increasing bulk throughput. This functionality is controlled
* by the InterruptThrottleRate module parameter.
**/
static unsigned int e1000_update_itr(struct e1000_adapter *adapter,
u16 itr_setting, int packets,
int bytes)
{
unsigned int retval = itr_setting;
if (packets == 0)
goto update_itr_done;
switch (itr_setting) {
case lowest_latency:
/* handle TSO and jumbo frames */
if (bytes/packets > 8000)
retval = bulk_latency;
else if ((packets < 5) && (bytes > 512)) {
retval = low_latency;
}
break;
case low_latency: /* 50 usec aka 20000 ints/s */
if (bytes > 10000) {
/* this if handles the TSO accounting */
if (bytes/packets > 8000) {
retval = bulk_latency;
} else if ((packets < 10) || ((bytes/packets) > 1200)) {
retval = bulk_latency;
} else if ((packets > 35)) {
retval = lowest_latency;
}
} else if (bytes/packets > 2000) {
retval = bulk_latency;
} else if (packets <= 2 && bytes < 512) {
retval = lowest_latency;
}
break;
case bulk_latency: /* 250 usec aka 4000 ints/s */
if (bytes > 25000) {
if (packets > 35) {
retval = low_latency;
}
} else if (bytes < 6000) {
retval = low_latency;
}
break;
}
update_itr_done:
return retval;
}
static void e1000_set_itr(struct e1000_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
u16 current_itr;
u32 new_itr = adapter->itr;
/* for non-gigabit speeds, just fix the interrupt rate at 4000 */
if (adapter->link_speed != SPEED_1000) {
current_itr = 0;
new_itr = 4000;
goto set_itr_now;
}
adapter->tx_itr = e1000_update_itr(adapter,
adapter->tx_itr,
adapter->total_tx_packets,
adapter->total_tx_bytes);
/* conservative mode (itr 3) eliminates the lowest_latency setting */
if (adapter->itr_setting == 3 && adapter->tx_itr == lowest_latency)
adapter->tx_itr = low_latency;
adapter->rx_itr = e1000_update_itr(adapter,
adapter->rx_itr,
adapter->total_rx_packets,
adapter->total_rx_bytes);
/* conservative mode (itr 3) eliminates the lowest_latency setting */
if (adapter->itr_setting == 3 && adapter->rx_itr == lowest_latency)
adapter->rx_itr = low_latency;
current_itr = max(adapter->rx_itr, adapter->tx_itr);
switch (current_itr) {
/* counts and packets in update_itr are dependent on these numbers */
case lowest_latency:
new_itr = 70000;
break;
case low_latency:
new_itr = 20000; /* aka hwitr = ~200 */
break;
case bulk_latency:
new_itr = 4000;
break;
default:
break;
}
set_itr_now:
if (new_itr != adapter->itr) {
/*
* this attempts to bias the interrupt rate towards Bulk
* by adding intermediate steps when interrupt rate is
* increasing
*/
new_itr = new_itr > adapter->itr ?
min(adapter->itr + (new_itr >> 2), new_itr) :
new_itr;
adapter->itr = new_itr;
adapter->rx_ring->itr_val = new_itr;
if (adapter->msix_entries)
adapter->rx_ring->set_itr = 1;
else
ew32(ITR, 1000000000 / (new_itr * 256));
}
}
/**
* e1000_alloc_queues - Allocate memory for all rings
* @adapter: board private structure to initialize
**/
static int __devinit e1000_alloc_queues(struct e1000_adapter *adapter)
{
adapter->tx_ring = kzalloc(sizeof(struct e1000_ring), GFP_KERNEL);
if (!adapter->tx_ring)
goto err;
adapter->rx_ring = kzalloc(sizeof(struct e1000_ring), GFP_KERNEL);
if (!adapter->rx_ring)
goto err;
return 0;
err:
e_err("Unable to allocate memory for queues\n");
kfree(adapter->rx_ring);
kfree(adapter->tx_ring);
return -ENOMEM;
}
/**
* e1000_clean - NAPI Rx polling callback
* @napi: struct associated with this polling callback
* @budget: amount of packets driver is allowed to process this poll
**/
static int e1000_clean(struct napi_struct *napi, int budget)
{
struct e1000_adapter *adapter = container_of(napi, struct e1000_adapter, napi);
struct e1000_hw *hw = &adapter->hw;
struct net_device *poll_dev = adapter->netdev;
int tx_cleaned = 0, work_done = 0;
adapter = netdev_priv(poll_dev);
if (adapter->msix_entries &&
!(adapter->rx_ring->ims_val & adapter->tx_ring->ims_val))
goto clean_rx;
/*
* e1000_clean is called per-cpu. This lock protects
* tx_ring from being cleaned by multiple cpus
* simultaneously. A failure obtaining the lock means
* tx_ring is currently being cleaned anyway.
*/
if (spin_trylock(&adapter->tx_queue_lock)) {
tx_cleaned = e1000_clean_tx_irq(adapter);
spin_unlock(&adapter->tx_queue_lock);
}
clean_rx:
adapter->clean_rx(adapter, &work_done, budget);
if (tx_cleaned)
work_done = budget;
/* If budget not fully consumed, exit the polling mode */
if (work_done < budget) {
if (adapter->itr_setting & 3)
e1000_set_itr(adapter);
netif_rx_complete(poll_dev, napi);
if (adapter->msix_entries)
ew32(IMS, adapter->rx_ring->ims_val);
else
e1000_irq_enable(adapter);
}
return work_done;
}
static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
u32 vfta, index;
/* don't update vlan cookie if already programmed */
if ((adapter->hw.mng_cookie.status &
E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
(vid == adapter->mng_vlan_id))
return;
/* add VID to filter table */
index = (vid >> 5) & 0x7F;
vfta = E1000_READ_REG_ARRAY(hw, E1000_VFTA, index);
vfta |= (1 << (vid & 0x1F));
e1000e_write_vfta(hw, index, vfta);
}
static void e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
u32 vfta, index;
if (!test_bit(__E1000_DOWN, &adapter->state))
e1000_irq_disable(adapter);
vlan_group_set_device(adapter->vlgrp, vid, NULL);
if (!test_bit(__E1000_DOWN, &adapter->state))
e1000_irq_enable(adapter);
if ((adapter->hw.mng_cookie.status &
E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
(vid == adapter->mng_vlan_id)) {
/* release control to f/w */
e1000_release_hw_control(adapter);
return;
}
/* remove VID from filter table */
index = (vid >> 5) & 0x7F;
vfta = E1000_READ_REG_ARRAY(hw, E1000_VFTA, index);
vfta &= ~(1 << (vid & 0x1F));
e1000e_write_vfta(hw, index, vfta);
}
static void e1000_update_mng_vlan(struct e1000_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
u16 vid = adapter->hw.mng_cookie.vlan_id;
u16 old_vid = adapter->mng_vlan_id;
if (!adapter->vlgrp)
return;
if (!vlan_group_get_device(adapter->vlgrp, vid)) {
adapter->mng_vlan_id = E1000_MNG_VLAN_NONE;
if (adapter->hw.mng_cookie.status &
E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
e1000_vlan_rx_add_vid(netdev, vid);
adapter->mng_vlan_id = vid;
}
if ((old_vid != (u16)E1000_MNG_VLAN_NONE) &&
(vid != old_vid) &&
!vlan_group_get_device(adapter->vlgrp, old_vid))
e1000_vlan_rx_kill_vid(netdev, old_vid);
} else {
adapter->mng_vlan_id = vid;
}
}
static void e1000_vlan_rx_register(struct net_device *netdev,
struct vlan_group *grp)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
u32 ctrl, rctl;
if (!test_bit(__E1000_DOWN, &adapter->state))
e1000_irq_disable(adapter);
adapter->vlgrp = grp;
if (grp) {
/* enable VLAN tag insert/strip */
ctrl = er32(CTRL);
ctrl |= E1000_CTRL_VME;
ew32(CTRL, ctrl);
if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) {
/* enable VLAN receive filtering */
rctl = er32(RCTL);
rctl &= ~E1000_RCTL_CFIEN;
ew32(RCTL, rctl);
e1000_update_mng_vlan(adapter);
}
} else {
/* disable VLAN tag insert/strip */
ctrl = er32(CTRL);
ctrl &= ~E1000_CTRL_VME;
ew32(CTRL, ctrl);
if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) {
if (adapter->mng_vlan_id !=
(u16)E1000_MNG_VLAN_NONE) {
e1000_vlan_rx_kill_vid(netdev,
adapter->mng_vlan_id);
adapter->mng_vlan_id = E1000_MNG_VLAN_NONE;
}
}
}
if (!test_bit(__E1000_DOWN, &adapter->state))
e1000_irq_enable(adapter);
}
static void e1000_restore_vlan(struct e1000_adapter *adapter)
{
u16 vid;
e1000_vlan_rx_register(adapter->netdev, adapter->vlgrp);
if (!adapter->vlgrp)
return;
for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
if (!vlan_group_get_device(adapter->vlgrp, vid))
continue;
e1000_vlan_rx_add_vid(adapter->netdev, vid);
}
}
static void e1000_init_manageability(struct e1000_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
u32 manc, manc2h;
if (!(adapter->flags & FLAG_MNG_PT_ENABLED))
return;
manc = er32(MANC);
/*
* enable receiving management packets to the host. this will probably
* generate destination unreachable messages from the host OS, but
* the packets will be handled on SMBUS
*/
manc |= E1000_MANC_EN_MNG2HOST;
manc2h = er32(MANC2H);
#define E1000_MNG2HOST_PORT_623 (1 << 5)
#define E1000_MNG2HOST_PORT_664 (1 << 6)
manc2h |= E1000_MNG2HOST_PORT_623;
manc2h |= E1000_MNG2HOST_PORT_664;
ew32(MANC2H, manc2h);
ew32(MANC, manc);
}
/**
* e1000_configure_tx - Configure 8254x Transmit Unit after Reset
* @adapter: board private structure
*
* Configure the Tx unit of the MAC after a reset.
**/
static void e1000_configure_tx(struct e1000_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
struct e1000_ring *tx_ring = adapter->tx_ring;
u64 tdba;
u32 tdlen, tctl, tipg, tarc;
u32 ipgr1, ipgr2;
/* Setup the HW Tx Head and Tail descriptor pointers */
tdba = tx_ring->dma;
tdlen = tx_ring->count * sizeof(struct e1000_tx_desc);
ew32(TDBAL, (tdba & DMA_32BIT_MASK));
ew32(TDBAH, (tdba >> 32));
ew32(TDLEN, tdlen);
ew32(TDH, 0);
ew32(TDT, 0);
tx_ring->head = E1000_TDH;
tx_ring->tail = E1000_TDT;
/* Set the default values for the Tx Inter Packet Gap timer */
tipg = DEFAULT_82543_TIPG_IPGT_COPPER; /* 8 */
ipgr1 = DEFAULT_82543_TIPG_IPGR1; /* 8 */
ipgr2 = DEFAULT_82543_TIPG_IPGR2; /* 6 */
if (adapter->flags & FLAG_TIPG_MEDIUM_FOR_80003ESLAN)
ipgr2 = DEFAULT_80003ES2LAN_TIPG_IPGR2; /* 7 */
tipg |= ipgr1 << E1000_TIPG_IPGR1_SHIFT;
tipg |= ipgr2 << E1000_TIPG_IPGR2_SHIFT;
ew32(TIPG, tipg);
/* Set the Tx Interrupt Delay register */
ew32(TIDV, adapter->tx_int_delay);
/* Tx irq moderation */
ew32(TADV, adapter->tx_abs_int_delay);
/* Program the Transmit Control Register */
tctl = er32(TCTL);
tctl &= ~E1000_TCTL_CT;
tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
if (adapter->flags & FLAG_TARC_SPEED_MODE_BIT) {
tarc = er32(TARC(0));
/*
* set the speed mode bit, we'll clear it if we're not at
* gigabit link later
*/
#define SPEED_MODE_BIT (1 << 21)
tarc |= SPEED_MODE_BIT;
ew32(TARC(0), tarc);
}
/* errata: program both queues to unweighted RR */
if (adapter->flags & FLAG_TARC_SET_BIT_ZERO) {
tarc = er32(TARC(0));
tarc |= 1;
ew32(TARC(0), tarc);
tarc = er32(TARC(1));
tarc |= 1;
ew32(TARC(1), tarc);
}
e1000e_config_collision_dist(hw);
/* Setup Transmit Descriptor Settings for eop descriptor */
adapter->txd_cmd = E1000_TXD_CMD_EOP | E1000_TXD_CMD_IFCS;
/* only set IDE if we are delaying interrupts using the timers */
if (adapter->tx_int_delay)
adapter->txd_cmd |= E1000_TXD_CMD_IDE;
/* enable Report Status bit */
adapter->txd_cmd |= E1000_TXD_CMD_RS;
ew32(TCTL, tctl);
adapter->tx_queue_len = adapter->netdev->tx_queue_len;
}
/**
* e1000_setup_rctl - configure the receive control registers
* @adapter: Board private structure
**/
#define PAGE_USE_COUNT(S) (((S) >> PAGE_SHIFT) + \
(((S) & (PAGE_SIZE - 1)) ? 1 : 0))
static void e1000_setup_rctl(struct e1000_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
u32 rctl, rfctl;
u32 psrctl = 0;
u32 pages = 0;
/* Program MC offset vector base */
rctl = er32(RCTL);
rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
rctl |= E1000_RCTL_EN | E1000_RCTL_BAM |
E1000_RCTL_LBM_NO | E1000_RCTL_RDMTS_HALF |
(adapter->hw.mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
/* Do not Store bad packets */
rctl &= ~E1000_RCTL_SBP;
/* Enable Long Packet receive */
if (adapter->netdev->mtu <= ETH_DATA_LEN)
rctl &= ~E1000_RCTL_LPE;
else
rctl |= E1000_RCTL_LPE;
/* Some systems expect that the CRC is included in SMBUS traffic. The
* hardware strips the CRC before sending to both SMBUS (BMC) and to
* host memory when this is enabled
*/
if (adapter->flags2 & FLAG2_CRC_STRIPPING)
rctl |= E1000_RCTL_SECRC;
/* Setup buffer sizes */
rctl &= ~E1000_RCTL_SZ_4096;
rctl |= E1000_RCTL_BSEX;
switch (adapter->rx_buffer_len) {
case 256:
rctl |= E1000_RCTL_SZ_256;
rctl &= ~E1000_RCTL_BSEX;
break;
case 512:
rctl |= E1000_RCTL_SZ_512;
rctl &= ~E1000_RCTL_BSEX;
break;
case 1024:
rctl |= E1000_RCTL_SZ_1024;
rctl &= ~E1000_RCTL_BSEX;
break;
case 2048:
default:
rctl |= E1000_RCTL_SZ_2048;
rctl &= ~E1000_RCTL_BSEX;
break;
case 4096:
rctl |= E1000_RCTL_SZ_4096;
break;
case 8192:
rctl |= E1000_RCTL_SZ_8192;
break;
case 16384:
rctl |= E1000_RCTL_SZ_16384;
break;
}
/*
* 82571 and greater support packet-split where the protocol
* header is placed in skb->data and the packet data is
* placed in pages hanging off of skb_shinfo(skb)->nr_frags.
* In the case of a non-split, skb->data is linearly filled,
* followed by the page buffers. Therefore, skb->data is
* sized to hold the largest protocol header.
*
* allocations using alloc_page take too long for regular MTU
* so only enable packet split for jumbo frames
*
* Using pages when the page size is greater than 16k wastes
* a lot of memory, since we allocate 3 pages at all times
* per packet.
*/
pages = PAGE_USE_COUNT(adapter->netdev->mtu);
if (!(adapter->flags & FLAG_IS_ICH) && (pages <= 3) &&
(PAGE_SIZE <= 16384) && (rctl & E1000_RCTL_LPE))
adapter->rx_ps_pages = pages;
else
adapter->rx_ps_pages = 0;
if (adapter->rx_ps_pages) {
/* Configure extra packet-split registers */
rfctl = er32(RFCTL);
rfctl |= E1000_RFCTL_EXTEN;
/*
* disable packet split support for IPv6 extension headers,
* because some malformed IPv6 headers can hang the Rx
*/
rfctl |= (E1000_RFCTL_IPV6_EX_DIS |
E1000_RFCTL_NEW_IPV6_EXT_DIS);
ew32(RFCTL, rfctl);
/* Enable Packet split descriptors */
rctl |= E1000_RCTL_DTYP_PS;
psrctl |= adapter->rx_ps_bsize0 >>
E1000_PSRCTL_BSIZE0_SHIFT;
switch (adapter->rx_ps_pages) {
case 3:
psrctl |= PAGE_SIZE <<
E1000_PSRCTL_BSIZE3_SHIFT;
case 2:
psrctl |= PAGE_SIZE <<
E1000_PSRCTL_BSIZE2_SHIFT;
case 1:
psrctl |= PAGE_SIZE >>
E1000_PSRCTL_BSIZE1_SHIFT;
break;
}
ew32(PSRCTL, psrctl);
}
ew32(RCTL, rctl);
/* just started the receive unit, no need to restart */
adapter->flags &= ~FLAG_RX_RESTART_NOW;
}
/**
* e1000_configure_rx - Configure Receive Unit after Reset
* @adapter: board private structure
*
* Configure the Rx unit of the MAC after a reset.
**/
static void e1000_configure_rx(struct e1000_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
struct e1000_ring *rx_ring = adapter->rx_ring;
u64 rdba;
u32 rdlen, rctl, rxcsum, ctrl_ext;
if (adapter->rx_ps_pages) {
/* this is a 32 byte descriptor */
rdlen = rx_ring->count *
sizeof(union e1000_rx_desc_packet_split);
adapter->clean_rx = e1000_clean_rx_irq_ps;
adapter->alloc_rx_buf = e1000_alloc_rx_buffers_ps;
} else if (adapter->netdev->mtu > ETH_FRAME_LEN + ETH_FCS_LEN) {
rdlen = rx_ring->count * sizeof(struct e1000_rx_desc);
adapter->clean_rx = e1000_clean_jumbo_rx_irq;
adapter->alloc_rx_buf = e1000_alloc_jumbo_rx_buffers;
} else {
rdlen = rx_ring->count * sizeof(struct e1000_rx_desc);
adapter->clean_rx = e1000_clean_rx_irq;
adapter->alloc_rx_buf = e1000_alloc_rx_buffers;
}
/* disable receives while setting up the descriptors */
rctl = er32(RCTL);
ew32(RCTL, rctl & ~E1000_RCTL_EN);
e1e_flush();
msleep(10);
/* set the Receive Delay Timer Register */
ew32(RDTR, adapter->rx_int_delay);
/* irq moderation */
ew32(RADV, adapter->rx_abs_int_delay);
if (adapter->itr_setting != 0)
ew32(ITR, 1000000000 / (adapter->itr * 256));
ctrl_ext = er32(CTRL_EXT);
/* Reset delay timers after every interrupt */
ctrl_ext |= E1000_CTRL_EXT_INT_TIMER_CLR;
/* Auto-Mask interrupts upon ICR access */
ctrl_ext |= E1000_CTRL_EXT_IAME;
ew32(IAM, 0xffffffff);
ew32(CTRL_EXT, ctrl_ext);
e1e_flush();
/*
* Setup the HW Rx Head and Tail Descriptor Pointers and
* the Base and Length of the Rx Descriptor Ring
*/
rdba = rx_ring->dma;
ew32(RDBAL, (rdba & DMA_32BIT_MASK));
ew32(RDBAH, (rdba >> 32));
ew32(RDLEN, rdlen);
ew32(RDH, 0);
ew32(RDT, 0);
rx_ring->head = E1000_RDH;
rx_ring->tail = E1000_RDT;
/* Enable Receive Checksum Offload for TCP and UDP */
rxcsum = er32(RXCSUM);
if (adapter->flags & FLAG_RX_CSUM_ENABLED) {
rxcsum |= E1000_RXCSUM_TUOFL;
/*
* IPv4 payload checksum for UDP fragments must be
* used in conjunction with packet-split.
*/
if (adapter->rx_ps_pages)
rxcsum |= E1000_RXCSUM_IPPCSE;
} else {
rxcsum &= ~E1000_RXCSUM_TUOFL;
/* no need to clear IPPCSE as it defaults to 0 */
}
ew32(RXCSUM, rxcsum);
/*
* Enable early receives on supported devices, only takes effect when
* packet size is equal or larger than the specified value (in 8 byte
* units), e.g. using jumbo frames when setting to E1000_ERT_2048
*/
if ((adapter->flags & FLAG_HAS_ERT) &&
(adapter->netdev->mtu > ETH_DATA_LEN)) {
u32 rxdctl = er32(RXDCTL(0));
ew32(RXDCTL(0), rxdctl | 0x3);
ew32(ERT, E1000_ERT_2048 | (1 << 13));
/*
* With jumbo frames and early-receive enabled, excessive
* C4->C2 latencies result in dropped transactions.
*/
pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY,
e1000e_driver_name, 55);
} else {
pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY,
e1000e_driver_name,
PM_QOS_DEFAULT_VALUE);
}
/* Enable Receives */
ew32(RCTL, rctl);
}
/**
* e1000_update_mc_addr_list - Update Multicast addresses
* @hw: pointer to the HW structure
* @mc_addr_list: array of multicast addresses to program
* @mc_addr_count: number of multicast addresses to program
* @rar_used_count: the first RAR register free to program
* @rar_count: total number of supported Receive Address Registers
*
* Updates the Receive Address Registers and Multicast Table Array.
* The caller must have a packed mc_addr_list of multicast addresses.
* The parameter rar_count will usually be hw->mac.rar_entry_count
* unless there are workarounds that change this. Currently no func pointer
* exists and all implementations are handled in the generic version of this
* function.
**/
static void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list,
u32 mc_addr_count, u32 rar_used_count,
u32 rar_count)
{
hw->mac.ops.update_mc_addr_list(hw, mc_addr_list, mc_addr_count,
rar_used_count, rar_count);
}
/**
* e1000_set_multi - Multicast and Promiscuous mode set
* @netdev: network interface device structure
*
* The set_multi entry point is called whenever the multicast address
* list or the network interface flags are updated. This routine is
* responsible for configuring the hardware for proper multicast,
* promiscuous mode, and all-multi behavior.
**/
static void e1000_set_multi(struct net_device *netdev)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
struct e1000_mac_info *mac = &hw->mac;
struct dev_mc_list *mc_ptr;
u8 *mta_list;
u32 rctl;
int i;
/* Check for Promiscuous and All Multicast modes */
rctl = er32(RCTL);
if (netdev->flags & IFF_PROMISC) {
rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
rctl &= ~E1000_RCTL_VFE;
} else {
if (netdev->flags & IFF_ALLMULTI) {
rctl |= E1000_RCTL_MPE;
rctl &= ~E1000_RCTL_UPE;
} else {
rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE);
}
if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER)
rctl |= E1000_RCTL_VFE;
}
ew32(RCTL, rctl);
if (netdev->mc_count) {
mta_list = kmalloc(netdev->mc_count * 6, GFP_ATOMIC);
if (!mta_list)
return;
/* prepare a packed array of only addresses. */
mc_ptr = netdev->mc_list;
for (i = 0; i < netdev->mc_count; i++) {
if (!mc_ptr)
break;
memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr,
ETH_ALEN);
mc_ptr = mc_ptr->next;
}
e1000_update_mc_addr_list(hw, mta_list, i, 1,
mac->rar_entry_count);
kfree(mta_list);
} else {
/*
* if we're called from probe, we might not have
* anything to do here, so clear out the list
*/
e1000_update_mc_addr_list(hw, NULL, 0, 1, mac->rar_entry_count);
}
}
/**
* e1000_configure - configure the hardware for Rx and Tx
* @adapter: private board structure
**/
static void e1000_configure(struct e1000_adapter *adapter)
{
e1000_set_multi(adapter->netdev);
e1000_restore_vlan(adapter);
e1000_init_manageability(adapter);
e1000_configure_tx(adapter);
e1000_setup_rctl(adapter);
e1000_configure_rx(adapter);
adapter->alloc_rx_buf(adapter, e1000_desc_unused(adapter->rx_ring));
}
/**
* e1000e_power_up_phy - restore link in case the phy was powered down
* @adapter: address of board private structure
*
* The phy may be powered down to save power and turn off link when the
* driver is unloaded and wake on lan is not enabled (among others)
* *** this routine MUST be followed by a call to e1000e_reset ***
**/
void e1000e_power_up_phy(struct e1000_adapter *adapter)
{
u16 mii_reg = 0;
/* Just clear the power down bit to wake the phy back up */
if (adapter->hw.phy.media_type == e1000_media_type_copper) {
/*
* According to the manual, the phy will retain its
* settings across a power-down/up cycle
*/
e1e_rphy(&adapter->hw, PHY_CONTROL, &mii_reg);
mii_reg &= ~MII_CR_POWER_DOWN;
e1e_wphy(&adapter->hw, PHY_CONTROL, mii_reg);
}
adapter->hw.mac.ops.setup_link(&adapter->hw);
}
/**
* e1000_power_down_phy - Power down the PHY
*
* Power down the PHY so no link is implied when interface is down
* The PHY cannot be powered down is management or WoL is active
*/
static void e1000_power_down_phy(struct e1000_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
u16 mii_reg;
/* WoL is enabled */
if (adapter->wol)
return;
/* non-copper PHY? */
if (adapter->hw.phy.media_type != e1000_media_type_copper)
return;
/* reset is blocked because of a SoL/IDER session */
if (e1000e_check_mng_mode(hw) || e1000_check_reset_block(hw))
return;
/* manageability (AMT) is enabled */
if (er32(MANC) & E1000_MANC_SMBUS_EN)
return;
/* power down the PHY */
e1e_rphy(hw, PHY_CONTROL, &mii_reg);
mii_reg |= MII_CR_POWER_DOWN;
e1e_wphy(hw, PHY_CONTROL, mii_reg);
mdelay(1);
}
/**
* e1000e_reset - bring the hardware into a known good state
*
* This function boots the hardware and enables some settings that
* require a configuration cycle of the hardware - those cannot be
* set/changed during runtime. After reset the device needs to be
* properly configured for Rx, Tx etc.
*/
void e1000e_reset(struct e1000_adapter *adapter)
{
struct e1000_mac_info *mac = &adapter->hw.mac;
struct e1000_fc_info *fc = &adapter->hw.fc;
struct e1000_hw *hw = &adapter->hw;
u32 tx_space, min_tx_space, min_rx_space;
u32 pba = adapter->pba;
u16 hwm;
/* reset Packet Buffer Allocation to default */
ew32(PBA, pba);
if (adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) {
/*
* To maintain wire speed transmits, the Tx FIFO should be
* large enough to accommodate two full transmit packets,
* rounded up to the next 1KB and expressed in KB. Likewise,
* the Rx FIFO should be large enough to accommodate at least
* one full receive packet and is similarly rounded up and
* expressed in KB.
*/
pba = er32(PBA);
/* upper 16 bits has Tx packet buffer allocation size in KB */
tx_space = pba >> 16;
/* lower 16 bits has Rx packet buffer allocation size in KB */
pba &= 0xffff;
/*
* the Tx fifo also stores 16 bytes of information about the tx
* but don't include ethernet FCS because hardware appends it
*/
min_tx_space = (adapter->max_frame_size +
sizeof(struct e1000_tx_desc) -
ETH_FCS_LEN) * 2;
min_tx_space = ALIGN(min_tx_space, 1024);
min_tx_space >>= 10;
/* software strips receive CRC, so leave room for it */
min_rx_space = adapter->max_frame_size;
min_rx_space = ALIGN(min_rx_space, 1024);
min_rx_space >>= 10;
/*
* If current Tx allocation is less than the min Tx FIFO size,
* and the min Tx FIFO size is less than the current Rx FIFO
* allocation, take space away from current Rx allocation
*/
if ((tx_space < min_tx_space) &&
((min_tx_space - tx_space) < pba)) {
pba -= min_tx_space - tx_space;
/*
* if short on Rx space, Rx wins and must trump tx
* adjustment or use Early Receive if available
*/
if ((pba < min_rx_space) &&
(!(adapter->flags & FLAG_HAS_ERT)))
/* ERT enabled in e1000_configure_rx */
pba = min_rx_space;
}
ew32(PBA, pba);
}
/*
* flow control settings
*
* The high water mark must be low enough to fit one full frame
* (or the size used for early receive) above it in the Rx FIFO.
* Set it to the lower of:
* - 90% of the Rx FIFO size, and
* - the full Rx FIFO size minus the early receive size (for parts
* with ERT support assuming ERT set to E1000_ERT_2048), or
* - the full Rx FIFO size minus one full frame
*/
if (adapter->flags & FLAG_HAS_ERT)
hwm = min(((pba << 10) * 9 / 10),
((pba << 10) - (E1000_ERT_2048 << 3)));
else
hwm = min(((pba << 10) * 9 / 10),
((pba << 10) - adapter->max_frame_size));
fc->high_water = hwm & 0xFFF8; /* 8-byte granularity */
fc->low_water = fc->high_water - 8;
if (adapter->flags & FLAG_DISABLE_FC_PAUSE_TIME)
fc->pause_time = 0xFFFF;
else
fc->pause_time = E1000_FC_PAUSE_TIME;
fc->send_xon = 1;
fc->type = fc->original_type;
/* Allow time for pending master requests to run */
mac->ops.reset_hw(hw);
/*
* For parts with AMT enabled, let the firmware know
* that the network interface is in control
*/
if (adapter->flags & FLAG_HAS_AMT)
e1000_get_hw_control(adapter);
ew32(WUC, 0);
if (mac->ops.init_hw(hw))
e_err("Hardware Error\n");
e1000_update_mng_vlan(adapter);
/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
ew32(VET, ETH_P_8021Q);
e1000e_reset_adaptive(hw);
e1000_get_phy_info(hw);
if (!(adapter->flags & FLAG_SMART_POWER_DOWN)) {
u16 phy_data = 0;
/*
* speed up time to link by disabling smart power down, ignore
* the return value of this function because there is nothing
* different we would do if it failed
*/
e1e_rphy(hw, IGP02E1000_PHY_POWER_MGMT, &phy_data);
phy_data &= ~IGP02E1000_PM_SPD;
e1e_wphy(hw, IGP02E1000_PHY_POWER_MGMT, phy_data);
}
}
int e1000e_up(struct e1000_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
/* hardware has been reset, we need to reload some things */
e1000_configure(adapter);
clear_bit(__E1000_DOWN, &adapter->state);
napi_enable(&adapter->napi);
if (adapter->msix_entries)
e1000_configure_msix(adapter);
e1000_irq_enable(adapter);
/* fire a link change interrupt to start the watchdog */
ew32(ICS, E1000_ICS_LSC);
return 0;
}
void e1000e_down(struct e1000_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
struct e1000_hw *hw = &adapter->hw;
u32 tctl, rctl;
/*
* signal that we're down so the interrupt handler does not
* reschedule our watchdog timer
*/
set_bit(__E1000_DOWN, &adapter->state);
/* disable receives in the hardware */
rctl = er32(RCTL);
ew32(RCTL, rctl & ~E1000_RCTL_EN);
/* flush and sleep below */
netif_tx_stop_all_queues(netdev);
/* disable transmits in the hardware */
tctl = er32(TCTL);
tctl &= ~E1000_TCTL_EN;
ew32(TCTL, tctl);
/* flush both disables and wait for them to finish */
e1e_flush();
msleep(10);
napi_disable(&adapter->napi);
e1000_irq_disable(adapter);
del_timer_sync(&adapter->watchdog_timer);
del_timer_sync(&adapter->phy_info_timer);
netdev->tx_queue_len = adapter->tx_queue_len;
netif_carrier_off(netdev);
adapter->link_speed = 0;
adapter->link_duplex = 0;
if (!pci_channel_offline(adapter->pdev))
e1000e_reset(adapter);
e1000_clean_tx_ring(adapter);
e1000_clean_rx_ring(adapter);
/*
* TODO: for power management, we could drop the link and
* pci_disable_device here.
*/
}
void e1000e_reinit_locked(struct e1000_adapter *adapter)
{
might_sleep();
while (test_and_set_bit(__E1000_RESETTING, &adapter->state))
msleep(1);
e1000e_down(adapter);
e1000e_up(adapter);
clear_bit(__E1000_RESETTING, &adapter->state);
}
/**
* e1000_sw_init - Initialize general software structures (struct e1000_adapter)
* @adapter: board private structure to initialize
*
* e1000_sw_init initializes the Adapter private data structure.
* Fields are initialized based on PCI device information and
* OS network device settings (MTU size).
**/
static int __devinit e1000_sw_init(struct e1000_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN;
adapter->rx_ps_bsize0 = 128;
adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
e1000e_set_interrupt_capability(adapter);
if (e1000_alloc_queues(adapter))
return -ENOMEM;
spin_lock_init(&adapter->tx_queue_lock);
/* Explicitly disable IRQ since the NIC can be in any state. */
e1000_irq_disable(adapter);
set_bit(__E1000_DOWN, &adapter->state);
return 0;
}
/**
* e1000_intr_msi_test - Interrupt Handler
* @irq: interrupt number
* @data: pointer to a network interface device structure
**/
static irqreturn_t e1000_intr_msi_test(int irq, void *data)
{
struct net_device *netdev = data;
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
u32 icr = er32(ICR);
e_dbg("%s: icr is %08X\n", netdev->name, icr);
if (icr & E1000_ICR_RXSEQ) {
adapter->flags &= ~FLAG_MSI_TEST_FAILED;
wmb();
}
return IRQ_HANDLED;
}
/**
* e1000_test_msi_interrupt - Returns 0 for successful test
* @adapter: board private struct
*
* code flow taken from tg3.c
**/
static int e1000_test_msi_interrupt(struct e1000_adapter *adapter)
{
struct net_device *netdev = adapter->netdev;
struct e1000_hw *hw = &adapter->hw;
int err;
/* poll_enable hasn't been called yet, so don't need disable */
/* clear any pending events */
er32(ICR);
/* free the real vector and request a test handler */
e1000_free_irq(adapter);
e1000e_reset_interrupt_capability(adapter);
/* Assume that the test fails, if it succeeds then the test
* MSI irq handler will unset this flag */
adapter->flags |= FLAG_MSI_TEST_FAILED;
err = pci_enable_msi(adapter->pdev);
if (err)
goto msi_test_failed;
err = request_irq(adapter->pdev->irq, &e1000_intr_msi_test, 0,
netdev->name, netdev);
if (err) {
pci_disable_msi(adapter->pdev);
goto msi_test_failed;
}
wmb();
e1000_irq_enable(adapter);
/* fire an unusual interrupt on the test handler */
ew32(ICS, E1000_ICS_RXSEQ);
e1e_flush();
msleep(50);
e1000_irq_disable(adapter);
rmb();
if (adapter->flags & FLAG_MSI_TEST_FAILED) {
adapter->int_mode = E1000E_INT_MODE_LEGACY;
err = -EIO;
e_info("MSI interrupt test failed!\n");
}
free_irq(adapter->pdev->irq, netdev);
pci_disable_msi(adapter->pdev);
if (err == -EIO)
goto msi_test_failed;
/* okay so the test worked, restore settings */
e_dbg("%s: MSI interrupt test succeeded!\n", netdev->name);
msi_test_failed:
e1000e_set_interrupt_capability(adapter);
e1000_request_irq(adapter);
return err;
}
/**
* e1000_test_msi - Returns 0 if MSI test succeeds or INTx mode is restored
* @adapter: board private struct
*
* code flow taken from tg3.c, called with e1000 interrupts disabled.
**/
static int e1000_test_msi(struct e1000_adapter *adapter)
{
int err;
u16 pci_cmd;
if (!(adapter->flags & FLAG_MSI_ENABLED))
return 0;
/* disable SERR in case the MSI write causes a master abort */
pci_read_config_word(adapter->pdev, PCI_COMMAND, &pci_cmd);
pci_write_config_word(adapter->pdev, PCI_COMMAND,
pci_cmd & ~PCI_COMMAND_SERR);
err = e1000_test_msi_interrupt(adapter);
/* restore previous setting of command word */
pci_write_config_word(adapter->pdev, PCI_COMMAND, pci_cmd);
/* success ! */
if (!err)
return 0;
/* EIO means MSI test failed */
if (err != -EIO)
return err;
/* back to INTx mode */
e_warn("MSI interrupt test failed, using legacy interrupt.\n");
e1000_free_irq(adapter);
err = e1000_request_irq(adapter);
return err;
}
/**
* e1000_open - Called when a network interface is made active
* @netdev: network interface device structure
*
* Returns 0 on success, negative value on failure
*
* The open entry point is called when a network interface is made
* active by the system (IFF_UP). At this point all resources needed
* for transmit and receive operations are allocated, the interrupt
* handler is registered with the OS, the watchdog timer is started,
* and the stack is notified that the interface is ready.
**/
static int e1000_open(struct net_device *netdev)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
int err;
/* disallow open during test */
if (test_bit(__E1000_TESTING, &adapter->state))
return -EBUSY;
/* allocate transmit descriptors */
err = e1000e_setup_tx_resources(adapter);
if (err)
goto err_setup_tx;
/* allocate receive descriptors */
err = e1000e_setup_rx_resources(adapter);
if (err)
goto err_setup_rx;
e1000e_power_up_phy(adapter);
adapter->mng_vlan_id = E1000_MNG_VLAN_NONE;
if ((adapter->hw.mng_cookie.status &
E1000_MNG_DHCP_COOKIE_STATUS_VLAN))
e1000_update_mng_vlan(adapter);
/*
* If AMT is enabled, let the firmware know that the network
* interface is now open
*/
if (adapter->flags & FLAG_HAS_AMT)
e1000_get_hw_control(adapter);
/*
* before we allocate an interrupt, we must be ready to handle it.
* Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
* as soon as we call pci_request_irq, so we have to setup our
* clean_rx handler before we do so.
*/
e1000_configure(adapter);
err = e1000_request_irq(adapter);
if (err)
goto err_req_irq;
/*
* Work around PCIe errata with MSI interrupts causing some chipsets to
* ignore e1000e MSI messages, which means we need to test our MSI
* interrupt now
*/
if (adapter->int_mode != E1000E_INT_MODE_LEGACY) {
err = e1000_test_msi(adapter);
if (err) {
e_err("Interrupt allocation failed\n");
goto err_req_irq;
}
}
/* From here on the code is the same as e1000e_up() */
clear_bit(__E1000_DOWN, &adapter->state);
napi_enable(&adapter->napi);
e1000_irq_enable(adapter);
netif_tx_start_all_queues(netdev);
/* fire a link status change interrupt to start the watchdog */
ew32(ICS, E1000_ICS_LSC);
return 0;
err_req_irq:
e1000_release_hw_control(adapter);
e1000_power_down_phy(adapter);
e1000e_free_rx_resources(adapter);
err_setup_rx:
e1000e_free_tx_resources(adapter);
err_setup_tx:
e1000e_reset(adapter);
return err;
}
/**
* e1000_close - Disables a network interface
* @netdev: network interface device structure
*
* Returns 0, this is not allowed to fail
*
* The close entry point is called when an interface is de-activated
* by the OS. The hardware is still under the drivers control, but
* needs to be disabled. A global MAC reset is issued to stop the
* hardware, and all transmit and receive resources are freed.
**/
static int e1000_close(struct net_device *netdev)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
WARN_ON(test_bit(__E1000_RESETTING, &adapter->state));
e1000e_down(adapter);
e1000_power_down_phy(adapter);
e1000_free_irq(adapter);
e1000e_free_tx_resources(adapter);
e1000e_free_rx_resources(adapter);
/*
* kill manageability vlan ID if supported, but not if a vlan with
* the same ID is registered on the host OS (let 8021q kill it)
*/
if ((adapter->hw.mng_cookie.status &
E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
!(adapter->vlgrp &&
vlan_group_get_device(adapter->vlgrp, adapter->mng_vlan_id)))
e1000_vlan_rx_kill_vid(netdev, adapter->mng_vlan_id);
/*
* If AMT is enabled, let the firmware know that the network
* interface is now closed
*/
if (adapter->flags & FLAG_HAS_AMT)
e1000_release_hw_control(adapter);
return 0;
}
/**
* e1000_set_mac - Change the Ethernet Address of the NIC
* @netdev: network interface device structure
* @p: pointer to an address structure
*
* Returns 0 on success, negative on failure
**/
static int e1000_set_mac(struct net_device *netdev, void *p)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
struct sockaddr *addr = p;
if (!is_valid_ether_addr(addr->sa_data))
return -EADDRNOTAVAIL;
memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
memcpy(adapter->hw.mac.addr, addr->sa_data, netdev->addr_len);
e1000e_rar_set(&adapter->hw, adapter->hw.mac.addr, 0);
if (adapter->flags & FLAG_RESET_OVERWRITES_LAA) {
/* activate the work around */
e1000e_set_laa_state_82571(&adapter->hw, 1);
/*
* Hold a copy of the LAA in RAR[14] This is done so that
* between the time RAR[0] gets clobbered and the time it
* gets fixed (in e1000_watchdog), the actual LAA is in one
* of the RARs and no incoming packets directed to this port
* are dropped. Eventually the LAA will be in RAR[0] and
* RAR[14]
*/
e1000e_rar_set(&adapter->hw,
adapter->hw.mac.addr,
adapter->hw.mac.rar_entry_count - 1);
}
return 0;
}
/**
* e1000e_update_phy_task - work thread to update phy
* @work: pointer to our work struct
*
* this worker thread exists because we must acquire a
* semaphore to read the phy, which we could msleep while
* waiting for it, and we can't msleep in a timer.
**/
static void e1000e_update_phy_task(struct work_struct *work)
{
struct e1000_adapter *adapter = container_of(work,
struct e1000_adapter, update_phy_task);
e1000_get_phy_info(&adapter->hw);
}
/*
* Need to wait a few seconds after link up to get diagnostic information from
* the phy
*/
static void e1000_update_phy_info(unsigned long data)
{
struct e1000_adapter *adapter = (struct e1000_adapter *) data;
schedule_work(&adapter->update_phy_task);
}
/**
* e1000e_update_stats - Update the board statistics counters
* @adapter: board private structure
**/
void e1000e_update_stats(struct e1000_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
struct pci_dev *pdev = adapter->pdev;
/*
* Prevent stats update while adapter is being reset, or if the pci
* connection is down.
*/
if (adapter->link_speed == 0)
return;
if (pci_channel_offline(pdev))
return;
adapter->stats.crcerrs += er32(CRCERRS);
adapter->stats.gprc += er32(GPRC);
adapter->stats.gorc += er32(GORCL);
er32(GORCH); /* Clear gorc */
adapter->stats.bprc += er32(BPRC);
adapter->stats.mprc += er32(MPRC);
adapter->stats.roc += er32(ROC);
adapter->stats.mpc += er32(MPC);
adapter->stats.scc += er32(SCC);
adapter->stats.ecol += er32(ECOL);
adapter->stats.mcc += er32(MCC);
adapter->stats.latecol += er32(LATECOL);
adapter->stats.dc += er32(DC);
adapter->stats.xonrxc += er32(XONRXC);
adapter->stats.xontxc += er32(XONTXC);
adapter->stats.xoffrxc += er32(XOFFRXC);
adapter->stats.xofftxc += er32(XOFFTXC);
adapter->stats.gptc += er32(GPTC);
adapter->stats.gotc += er32(GOTCL);
er32(GOTCH); /* Clear gotc */
adapter->stats.rnbc += er32(RNBC);
adapter->stats.ruc += er32(RUC);
adapter->stats.mptc += er32(MPTC);
adapter->stats.bptc += er32(BPTC);
/* used for adaptive IFS */
hw->mac.tx_packet_delta = er32(TPT);
adapter->stats.tpt += hw->mac.tx_packet_delta;
hw->mac.collision_delta = er32(COLC);
adapter->stats.colc += hw->mac.collision_delta;
adapter->stats.algnerrc += er32(ALGNERRC);
adapter->stats.rxerrc += er32(RXERRC);
if (hw->mac.type != e1000_82574)
adapter->stats.tncrs += er32(TNCRS);
adapter->stats.cexterr += er32(CEXTERR);
adapter->stats.tsctc += er32(TSCTC);
adapter->stats.tsctfc += er32(TSCTFC);
/* Fill out the OS statistics structure */
adapter->net_stats.multicast = adapter->stats.mprc;
adapter->net_stats.collisions = adapter->stats.colc;
/* Rx Errors */
/*
* RLEC on some newer hardware can be incorrect so build
* our own version based on RUC and ROC
*/
adapter->net_stats.rx_errors = adapter->stats.rxerrc +
adapter->stats.crcerrs + adapter->stats.algnerrc +
adapter->stats.ruc + adapter->stats.roc +
adapter->stats.cexterr;
adapter->net_stats.rx_length_errors = adapter->stats.ruc +
adapter->stats.roc;
adapter->net_stats.rx_crc_errors = adapter->stats.crcerrs;
adapter->net_stats.rx_frame_errors = adapter->stats.algnerrc;
adapter->net_stats.rx_missed_errors = adapter->stats.mpc;
/* Tx Errors */
adapter->net_stats.tx_errors = adapter->stats.ecol +
adapter->stats.latecol;
adapter->net_stats.tx_aborted_errors = adapter->stats.ecol;
adapter->net_stats.tx_window_errors = adapter->stats.latecol;
adapter->net_stats.tx_carrier_errors = adapter->stats.tncrs;
/* Tx Dropped needs to be maintained elsewhere */
/* Management Stats */
adapter->stats.mgptc += er32(MGTPTC);
adapter->stats.mgprc += er32(MGTPRC);
adapter->stats.mgpdc += er32(MGTPDC);
}
/**
* e1000_phy_read_status - Update the PHY register status snapshot
* @adapter: board private structure
**/
static void e1000_phy_read_status(struct e1000_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
struct e1000_phy_regs *phy = &adapter->phy_regs;
int ret_val;
if ((er32(STATUS) & E1000_STATUS_LU) &&
(adapter->hw.phy.media_type == e1000_media_type_copper)) {
ret_val = e1e_rphy(hw, PHY_CONTROL, &phy->bmcr);
ret_val |= e1e_rphy(hw, PHY_STATUS, &phy->bmsr);
ret_val |= e1e_rphy(hw, PHY_AUTONEG_ADV, &phy->advertise);
ret_val |= e1e_rphy(hw, PHY_LP_ABILITY, &phy->lpa);
ret_val |= e1e_rphy(hw, PHY_AUTONEG_EXP, &phy->expansion);
ret_val |= e1e_rphy(hw, PHY_1000T_CTRL, &phy->ctrl1000);
ret_val |= e1e_rphy(hw, PHY_1000T_STATUS, &phy->stat1000);
ret_val |= e1e_rphy(hw, PHY_EXT_STATUS, &phy->estatus);
if (ret_val)
e_warn("Error reading PHY register\n");
} else {
/*
* Do not read PHY registers if link is not up
* Set values to typical power-on defaults
*/
phy->bmcr = (BMCR_SPEED1000 | BMCR_ANENABLE | BMCR_FULLDPLX);
phy->bmsr = (BMSR_100FULL | BMSR_100HALF | BMSR_10FULL |
BMSR_10HALF | BMSR_ESTATEN | BMSR_ANEGCAPABLE |
BMSR_ERCAP);
phy->advertise = (ADVERTISE_PAUSE_ASYM | ADVERTISE_PAUSE_CAP |
ADVERTISE_ALL | ADVERTISE_CSMA);
phy->lpa = 0;
phy->expansion = EXPANSION_ENABLENPAGE;
phy->ctrl1000 = ADVERTISE_1000FULL;
phy->stat1000 = 0;
phy->estatus = (ESTATUS_1000_TFULL | ESTATUS_1000_THALF);
}
}
static void e1000_print_link_info(struct e1000_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
u32 ctrl = er32(CTRL);
/* Link status message must follow this format for user tools */
printk(KERN_INFO "e1000e: %s NIC Link is Up %d Mbps %s, "
"Flow Control: %s\n",
adapter->netdev->name,
adapter->link_speed,
(adapter->link_duplex == FULL_DUPLEX) ?
"Full Duplex" : "Half Duplex",
((ctrl & E1000_CTRL_TFCE) && (ctrl & E1000_CTRL_RFCE)) ?
"RX/TX" :
((ctrl & E1000_CTRL_RFCE) ? "RX" :
((ctrl & E1000_CTRL_TFCE) ? "TX" : "None" )));
}
static bool e1000_has_link(struct e1000_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
bool link_active = 0;
s32 ret_val = 0;
/*
* get_link_status is set on LSC (link status) interrupt or
* Rx sequence error interrupt. get_link_status will stay
* false until the check_for_link establishes link
* for copper adapters ONLY
*/
switch (hw->phy.media_type) {
case e1000_media_type_copper:
if (hw->mac.get_link_status) {
ret_val = hw->mac.ops.check_for_link(hw);
link_active = !hw->mac.get_link_status;
} else {
link_active = 1;
}
break;
case e1000_media_type_fiber:
ret_val = hw->mac.ops.check_for_link(hw);
link_active = !!(er32(STATUS) & E1000_STATUS_LU);
break;
case e1000_media_type_internal_serdes:
ret_val = hw->mac.ops.check_for_link(hw);
link_active = adapter->hw.mac.serdes_has_link;
break;
default:
case e1000_media_type_unknown:
break;
}
if ((ret_val == E1000_ERR_PHY) && (hw->phy.type == e1000_phy_igp_3) &&
(er32(CTRL) & E1000_PHY_CTRL_GBE_DISABLE)) {
/* See e1000_kmrn_lock_loss_workaround_ich8lan() */
e_info("Gigabit has been disabled, downgrading speed\n");
}
return link_active;
}
static void e1000e_enable_receives(struct e1000_adapter *adapter)
{
/* make sure the receive unit is started */
if ((adapter->flags & FLAG_RX_NEEDS_RESTART) &&
(adapter->flags & FLAG_RX_RESTART_NOW)) {
struct e1000_hw *hw = &adapter->hw;
u32 rctl = er32(RCTL);
ew32(RCTL, rctl | E1000_RCTL_EN);
adapter->flags &= ~FLAG_RX_RESTART_NOW;
}
}
/**
* e1000_watchdog - Timer Call-back
* @data: pointer to adapter cast into an unsigned long
**/
static void e1000_watchdog(unsigned long data)
{
struct e1000_adapter *adapter = (struct e1000_adapter *) data;
/* Do the rest outside of interrupt context */
schedule_work(&adapter->watchdog_task);
/* TODO: make this use queue_delayed_work() */
}
static void e1000_watchdog_task(struct work_struct *work)
{
struct e1000_adapter *adapter = container_of(work,
struct e1000_adapter, watchdog_task);
struct net_device *netdev = adapter->netdev;
struct e1000_mac_info *mac = &adapter->hw.mac;
struct e1000_phy_info *phy = &adapter->hw.phy;
struct e1000_ring *tx_ring = adapter->tx_ring;
struct e1000_hw *hw = &adapter->hw;
u32 link, tctl;
int tx_pending = 0;
link = e1000_has_link(adapter);
if ((netif_carrier_ok(netdev)) && link) {
e1000e_enable_receives(adapter);
goto link_up;
}
if ((e1000e_enable_tx_pkt_filtering(hw)) &&
(adapter->mng_vlan_id != adapter->hw.mng_cookie.vlan_id))
e1000_update_mng_vlan(adapter);
if (link) {
if (!netif_carrier_ok(netdev)) {
bool txb2b = 1;
/* update snapshot of PHY registers on LSC */
e1000_phy_read_status(adapter);
mac->ops.get_link_up_info(&adapter->hw,
&adapter->link_speed,
&adapter->link_duplex);
e1000_print_link_info(adapter);
/*
* On supported PHYs, check for duplex mismatch only
* if link has autonegotiated at 10/100 half
*/
if ((hw->phy.type == e1000_phy_igp_3 ||
hw->phy.type == e1000_phy_bm) &&
(hw->mac.autoneg == true) &&
(adapter->link_speed == SPEED_10 ||
adapter->link_speed == SPEED_100) &&
(adapter->link_duplex == HALF_DUPLEX)) {
u16 autoneg_exp;
e1e_rphy(hw, PHY_AUTONEG_EXP, &autoneg_exp);
if (!(autoneg_exp & NWAY_ER_LP_NWAY_CAPS))
e_info("Autonegotiated half duplex but"
" link partner cannot autoneg. "
" Try forcing full duplex if "
"link gets many collisions.\n");
}
/*
* tweak tx_queue_len according to speed/duplex
* and adjust the timeout factor
*/
netdev->tx_queue_len = adapter->tx_queue_len;
adapter->tx_timeout_factor = 1;
switch (adapter->link_speed) {
case SPEED_10:
txb2b = 0;
netdev->tx_queue_len = 10;
adapter->tx_timeout_factor = 16;
break;
case SPEED_100:
txb2b = 0;
netdev->tx_queue_len = 100;
/* maybe add some timeout factor ? */
break;
}
/*
* workaround: re-program speed mode bit after
* link-up event
*/
if ((adapter->flags & FLAG_TARC_SPEED_MODE_BIT) &&
!txb2b) {
u32 tarc0;
tarc0 = er32(TARC(0));
tarc0 &= ~SPEED_MODE_BIT;
ew32(TARC(0), tarc0);
}
/*
* disable TSO for pcie and 10/100 speeds, to avoid
* some hardware issues
*/
if (!(adapter->flags & FLAG_TSO_FORCE)) {
switch (adapter->link_speed) {
case SPEED_10:
case SPEED_100:
e_info("10/100 speed: disabling TSO\n");
netdev->features &= ~NETIF_F_TSO;
netdev->features &= ~NETIF_F_TSO6;
break;
case SPEED_1000:
netdev->features |= NETIF_F_TSO;
netdev->features |= NETIF_F_TSO6;
break;
default:
/* oops */
break;
}
}
/*
* enable transmits in the hardware, need to do this
* after setting TARC(0)
*/
tctl = er32(TCTL);
tctl |= E1000_TCTL_EN;
ew32(TCTL, tctl);
/*
* Perform any post-link-up configuration before
* reporting link up.
*/
if (phy->ops.cfg_on_link_up)
phy->ops.cfg_on_link_up(hw);
netif_carrier_on(netdev);
netif_tx_wake_all_queues(netdev);
if (!test_bit(__E1000_DOWN, &adapter->state))
mod_timer(&adapter->phy_info_timer,
round_jiffies(jiffies + 2 * HZ));
}
} else {
if (netif_carrier_ok(netdev)) {
adapter->link_speed = 0;
adapter->link_duplex = 0;
/* Link status message must follow this format */
printk(KERN_INFO "e1000e: %s NIC Link is Down\n",
adapter->netdev->name);
netif_carrier_off(netdev);
netif_tx_stop_all_queues(netdev);
if (!test_bit(__E1000_DOWN, &adapter->state))
mod_timer(&adapter->phy_info_timer,
round_jiffies(jiffies + 2 * HZ));
if (adapter->flags & FLAG_RX_NEEDS_RESTART)
schedule_work(&adapter->reset_task);
}
}
link_up:
e1000e_update_stats(adapter);
mac->tx_packet_delta = adapter->stats.tpt - adapter->tpt_old;
adapter->tpt_old = adapter->stats.tpt;
mac->collision_delta = adapter->stats.colc - adapter->colc_old;
adapter->colc_old = adapter->stats.colc;
adapter->gorc = adapter->stats.gorc - adapter->gorc_old;
adapter->gorc_old = adapter->stats.gorc;
adapter->gotc = adapter->stats.gotc - adapter->gotc_old;
adapter->gotc_old = adapter->stats.gotc;
e1000e_update_adaptive(&adapter->hw);
if (!netif_carrier_ok(netdev)) {
tx_pending = (e1000_desc_unused(tx_ring) + 1 <
tx_ring->count);
if (tx_pending) {
/*
* We've lost link, so the controller stops DMA,
* but we've got queued Tx work that's never going
* to get done, so reset controller to flush Tx.
* (Do the reset outside of interrupt context).
*/
adapter->tx_timeout_count++;
schedule_work(&adapter->reset_task);
}
}
/* Cause software interrupt to ensure Rx ring is cleaned */
if (adapter->msix_entries)
ew32(ICS, adapter->rx_ring->ims_val);
else
ew32(ICS, E1000_ICS_RXDMT0);
/* Force detection of hung controller every watchdog period */
adapter->detect_tx_hung = 1;
/*
* With 82571 controllers, LAA may be overwritten due to controller
* reset from the other port. Set the appropriate LAA in RAR[0]
*/
if (e1000e_get_laa_state_82571(hw))
e1000e_rar_set(hw, adapter->hw.mac.addr, 0);
/* Reset the timer */
if (!test_bit(__E1000_DOWN, &adapter->state))
mod_timer(&adapter->watchdog_timer,
round_jiffies(jiffies + 2 * HZ));
}
#define E1000_TX_FLAGS_CSUM 0x00000001
#define E1000_TX_FLAGS_VLAN 0x00000002
#define E1000_TX_FLAGS_TSO 0x00000004
#define E1000_TX_FLAGS_IPV4 0x00000008
#define E1000_TX_FLAGS_VLAN_MASK 0xffff0000
#define E1000_TX_FLAGS_VLAN_SHIFT 16
static int e1000_tso(struct e1000_adapter *adapter,
struct sk_buff *skb)
{
struct e1000_ring *tx_ring = adapter->tx_ring;
struct e1000_context_desc *context_desc;
struct e1000_buffer *buffer_info;
unsigned int i;
u32 cmd_length = 0;
u16 ipcse = 0, tucse, mss;
u8 ipcss, ipcso, tucss, tucso, hdr_len;
int err;
if (skb_is_gso(skb)) {
if (skb_header_cloned(skb)) {
err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
if (err)
return err;
}
hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
mss = skb_shinfo(skb)->gso_size;
if (skb->protocol == htons(ETH_P_IP)) {
struct iphdr *iph = ip_hdr(skb);
iph->tot_len = 0;
iph->check = 0;
tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
iph->daddr, 0,
IPPROTO_TCP,
0);
cmd_length = E1000_TXD_CMD_IP;
ipcse = skb_transport_offset(skb) - 1;
} else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) {
ipv6_hdr(skb)->payload_len = 0;
tcp_hdr(skb)->check =
~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr,
0, IPPROTO_TCP, 0);
ipcse = 0;
}
ipcss = skb_network_offset(skb);
ipcso = (void *)&(ip_hdr(skb)->check) - (void *)skb->data;
tucss = skb_transport_offset(skb);
tucso = (void *)&(tcp_hdr(skb)->check) - (void *)skb->data;
tucse = 0;
cmd_length |= (E1000_TXD_CMD_DEXT | E1000_TXD_CMD_TSE |
E1000_TXD_CMD_TCP | (skb->len - (hdr_len)));
i = tx_ring->next_to_use;
context_desc = E1000_CONTEXT_DESC(*tx_ring, i);
buffer_info = &tx_ring->buffer_info[i];
context_desc->lower_setup.ip_fields.ipcss = ipcss;
context_desc->lower_setup.ip_fields.ipcso = ipcso;
context_desc->lower_setup.ip_fields.ipcse = cpu_to_le16(ipcse);
context_desc->upper_setup.tcp_fields.tucss = tucss;
context_desc->upper_setup.tcp_fields.tucso = tucso;
context_desc->upper_setup.tcp_fields.tucse = cpu_to_le16(tucse);
context_desc->tcp_seg_setup.fields.mss = cpu_to_le16(mss);
context_desc->tcp_seg_setup.fields.hdr_len = hdr_len;
context_desc->cmd_and_length = cpu_to_le32(cmd_length);
buffer_info->time_stamp = jiffies;
buffer_info->next_to_watch = i;
i++;
if (i == tx_ring->count)
i = 0;
tx_ring->next_to_use = i;
return 1;
}
return 0;
}
static bool e1000_tx_csum(struct e1000_adapter *adapter, struct sk_buff *skb)
{
struct e1000_ring *tx_ring = adapter->tx_ring;
struct e1000_context_desc *context_desc;
struct e1000_buffer *buffer_info;
unsigned int i;
u8 css;
u32 cmd_len = E1000_TXD_CMD_DEXT;
if (skb->ip_summed != CHECKSUM_PARTIAL)
return 0;
switch (skb->protocol) {
case __constant_htons(ETH_P_IP):
if (ip_hdr(skb)->protocol == IPPROTO_TCP)
cmd_len |= E1000_TXD_CMD_TCP;
break;
case __constant_htons(ETH_P_IPV6):
/* XXX not handling all IPV6 headers */
if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
cmd_len |= E1000_TXD_CMD_TCP;
break;
default:
if (unlikely(net_ratelimit()))
e_warn("checksum_partial proto=%x!\n", skb->protocol);
break;
}
css = skb_transport_offset(skb);
i = tx_ring->next_to_use;
buffer_info = &tx_ring->buffer_info[i];
context_desc = E1000_CONTEXT_DESC(*tx_ring, i);
context_desc->lower_setup.ip_config = 0;
context_desc->upper_setup.tcp_fields.tucss = css;
context_desc->upper_setup.tcp_fields.tucso =
css + skb->csum_offset;
context_desc->upper_setup.tcp_fields.tucse = 0;
context_desc->tcp_seg_setup.data = 0;
context_desc->cmd_and_length = cpu_to_le32(cmd_len);
buffer_info->time_stamp = jiffies;
buffer_info->next_to_watch = i;
i++;
if (i == tx_ring->count)
i = 0;
tx_ring->next_to_use = i;
return 1;
}
#define E1000_MAX_PER_TXD 8192
#define E1000_MAX_TXD_PWR 12
static int e1000_tx_map(struct e1000_adapter *adapter,
struct sk_buff *skb, unsigned int first,
unsigned int max_per_txd, unsigned int nr_frags,
unsigned int mss)
{
struct e1000_ring *tx_ring = adapter->tx_ring;
struct e1000_buffer *buffer_info;
unsigned int len = skb->len - skb->data_len;
unsigned int offset = 0, size, count = 0, i;
unsigned int f;
i = tx_ring->next_to_use;
while (len) {
buffer_info = &tx_ring->buffer_info[i];
size = min(len, max_per_txd);
/* Workaround for premature desc write-backs
* in TSO mode. Append 4-byte sentinel desc */
if (mss && !nr_frags && size == len && size > 8)
size -= 4;
buffer_info->length = size;
/* set time_stamp *before* dma to help avoid a possible race */
buffer_info->time_stamp = jiffies;
buffer_info->dma =
pci_map_single(adapter->pdev,
skb->data + offset,
size,
PCI_DMA_TODEVICE);
dma-mapping: add the device argument to dma_mapping_error() Add per-device dma_mapping_ops support for CONFIG_X86_64 as POWER architecture does: This enables us to cleanly fix the Calgary IOMMU issue that some devices are not behind the IOMMU (http://lkml.org/lkml/2008/5/8/423). I think that per-device dma_mapping_ops support would be also helpful for KVM people to support PCI passthrough but Andi thinks that this makes it difficult to support the PCI passthrough (see the above thread). So I CC'ed this to KVM camp. Comments are appreciated. A pointer to dma_mapping_ops to struct dev_archdata is added. If the pointer is non NULL, DMA operations in asm/dma-mapping.h use it. If it's NULL, the system-wide dma_ops pointer is used as before. If it's useful for KVM people, I plan to implement a mechanism to register a hook called when a new pci (or dma capable) device is created (it works with hot plugging). It enables IOMMUs to set up an appropriate dma_mapping_ops per device. The major obstacle is that dma_mapping_error doesn't take a pointer to the device unlike other DMA operations. So x86 can't have dma_mapping_ops per device. Note all the POWER IOMMUs use the same dma_mapping_error function so this is not a problem for POWER but x86 IOMMUs use different dma_mapping_error functions. The first patch adds the device argument to dma_mapping_error. The patch is trivial but large since it touches lots of drivers and dma-mapping.h in all the architecture. This patch: dma_mapping_error() doesn't take a pointer to the device unlike other DMA operations. So we can't have dma_mapping_ops per device. Note that POWER already has dma_mapping_ops per device but all the POWER IOMMUs use the same dma_mapping_error function. x86 IOMMUs use device argument. [akpm@linux-foundation.org: fix sge] [akpm@linux-foundation.org: fix svc_rdma] [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: fix bnx2x] [akpm@linux-foundation.org: fix s2io] [akpm@linux-foundation.org: fix pasemi_mac] [akpm@linux-foundation.org: fix sdhci] [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: fix sparc] [akpm@linux-foundation.org: fix ibmvscsi] Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> Cc: Muli Ben-Yehuda <muli@il.ibm.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: Avi Kivity <avi@qumranet.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-25 19:44:49 -07:00
if (pci_dma_mapping_error(adapter->pdev, buffer_info->dma)) {
dev_err(&adapter->pdev->dev, "TX DMA map failed\n");
adapter->tx_dma_failed++;
return -1;
}
buffer_info->next_to_watch = i;
len -= size;
offset += size;
count++;
i++;
if (i == tx_ring->count)
i = 0;
}
for (f = 0; f < nr_frags; f++) {
struct skb_frag_struct *frag;
frag = &skb_shinfo(skb)->frags[f];
len = frag->size;
offset = frag->page_offset;
while (len) {
buffer_info = &tx_ring->buffer_info[i];
size = min(len, max_per_txd);
/* Workaround for premature desc write-backs
* in TSO mode. Append 4-byte sentinel desc */
if (mss && f == (nr_frags-1) && size == len && size > 8)
size -= 4;
buffer_info->length = size;
buffer_info->time_stamp = jiffies;
buffer_info->dma =
pci_map_page(adapter->pdev,
frag->page,
offset,
size,
PCI_DMA_TODEVICE);
dma-mapping: add the device argument to dma_mapping_error() Add per-device dma_mapping_ops support for CONFIG_X86_64 as POWER architecture does: This enables us to cleanly fix the Calgary IOMMU issue that some devices are not behind the IOMMU (http://lkml.org/lkml/2008/5/8/423). I think that per-device dma_mapping_ops support would be also helpful for KVM people to support PCI passthrough but Andi thinks that this makes it difficult to support the PCI passthrough (see the above thread). So I CC'ed this to KVM camp. Comments are appreciated. A pointer to dma_mapping_ops to struct dev_archdata is added. If the pointer is non NULL, DMA operations in asm/dma-mapping.h use it. If it's NULL, the system-wide dma_ops pointer is used as before. If it's useful for KVM people, I plan to implement a mechanism to register a hook called when a new pci (or dma capable) device is created (it works with hot plugging). It enables IOMMUs to set up an appropriate dma_mapping_ops per device. The major obstacle is that dma_mapping_error doesn't take a pointer to the device unlike other DMA operations. So x86 can't have dma_mapping_ops per device. Note all the POWER IOMMUs use the same dma_mapping_error function so this is not a problem for POWER but x86 IOMMUs use different dma_mapping_error functions. The first patch adds the device argument to dma_mapping_error. The patch is trivial but large since it touches lots of drivers and dma-mapping.h in all the architecture. This patch: dma_mapping_error() doesn't take a pointer to the device unlike other DMA operations. So we can't have dma_mapping_ops per device. Note that POWER already has dma_mapping_ops per device but all the POWER IOMMUs use the same dma_mapping_error function. x86 IOMMUs use device argument. [akpm@linux-foundation.org: fix sge] [akpm@linux-foundation.org: fix svc_rdma] [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: fix bnx2x] [akpm@linux-foundation.org: fix s2io] [akpm@linux-foundation.org: fix pasemi_mac] [akpm@linux-foundation.org: fix sdhci] [akpm@linux-foundation.org: build fix] [akpm@linux-foundation.org: fix sparc] [akpm@linux-foundation.org: fix ibmvscsi] Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> Cc: Muli Ben-Yehuda <muli@il.ibm.com> Cc: Andi Kleen <andi@firstfloor.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: Avi Kivity <avi@qumranet.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2008-07-25 19:44:49 -07:00
if (pci_dma_mapping_error(adapter->pdev,
buffer_info->dma)) {
dev_err(&adapter->pdev->dev,
"TX DMA page map failed\n");
adapter->tx_dma_failed++;
return -1;
}
buffer_info->next_to_watch = i;
len -= size;
offset += size;
count++;
i++;
if (i == tx_ring->count)
i = 0;
}
}
if (i == 0)
i = tx_ring->count - 1;
else
i--;
tx_ring->buffer_info[i].skb = skb;
tx_ring->buffer_info[first].next_to_watch = i;
return count;
}
static void e1000_tx_queue(struct e1000_adapter *adapter,
int tx_flags, int count)
{
struct e1000_ring *tx_ring = adapter->tx_ring;
struct e1000_tx_desc *tx_desc = NULL;
struct e1000_buffer *buffer_info;
u32 txd_upper = 0, txd_lower = E1000_TXD_CMD_IFCS;
unsigned int i;
if (tx_flags & E1000_TX_FLAGS_TSO) {
txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D |
E1000_TXD_CMD_TSE;
txd_upper |= E1000_TXD_POPTS_TXSM << 8;
if (tx_flags & E1000_TX_FLAGS_IPV4)
txd_upper |= E1000_TXD_POPTS_IXSM << 8;
}
if (tx_flags & E1000_TX_FLAGS_CSUM) {
txd_lower |= E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D;
txd_upper |= E1000_TXD_POPTS_TXSM << 8;
}
if (tx_flags & E1000_TX_FLAGS_VLAN) {
txd_lower |= E1000_TXD_CMD_VLE;
txd_upper |= (tx_flags & E1000_TX_FLAGS_VLAN_MASK);
}
i = tx_ring->next_to_use;
while (count--) {
buffer_info = &tx_ring->buffer_info[i];
tx_desc = E1000_TX_DESC(*tx_ring, i);
tx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
tx_desc->lower.data =
cpu_to_le32(txd_lower | buffer_info->length);
tx_desc->upper.data = cpu_to_le32(txd_upper);
i++;
if (i == tx_ring->count)
i = 0;
}
tx_desc->lower.data |= cpu_to_le32(adapter->txd_cmd);
/*
* Force memory writes to complete before letting h/w
* know there are new descriptors to fetch. (Only
* applicable for weak-ordered memory model archs,
* such as IA-64).
*/
wmb();
tx_ring->next_to_use = i;
writel(i, adapter->hw.hw_addr + tx_ring->tail);
/*
* we need this if more than one processor can write to our tail
* at a time, it synchronizes IO on IA64/Altix systems
*/
mmiowb();
}
#define MINIMUM_DHCP_PACKET_SIZE 282
static int e1000_transfer_dhcp_info(struct e1000_adapter *adapter,
struct sk_buff *skb)
{
struct e1000_hw *hw = &adapter->hw;
u16 length, offset;
if (vlan_tx_tag_present(skb)) {
if (!((vlan_tx_tag_get(skb) == adapter->hw.mng_cookie.vlan_id)
&& (adapter->hw.mng_cookie.status &
E1000_MNG_DHCP_COOKIE_STATUS_VLAN)))
return 0;
}
if (skb->len <= MINIMUM_DHCP_PACKET_SIZE)
return 0;
if (((struct ethhdr *) skb->data)->h_proto != htons(ETH_P_IP))
return 0;
{
const struct iphdr *ip = (struct iphdr *)((u8 *)skb->data+14);
struct udphdr *udp;
if (ip->protocol != IPPROTO_UDP)
return 0;
udp = (struct udphdr *)((u8 *)ip + (ip->ihl << 2));
if (ntohs(udp->dest) != 67)
return 0;
offset = (u8 *)udp + 8 - skb->data;
length = skb->len - offset;
return e1000e_mng_write_dhcp_info(hw, (u8 *)udp + 8, length);
}
return 0;
}
static int __e1000_maybe_stop_tx(struct net_device *netdev, int size)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
netif_stop_queue(netdev);
/*
* Herbert's original patch had:
* smp_mb__after_netif_stop_queue();
* but since that doesn't exist yet, just open code it.
*/
smp_mb();
/*
* We need to check again in a case another CPU has just
* made room available.
*/
if (e1000_desc_unused(adapter->tx_ring) < size)
return -EBUSY;
/* A reprieve! */
netif_start_queue(netdev);
++adapter->restart_queue;
return 0;
}
static int e1000_maybe_stop_tx(struct net_device *netdev, int size)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
if (e1000_desc_unused(adapter->tx_ring) >= size)
return 0;
return __e1000_maybe_stop_tx(netdev, size);
}
#define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1 )
static int e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_ring *tx_ring = adapter->tx_ring;
unsigned int first;
unsigned int max_per_txd = E1000_MAX_PER_TXD;
unsigned int max_txd_pwr = E1000_MAX_TXD_PWR;
unsigned int tx_flags = 0;
unsigned int len = skb->len - skb->data_len;
unsigned long irq_flags;
unsigned int nr_frags;
unsigned int mss;
int count = 0;
int tso;
unsigned int f;
if (test_bit(__E1000_DOWN, &adapter->state)) {
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
if (skb->len <= 0) {
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
mss = skb_shinfo(skb)->gso_size;
/*
* The controller does a simple calculation to
* make sure there is enough room in the FIFO before
* initiating the DMA for each buffer. The calc is:
* 4 = ceil(buffer len/mss). To make sure we don't
* overrun the FIFO, adjust the max buffer len if mss
* drops.
*/
if (mss) {
u8 hdr_len;
max_per_txd = min(mss << 2, max_per_txd);
max_txd_pwr = fls(max_per_txd) - 1;
/*
* TSO Workaround for 82571/2/3 Controllers -- if skb->data
* points to just header, pull a few bytes of payload from
* frags into skb->data
*/
hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
/*
* we do this workaround for ES2LAN, but it is un-necessary,
* avoiding it could save a lot of cycles
*/
if (skb->data_len && (hdr_len == len)) {
unsigned int pull_size;
pull_size = min((unsigned int)4, skb->data_len);
if (!__pskb_pull_tail(skb, pull_size)) {
e_err("__pskb_pull_tail failed.\n");
dev_kfree_skb_any(skb);
return NETDEV_TX_OK;
}
len = skb->len - skb->data_len;
}
}
/* reserve a descriptor for the offload context */
if ((mss) || (skb->ip_summed == CHECKSUM_PARTIAL))
count++;
count++;
count += TXD_USE_COUNT(len, max_txd_pwr);
nr_frags = skb_shinfo(skb)->nr_frags;
for (f = 0; f < nr_frags; f++)
count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size,
max_txd_pwr);
if (adapter->hw.mac.tx_pkt_filtering)
e1000_transfer_dhcp_info(adapter, skb);
if (!spin_trylock_irqsave(&adapter->tx_queue_lock, irq_flags))
/* Collision - tell upper layer to requeue */
return NETDEV_TX_LOCKED;
/*
* need: count + 2 desc gap to keep tail from touching
* head, otherwise try next time
*/
if (e1000_maybe_stop_tx(netdev, count + 2)) {
spin_unlock_irqrestore(&adapter->tx_queue_lock, irq_flags);
return NETDEV_TX_BUSY;
}
if (adapter->vlgrp && vlan_tx_tag_present(skb)) {
tx_flags |= E1000_TX_FLAGS_VLAN;
tx_flags |= (vlan_tx_tag_get(skb) << E1000_TX_FLAGS_VLAN_SHIFT);
}
first = tx_ring->next_to_use;
tso = e1000_tso(adapter, skb);
if (tso < 0) {
dev_kfree_skb_any(skb);
spin_unlock_irqrestore(&adapter->tx_queue_lock, irq_flags);
return NETDEV_TX_OK;
}
if (tso)
tx_flags |= E1000_TX_FLAGS_TSO;
else if (e1000_tx_csum(adapter, skb))
tx_flags |= E1000_TX_FLAGS_CSUM;
/*
* Old method was to assume IPv4 packet by default if TSO was enabled.
* 82571 hardware supports TSO capabilities for IPv6 as well...
* no longer assume, we must.
*/
if (skb->protocol == htons(ETH_P_IP))
tx_flags |= E1000_TX_FLAGS_IPV4;
count = e1000_tx_map(adapter, skb, first, max_per_txd, nr_frags, mss);
if (count < 0) {
/* handle pci_map_single() error in e1000_tx_map */
dev_kfree_skb_any(skb);
spin_unlock_irqrestore(&adapter->tx_queue_lock, irq_flags);
return NETDEV_TX_OK;
}
e1000_tx_queue(adapter, tx_flags, count);
netdev->trans_start = jiffies;
/* Make sure there is space in the ring for the next send. */
e1000_maybe_stop_tx(netdev, MAX_SKB_FRAGS + 2);
spin_unlock_irqrestore(&adapter->tx_queue_lock, irq_flags);
return NETDEV_TX_OK;
}
/**
* e1000_tx_timeout - Respond to a Tx Hang
* @netdev: network interface device structure
**/
static void e1000_tx_timeout(struct net_device *netdev)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
/* Do the reset outside of interrupt context */
adapter->tx_timeout_count++;
schedule_work(&adapter->reset_task);
}
static void e1000_reset_task(struct work_struct *work)
{
struct e1000_adapter *adapter;
adapter = container_of(work, struct e1000_adapter, reset_task);
e1000e_reinit_locked(adapter);
}
/**
* e1000_get_stats - Get System Network Statistics
* @netdev: network interface device structure
*
* Returns the address of the device statistics structure.
* The statistics are actually updated from the timer callback.
**/
static struct net_device_stats *e1000_get_stats(struct net_device *netdev)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
/* only return the current stats */
return &adapter->net_stats;
}
/**
* e1000_change_mtu - Change the Maximum Transfer Unit
* @netdev: network interface device structure
* @new_mtu: new value for maximum frame size
*
* Returns 0 on success, negative on failure
**/
static int e1000_change_mtu(struct net_device *netdev, int new_mtu)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
if ((new_mtu < ETH_ZLEN + ETH_FCS_LEN + VLAN_HLEN) ||
(max_frame > MAX_JUMBO_FRAME_SIZE)) {
e_err("Invalid MTU setting\n");
return -EINVAL;
}
/* Jumbo frame size limits */
if (max_frame > ETH_FRAME_LEN + ETH_FCS_LEN) {
if (!(adapter->flags & FLAG_HAS_JUMBO_FRAMES)) {
e_err("Jumbo Frames not supported.\n");
return -EINVAL;
}
if (adapter->hw.phy.type == e1000_phy_ife) {
e_err("Jumbo Frames not supported.\n");
return -EINVAL;
}
}
#define MAX_STD_JUMBO_FRAME_SIZE 9234
if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
e_err("MTU > 9216 not supported.\n");
return -EINVAL;
}
while (test_and_set_bit(__E1000_RESETTING, &adapter->state))
msleep(1);
/* e1000e_down has a dependency on max_frame_size */
adapter->max_frame_size = max_frame;
if (netif_running(netdev))
e1000e_down(adapter);
/*
* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
* means we reserve 2 more, this pushes us to allocate from the next
* larger slab size.
* i.e. RXBUFFER_2048 --> size-4096 slab
* However with the new *_jumbo_rx* routines, jumbo receives will use
* fragmented skbs
*/
if (max_frame <= 256)
adapter->rx_buffer_len = 256;
else if (max_frame <= 512)
adapter->rx_buffer_len = 512;
else if (max_frame <= 1024)
adapter->rx_buffer_len = 1024;
else if (max_frame <= 2048)
adapter->rx_buffer_len = 2048;
else
adapter->rx_buffer_len = 4096;
/* adjust allocation if LPE protects us, and we aren't using SBP */
if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN) ||
(max_frame == ETH_FRAME_LEN + VLAN_HLEN + ETH_FCS_LEN))
adapter->rx_buffer_len = ETH_FRAME_LEN + VLAN_HLEN
+ ETH_FCS_LEN;
e_info("changing MTU from %d to %d\n", netdev->mtu, new_mtu);
netdev->mtu = new_mtu;
if (netif_running(netdev))
e1000e_up(adapter);
else
e1000e_reset(adapter);
clear_bit(__E1000_RESETTING, &adapter->state);
return 0;
}
static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr,
int cmd)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
struct mii_ioctl_data *data = if_mii(ifr);
if (adapter->hw.phy.media_type != e1000_media_type_copper)
return -EOPNOTSUPP;
switch (cmd) {
case SIOCGMIIPHY:
data->phy_id = adapter->hw.phy.addr;
break;
case SIOCGMIIREG:
if (!capable(CAP_NET_ADMIN))
return -EPERM;
switch (data->reg_num & 0x1F) {
case MII_BMCR:
data->val_out = adapter->phy_regs.bmcr;
break;
case MII_BMSR:
data->val_out = adapter->phy_regs.bmsr;
break;
case MII_PHYSID1:
data->val_out = (adapter->hw.phy.id >> 16);
break;
case MII_PHYSID2:
data->val_out = (adapter->hw.phy.id & 0xFFFF);
break;
case MII_ADVERTISE:
data->val_out = adapter->phy_regs.advertise;
break;
case MII_LPA:
data->val_out = adapter->phy_regs.lpa;
break;
case MII_EXPANSION:
data->val_out = adapter->phy_regs.expansion;
break;
case MII_CTRL1000:
data->val_out = adapter->phy_regs.ctrl1000;
break;
case MII_STAT1000:
data->val_out = adapter->phy_regs.stat1000;
break;
case MII_ESTATUS:
data->val_out = adapter->phy_regs.estatus;
break;
default:
return -EIO;
}
break;
case SIOCSMIIREG:
default:
return -EOPNOTSUPP;
}
return 0;
}
static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
{
switch (cmd) {
case SIOCGMIIPHY:
case SIOCGMIIREG:
case SIOCSMIIREG:
return e1000_mii_ioctl(netdev, ifr, cmd);
default:
return -EOPNOTSUPP;
}
}
static int e1000_suspend(struct pci_dev *pdev, pm_message_t state)
{
struct net_device *netdev = pci_get_drvdata(pdev);
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
u32 ctrl, ctrl_ext, rctl, status;
u32 wufc = adapter->wol;
int retval = 0;
netif_device_detach(netdev);
if (netif_running(netdev)) {
WARN_ON(test_bit(__E1000_RESETTING, &adapter->state));
e1000e_down(adapter);
e1000_free_irq(adapter);
}
e1000e_reset_interrupt_capability(adapter);
retval = pci_save_state(pdev);
if (retval)
return retval;
status = er32(STATUS);
if (status & E1000_STATUS_LU)
wufc &= ~E1000_WUFC_LNKC;
if (wufc) {
e1000_setup_rctl(adapter);
e1000_set_multi(netdev);
/* turn on all-multi mode if wake on multicast is enabled */
if (wufc & E1000_WUFC_MC) {
rctl = er32(RCTL);
rctl |= E1000_RCTL_MPE;
ew32(RCTL, rctl);
}
ctrl = er32(CTRL);
/* advertise wake from D3Cold */
#define E1000_CTRL_ADVD3WUC 0x00100000
/* phy power management enable */
#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
ctrl |= E1000_CTRL_ADVD3WUC |
E1000_CTRL_EN_PHY_PWR_MGMT;
ew32(CTRL, ctrl);
if (adapter->hw.phy.media_type == e1000_media_type_fiber ||
adapter->hw.phy.media_type ==
e1000_media_type_internal_serdes) {
/* keep the laser running in D3 */
ctrl_ext = er32(CTRL_EXT);
ctrl_ext |= E1000_CTRL_EXT_SDP7_DATA;
ew32(CTRL_EXT, ctrl_ext);
}
if (adapter->flags & FLAG_IS_ICH)
e1000e_disable_gig_wol_ich8lan(&adapter->hw);
/* Allow time for pending master requests to run */
e1000e_disable_pcie_master(&adapter->hw);
ew32(WUC, E1000_WUC_PME_EN);
ew32(WUFC, wufc);
pci_enable_wake(pdev, PCI_D3hot, 1);
pci_enable_wake(pdev, PCI_D3cold, 1);
} else {
ew32(WUC, 0);
ew32(WUFC, 0);
pci_enable_wake(pdev, PCI_D3hot, 0);
pci_enable_wake(pdev, PCI_D3cold, 0);
}
/* make sure adapter isn't asleep if manageability is enabled */
if (adapter->flags & FLAG_MNG_PT_ENABLED) {
pci_enable_wake(pdev, PCI_D3hot, 1);
pci_enable_wake(pdev, PCI_D3cold, 1);
}
if (adapter->hw.phy.type == e1000_phy_igp_3)
e1000e_igp3_phy_powerdown_workaround_ich8lan(&adapter->hw);
/*
* Release control of h/w to f/w. If f/w is AMT enabled, this
* would have already happened in close and is redundant.
*/
e1000_release_hw_control(adapter);
pci_disable_device(pdev);
/*
* The pci-e switch on some quad port adapters will report a
* correctable error when the MAC transitions from D0 to D3. To
* prevent this we need to mask off the correctable errors on the
* downstream port of the pci-e switch.
*/
if (adapter->flags & FLAG_IS_QUAD_PORT) {
struct pci_dev *us_dev = pdev->bus->self;
int pos = pci_find_capability(us_dev, PCI_CAP_ID_EXP);
u16 devctl;
pci_read_config_word(us_dev, pos + PCI_EXP_DEVCTL, &devctl);
pci_write_config_word(us_dev, pos + PCI_EXP_DEVCTL,
(devctl & ~PCI_EXP_DEVCTL_CERE));
pci_set_power_state(pdev, pci_choose_state(pdev, state));
pci_write_config_word(us_dev, pos + PCI_EXP_DEVCTL, devctl);
} else {
pci_set_power_state(pdev, pci_choose_state(pdev, state));
}
return 0;
}
static void e1000e_disable_l1aspm(struct pci_dev *pdev)
{
int pos;
u16 val;
/*
* 82573 workaround - disable L1 ASPM on mobile chipsets
*
* L1 ASPM on various mobile (ich7) chipsets do not behave properly
* resulting in lost data or garbage information on the pci-e link
* level. This could result in (false) bad EEPROM checksum errors,
* long ping times (up to 2s) or even a system freeze/hang.
*
* Unfortunately this feature saves about 1W power consumption when
* active.
*/
pos = pci_find_capability(pdev, PCI_CAP_ID_EXP);
pci_read_config_word(pdev, pos + PCI_EXP_LNKCTL, &val);
if (val & 0x2) {
dev_warn(&pdev->dev, "Disabling L1 ASPM\n");
val &= ~0x2;
pci_write_config_word(pdev, pos + PCI_EXP_LNKCTL, val);
}
}
#ifdef CONFIG_PM
static int e1000_resume(struct pci_dev *pdev)
{
struct net_device *netdev = pci_get_drvdata(pdev);
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
u32 err;
pci_set_power_state(pdev, PCI_D0);
pci_restore_state(pdev);
e1000e_disable_l1aspm(pdev);
err = pci_enable_device_mem(pdev);
if (err) {
dev_err(&pdev->dev,
"Cannot enable PCI device from suspend\n");
return err;
}
pci_set_master(pdev);
pci_enable_wake(pdev, PCI_D3hot, 0);
pci_enable_wake(pdev, PCI_D3cold, 0);
e1000e_set_interrupt_capability(adapter);
if (netif_running(netdev)) {
err = e1000_request_irq(adapter);
if (err)
return err;
}
e1000e_power_up_phy(adapter);
e1000e_reset(adapter);
ew32(WUS, ~0);
e1000_init_manageability(adapter);
if (netif_running(netdev))
e1000e_up(adapter);
netif_device_attach(netdev);
/*
* If the controller has AMT, do not set DRV_LOAD until the interface
* is up. For all other cases, let the f/w know that the h/w is now
* under the control of the driver.
*/
if (!(adapter->flags & FLAG_HAS_AMT))
e1000_get_hw_control(adapter);
return 0;
}
#endif
static void e1000_shutdown(struct pci_dev *pdev)
{
e1000_suspend(pdev, PMSG_SUSPEND);
}
#ifdef CONFIG_NET_POLL_CONTROLLER
/*
* Polling 'interrupt' - used by things like netconsole to send skbs
* without having to re-enable interrupts. It's not called while
* the interrupt routine is executing.
*/
static void e1000_netpoll(struct net_device *netdev)
{
struct e1000_adapter *adapter = netdev_priv(netdev);
disable_irq(adapter->pdev->irq);
e1000_intr(adapter->pdev->irq, netdev);
enable_irq(adapter->pdev->irq);
}
#endif
/**
* e1000_io_error_detected - called when PCI error is detected
* @pdev: Pointer to PCI device
* @state: The current pci connection state
*
* This function is called after a PCI bus error affecting
* this device has been detected.
*/
static pci_ers_result_t e1000_io_error_detected(struct pci_dev *pdev,
pci_channel_state_t state)
{
struct net_device *netdev = pci_get_drvdata(pdev);
struct e1000_adapter *adapter = netdev_priv(netdev);
netif_device_detach(netdev);
if (netif_running(netdev))
e1000e_down(adapter);
pci_disable_device(pdev);
/* Request a slot slot reset. */
return PCI_ERS_RESULT_NEED_RESET;
}
/**
* e1000_io_slot_reset - called after the pci bus has been reset.
* @pdev: Pointer to PCI device
*
* Restart the card from scratch, as if from a cold-boot. Implementation
* resembles the first-half of the e1000_resume routine.
*/
static pci_ers_result_t e1000_io_slot_reset(struct pci_dev *pdev)
{
struct net_device *netdev = pci_get_drvdata(pdev);
struct e1000_adapter *adapter = netdev_priv(netdev);
struct e1000_hw *hw = &adapter->hw;
int err;
e1000e_disable_l1aspm(pdev);
err = pci_enable_device_mem(pdev);
if (err) {
dev_err(&pdev->dev,
"Cannot re-enable PCI device after reset.\n");
return PCI_ERS_RESULT_DISCONNECT;
}
pci_set_master(pdev);
pci_restore_state(pdev);
pci_enable_wake(pdev, PCI_D3hot, 0);
pci_enable_wake(pdev, PCI_D3cold, 0);
e1000e_reset(adapter);
ew32(WUS, ~0);
return PCI_ERS_RESULT_RECOVERED;
}
/**
* e1000_io_resume - called when traffic can start flowing again.
* @pdev: Pointer to PCI device
*
* This callback is called when the error recovery driver tells us that
* its OK to resume normal operation. Implementation resembles the
* second-half of the e1000_resume routine.
*/
static void e1000_io_resume(struct pci_dev *pdev)
{
struct net_device *netdev = pci_get_drvdata(pdev);
struct e1000_adapter *adapter = netdev_priv(netdev);
e1000_init_manageability(adapter);
if (netif_running(netdev)) {
if (e1000e_up(adapter)) {
dev_err(&pdev->dev,
"can't bring device back up after reset\n");
return;
}
}
netif_device_attach(netdev);
/*
* If the controller has AMT, do not set DRV_LOAD until the interface
* is up. For all other cases, let the f/w know that the h/w is now
* under the control of the driver.
*/
if (!(adapter->flags & FLAG_HAS_AMT))
e1000_get_hw_control(adapter);
}
static void e1000_print_device_info(struct e1000_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
struct net_device *netdev = adapter->netdev;
u32 pba_num;
/* print bus type/speed/width info */
e_info("(PCI Express:2.5GB/s:%s) %pM\n",
/* bus width */
((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
"Width x1"),
/* MAC address */
netdev->dev_addr);
e_info("Intel(R) PRO/%s Network Connection\n",
(hw->phy.type == e1000_phy_ife) ? "10/100" : "1000");
e1000e_read_pba_num(hw, &pba_num);
e_info("MAC: %d, PHY: %d, PBA No: %06x-%03x\n",
hw->mac.type, hw->phy.type, (pba_num >> 8), (pba_num & 0xff));
}
static void e1000_eeprom_checks(struct e1000_adapter *adapter)
{
struct e1000_hw *hw = &adapter->hw;
int ret_val;
u16 buf = 0;
if (hw->mac.type != e1000_82573)
return;
ret_val = e1000_read_nvm(hw, NVM_INIT_CONTROL2_REG, 1, &buf);
if (!(le16_to_cpu(buf) & (1 << 0))) {
/* Deep Smart Power Down (DSPD) */
dev_warn(&adapter->pdev->dev,
"Warning: detected DSPD enabled in EEPROM\n");
}
ret_val = e1000_read_nvm(hw, NVM_INIT_3GIO_3, 1, &buf);
if (le16_to_cpu(buf) & (3 << 2)) {
/* ASPM enable */
dev_warn(&adapter->pdev->dev,
"Warning: detected ASPM enabled in EEPROM\n");
}
}
static const struct net_device_ops e1000e_netdev_ops = {
.ndo_open = e1000_open,
.ndo_stop = e1000_close,
.ndo_start_xmit = e1000_xmit_frame,
.ndo_get_stats = e1000_get_stats,
.ndo_set_multicast_list = e1000_set_multi,
.ndo_set_mac_address = e1000_set_mac,
.ndo_change_mtu = e1000_change_mtu,
.ndo_do_ioctl = e1000_ioctl,
.ndo_tx_timeout = e1000_tx_timeout,
.ndo_validate_addr = eth_validate_addr,
.ndo_vlan_rx_register = e1000_vlan_rx_register,
.ndo_vlan_rx_add_vid = e1000_vlan_rx_add_vid,
.ndo_vlan_rx_kill_vid = e1000_vlan_rx_kill_vid,
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = e1000_netpoll,
#endif
};
/**
* e1000_probe - Device Initialization Routine
* @pdev: PCI device information struct
* @ent: entry in e1000_pci_tbl
*
* Returns 0 on success, negative on failure
*
* e1000_probe initializes an adapter identified by a pci_dev structure.
* The OS initialization, configuring of the adapter private structure,
* and a hardware reset occur.
**/
static int __devinit e1000_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
struct net_device *netdev;
struct e1000_adapter *adapter;
struct e1000_hw *hw;
const struct e1000_info *ei = e1000_info_tbl[ent->driver_data];
resource_size_t mmio_start, mmio_len;
resource_size_t flash_start, flash_len;
static int cards_found;
int i, err, pci_using_dac;
u16 eeprom_data = 0;
u16 eeprom_apme_mask = E1000_EEPROM_APME;
e1000e_disable_l1aspm(pdev);
err = pci_enable_device_mem(pdev);
if (err)
return err;
pci_using_dac = 0;
err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
if (!err) {
err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
if (!err)
pci_using_dac = 1;
} else {
err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
if (err) {
err = pci_set_consistent_dma_mask(pdev,
DMA_32BIT_MASK);
if (err) {
dev_err(&pdev->dev, "No usable DMA "
"configuration, aborting\n");
goto err_dma;
}
}
}
err = pci_request_selected_regions(pdev,
pci_select_bars(pdev, IORESOURCE_MEM),
e1000e_driver_name);
if (err)
goto err_pci_reg;
pci_set_master(pdev);
/* PCI config space info */
err = pci_save_state(pdev);
if (err)
goto err_alloc_etherdev;
err = -ENOMEM;
netdev = alloc_etherdev(sizeof(struct e1000_adapter));
if (!netdev)
goto err_alloc_etherdev;
SET_NETDEV_DEV(netdev, &pdev->dev);
pci_set_drvdata(pdev, netdev);
adapter = netdev_priv(netdev);
hw = &adapter->hw;
adapter->netdev = netdev;
adapter->pdev = pdev;
adapter->ei = ei;
adapter->pba = ei->pba;
adapter->flags = ei->flags;
adapter->flags2 = ei->flags2;
adapter->hw.adapter = adapter;
adapter->hw.mac.type = ei->mac;
adapter->msg_enable = (1 << NETIF_MSG_DRV | NETIF_MSG_PROBE) - 1;
mmio_start = pci_resource_start(pdev, 0);
mmio_len = pci_resource_len(pdev, 0);
err = -EIO;
adapter->hw.hw_addr = ioremap(mmio_start, mmio_len);
if (!adapter->hw.hw_addr)
goto err_ioremap;
if ((adapter->flags & FLAG_HAS_FLASH) &&
(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) {
flash_start = pci_resource_start(pdev, 1);
flash_len = pci_resource_len(pdev, 1);
adapter->hw.flash_address = ioremap(flash_start, flash_len);
if (!adapter->hw.flash_address)
goto err_flashmap;
}
/* construct the net_device struct */
netdev->netdev_ops = &e1000e_netdev_ops;
e1000e_set_ethtool_ops(netdev);
netdev->watchdog_timeo = 5 * HZ;
netif_napi_add(netdev, &adapter->napi, e1000_clean, 64);
strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
netdev->mem_start = mmio_start;
netdev->mem_end = mmio_start + mmio_len;
adapter->bd_number = cards_found++;
e1000e_check_options(adapter);
/* setup adapter struct */
err = e1000_sw_init(adapter);
if (err)
goto err_sw_init;
err = -EIO;
memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
err = ei->get_variants(adapter);
if (err)
goto err_hw_init;
if ((adapter->flags & FLAG_IS_ICH) &&
(adapter->flags & FLAG_READ_ONLY_NVM))
e1000e_write_protect_nvm_ich8lan(&adapter->hw);
hw->mac.ops.get_bus_info(&adapter->hw);
adapter->hw.phy.autoneg_wait_to_complete = 0;
/* Copper options */
if (adapter->hw.phy.media_type == e1000_media_type_copper) {
adapter->hw.phy.mdix = AUTO_ALL_MODES;
adapter->hw.phy.disable_polarity_correction = 0;
adapter->hw.phy.ms_type = e1000_ms_hw_default;
}
if (e1000_check_reset_block(&adapter->hw))
e_info("PHY reset is blocked due to SOL/IDER session.\n");
netdev->features = NETIF_F_SG |
NETIF_F_HW_CSUM |
NETIF_F_HW_VLAN_TX |
NETIF_F_HW_VLAN_RX;
if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER)
netdev->features |= NETIF_F_HW_VLAN_FILTER;
netdev->features |= NETIF_F_TSO;
netdev->features |= NETIF_F_TSO6;
netdev->vlan_features |= NETIF_F_TSO;
netdev->vlan_features |= NETIF_F_TSO6;
netdev->vlan_features |= NETIF_F_HW_CSUM;
netdev->vlan_features |= NETIF_F_SG;
if (pci_using_dac)
netdev->features |= NETIF_F_HIGHDMA;
/*
* We should not be using LLTX anymore, but we are still Tx faster with
* it.
*/
netdev->features |= NETIF_F_LLTX;
if (e1000e_enable_mng_pass_thru(&adapter->hw))
adapter->flags |= FLAG_MNG_PT_ENABLED;
/*
* before reading the NVM, reset the controller to
* put the device in a known good starting state
*/
adapter->hw.mac.ops.reset_hw(&adapter->hw);
/*
* systems with ASPM and others may see the checksum fail on the first
* attempt. Let's give it a few tries
*/
for (i = 0;; i++) {
if (e1000_validate_nvm_checksum(&adapter->hw) >= 0)
break;
if (i == 2) {
e_err("The NVM Checksum Is Not Valid\n");
err = -EIO;
goto err_eeprom;
}
}
e1000_eeprom_checks(adapter);
/* copy the MAC address out of the NVM */
if (e1000e_read_mac_addr(&adapter->hw))
e_err("NVM Read Error while reading MAC address\n");
memcpy(netdev->dev_addr, adapter->hw.mac.addr, netdev->addr_len);
memcpy(netdev->perm_addr, adapter->hw.mac.addr, netdev->addr_len);
if (!is_valid_ether_addr(netdev->perm_addr)) {
e_err("Invalid MAC Address: %pM\n", netdev->perm_addr);
err = -EIO;
goto err_eeprom;
}
init_timer(&adapter->watchdog_timer);
adapter->watchdog_timer.function = &e1000_watchdog;
adapter->watchdog_timer.data = (unsigned long) adapter;
init_timer(&adapter->phy_info_timer);
adapter->phy_info_timer.function = &e1000_update_phy_info;
adapter->phy_info_timer.data = (unsigned long) adapter;
INIT_WORK(&adapter->reset_task, e1000_reset_task);
INIT_WORK(&adapter->watchdog_task, e1000_watchdog_task);
INIT_WORK(&adapter->downshift_task, e1000e_downshift_workaround);
INIT_WORK(&adapter->update_phy_task, e1000e_update_phy_task);
/* Initialize link parameters. User can change them with ethtool */
adapter->hw.mac.autoneg = 1;
adapter->fc_autoneg = 1;
adapter->hw.fc.original_type = e1000_fc_default;
adapter->hw.fc.type = e1000_fc_default;
adapter->hw.phy.autoneg_advertised = 0x2f;
/* ring size defaults */
adapter->rx_ring->count = 256;
adapter->tx_ring->count = 256;
/*
* Initial Wake on LAN setting - If APM wake is enabled in
* the EEPROM, enable the ACPI Magic Packet filter
*/
if (adapter->flags & FLAG_APME_IN_WUC) {
/* APME bit in EEPROM is mapped to WUC.APME */
eeprom_data = er32(WUC);
eeprom_apme_mask = E1000_WUC_APME;
} else if (adapter->flags & FLAG_APME_IN_CTRL3) {
if (adapter->flags & FLAG_APME_CHECK_PORT_B &&
(adapter->hw.bus.func == 1))
e1000_read_nvm(&adapter->hw,
NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
else
e1000_read_nvm(&adapter->hw,
NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
}
/* fetch WoL from EEPROM */
if (eeprom_data & eeprom_apme_mask)
adapter->eeprom_wol |= E1000_WUFC_MAG;
/*
* now that we have the eeprom settings, apply the special cases
* where the eeprom may be wrong or the board simply won't support
* wake on lan on a particular port
*/
if (!(adapter->flags & FLAG_HAS_WOL))
adapter->eeprom_wol = 0;
/* initialize the wol settings based on the eeprom settings */
adapter->wol = adapter->eeprom_wol;
device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
/* reset the hardware with the new settings */
e1000e_reset(adapter);
/*
* If the controller has AMT, do not set DRV_LOAD until the interface
* is up. For all other cases, let the f/w know that the h/w is now
* under the control of the driver.
*/
if (!(adapter->flags & FLAG_HAS_AMT))
e1000_get_hw_control(adapter);
/* tell the stack to leave us alone until e1000_open() is called */
netif_carrier_off(netdev);
netif_tx_stop_all_queues(netdev);
strcpy(netdev->name, "eth%d");
err = register_netdev(netdev);
if (err)
goto err_register;
e1000_print_device_info(adapter);
return 0;
err_register:
if (!(adapter->flags & FLAG_HAS_AMT))
e1000_release_hw_control(adapter);
err_eeprom:
if (!e1000_check_reset_block(&adapter->hw))
e1000_phy_hw_reset(&adapter->hw);
err_hw_init:
kfree(adapter->tx_ring);
kfree(adapter->rx_ring);
err_sw_init:
if (adapter->hw.flash_address)
iounmap(adapter->hw.flash_address);
e1000e_reset_interrupt_capability(adapter);
err_flashmap:
iounmap(adapter->hw.hw_addr);
err_ioremap:
free_netdev(netdev);
err_alloc_etherdev:
pci_release_selected_regions(pdev,
pci_select_bars(pdev, IORESOURCE_MEM));
err_pci_reg:
err_dma:
pci_disable_device(pdev);
return err;
}
/**
* e1000_remove - Device Removal Routine
* @pdev: PCI device information struct
*
* e1000_remove is called by the PCI subsystem to alert the driver
* that it should release a PCI device. The could be caused by a
* Hot-Plug event, or because the driver is going to be removed from
* memory.
**/
static void __devexit e1000_remove(struct pci_dev *pdev)
{
struct net_device *netdev = pci_get_drvdata(pdev);
struct e1000_adapter *adapter = netdev_priv(netdev);
/*
* flush_scheduled work may reschedule our watchdog task, so
* explicitly disable watchdog tasks from being rescheduled
*/
set_bit(__E1000_DOWN, &adapter->state);
del_timer_sync(&adapter->watchdog_timer);
del_timer_sync(&adapter->phy_info_timer);
flush_scheduled_work();
/*
* Release control of h/w to f/w. If f/w is AMT enabled, this
* would have already happened in close and is redundant.
*/
e1000_release_hw_control(adapter);
unregister_netdev(netdev);
if (!e1000_check_reset_block(&adapter->hw))
e1000_phy_hw_reset(&adapter->hw);
e1000e_reset_interrupt_capability(adapter);
kfree(adapter->tx_ring);
kfree(adapter->rx_ring);
iounmap(adapter->hw.hw_addr);
if (adapter->hw.flash_address)
iounmap(adapter->hw.flash_address);
pci_release_selected_regions(pdev,
pci_select_bars(pdev, IORESOURCE_MEM));
free_netdev(netdev);
pci_disable_device(pdev);
}
/* PCI Error Recovery (ERS) */
static struct pci_error_handlers e1000_err_handler = {
.error_detected = e1000_io_error_detected,
.slot_reset = e1000_io_slot_reset,
.resume = e1000_io_resume,
};
static struct pci_device_id e1000_pci_tbl[] = {
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_COPPER), board_82571 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_FIBER), board_82571 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER), board_82571 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER_LP), board_82571 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_QUAD_FIBER), board_82571 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_SERDES), board_82571 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_SERDES_DUAL), board_82571 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82571EB_SERDES_QUAD), board_82571 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82571PT_QUAD_COPPER), board_82571 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82572EI), board_82572 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82572EI_COPPER), board_82572 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82572EI_FIBER), board_82572 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82572EI_SERDES), board_82572 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82573E), board_82573 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82573E_IAMT), board_82573 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82573L), board_82573 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_82574L), board_82574 },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_80003ES2LAN_COPPER_DPT),
board_80003es2lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_80003ES2LAN_COPPER_SPT),
board_80003es2lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_80003ES2LAN_SERDES_DPT),
board_80003es2lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_80003ES2LAN_SERDES_SPT),
board_80003es2lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IFE), board_ich8lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IFE_G), board_ich8lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IFE_GT), board_ich8lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IGP_AMT), board_ich8lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IGP_C), board_ich8lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IGP_M), board_ich8lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH8_IGP_M_AMT), board_ich8lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IFE), board_ich9lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IFE_G), board_ich9lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IFE_GT), board_ich9lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_AMT), board_ich9lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_C), board_ich9lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_BM), board_ich9lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_M), board_ich9lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_M_AMT), board_ich9lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_M_V), board_ich9lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_R_BM_LM), board_ich9lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_R_BM_LF), board_ich9lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_R_BM_V), board_ich9lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_D_BM_LM), board_ich10lan },
{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_D_BM_LF), board_ich10lan },
{ } /* terminate list */
};
MODULE_DEVICE_TABLE(pci, e1000_pci_tbl);
/* PCI Device API Driver */
static struct pci_driver e1000_driver = {
.name = e1000e_driver_name,
.id_table = e1000_pci_tbl,
.probe = e1000_probe,
.remove = __devexit_p(e1000_remove),
#ifdef CONFIG_PM
/* Power Management Hooks */
.suspend = e1000_suspend,
.resume = e1000_resume,
#endif
.shutdown = e1000_shutdown,
.err_handler = &e1000_err_handler
};
/**
* e1000_init_module - Driver Registration Routine
*
* e1000_init_module is the first routine called when the driver is
* loaded. All it does is register with the PCI subsystem.
**/
static int __init e1000_init_module(void)
{
int ret;
printk(KERN_INFO "%s: Intel(R) PRO/1000 Network Driver - %s\n",
e1000e_driver_name, e1000e_driver_version);
printk(KERN_INFO "%s: Copyright (c) 1999-2008 Intel Corporation.\n",
e1000e_driver_name);
ret = pci_register_driver(&e1000_driver);
pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY, e1000e_driver_name,
PM_QOS_DEFAULT_VALUE);
return ret;
}
module_init(e1000_init_module);
/**
* e1000_exit_module - Driver Exit Cleanup Routine
*
* e1000_exit_module is called just before the driver is removed
* from memory.
**/
static void __exit e1000_exit_module(void)
{
pci_unregister_driver(&e1000_driver);
pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY, e1000e_driver_name);
}
module_exit(e1000_exit_module);
MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
MODULE_DESCRIPTION("Intel(R) PRO/1000 Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
/* e1000_main.c */