myri10ge: add multiqueue TX

Add multiqueue TX support to myri10ge.

[ Removed references to the deprecated CONFIG_NETDEVICES_MULTIQUEUE and
  NETIF_F_MULTI_QUEUE ]

Signed-off-by: Brice Goglin <brice@myri.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
Brice Goglin 2008-09-28 15:34:21 +00:00 committed by David S. Miller
parent c8f2d9bc12
commit 236bb5e649

View File

@ -102,6 +102,8 @@ MODULE_LICENSE("Dual BSD/GPL");
#define MYRI10GE_ALLOC_SIZE ((1 << MYRI10GE_ALLOC_ORDER) * PAGE_SIZE) #define MYRI10GE_ALLOC_SIZE ((1 << MYRI10GE_ALLOC_ORDER) * PAGE_SIZE)
#define MYRI10GE_MAX_FRAGS_PER_FRAME (MYRI10GE_MAX_ETHER_MTU/MYRI10GE_ALLOC_SIZE + 1) #define MYRI10GE_MAX_FRAGS_PER_FRAME (MYRI10GE_MAX_ETHER_MTU/MYRI10GE_ALLOC_SIZE + 1)
#define MYRI10GE_MAX_SLICES 32
struct myri10ge_rx_buffer_state { struct myri10ge_rx_buffer_state {
struct page *page; struct page *page;
int page_offset; int page_offset;
@ -138,6 +140,8 @@ struct myri10ge_rx_buf {
struct myri10ge_tx_buf { struct myri10ge_tx_buf {
struct mcp_kreq_ether_send __iomem *lanai; /* lanai ptr for sendq */ struct mcp_kreq_ether_send __iomem *lanai; /* lanai ptr for sendq */
__be32 __iomem *send_go; /* "go" doorbell ptr */
__be32 __iomem *send_stop; /* "stop" doorbell ptr */
struct mcp_kreq_ether_send *req_list; /* host shadow of sendq */ struct mcp_kreq_ether_send *req_list; /* host shadow of sendq */
char *req_bytes; char *req_bytes;
struct myri10ge_tx_buffer_state *info; struct myri10ge_tx_buffer_state *info;
@ -149,6 +153,7 @@ struct myri10ge_tx_buf {
int done ____cacheline_aligned; /* transmit slots completed */ int done ____cacheline_aligned; /* transmit slots completed */
int pkt_done; /* packets completed */ int pkt_done; /* packets completed */
int wake_queue; int wake_queue;
int queue_active;
}; };
struct myri10ge_rx_done { struct myri10ge_rx_done {
@ -418,6 +423,12 @@ myri10ge_send_cmd(struct myri10ge_priv *mgp, u32 cmd,
return -ENOSYS; return -ENOSYS;
} else if (result == MXGEFW_CMD_ERROR_UNALIGNED) { } else if (result == MXGEFW_CMD_ERROR_UNALIGNED) {
return -E2BIG; return -E2BIG;
} else if (result == MXGEFW_CMD_ERROR_RANGE &&
cmd == MXGEFW_CMD_ENABLE_RSS_QUEUES &&
(data->
data1 & MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES) !=
0) {
return -ERANGE;
} else { } else {
dev_err(&mgp->pdev->dev, dev_err(&mgp->pdev->dev,
"command %d failed, result = %d\n", "command %d failed, result = %d\n",
@ -947,9 +958,24 @@ static int myri10ge_reset(struct myri10ge_priv *mgp)
*/ */
cmd.data0 = mgp->num_slices; cmd.data0 = mgp->num_slices;
cmd.data1 = 1; /* use MSI-X */ cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
if (mgp->dev->real_num_tx_queues > 1)
cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES, status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
&cmd, 0); &cmd, 0);
/* Firmware older than 1.4.32 only supports multiple
* RX queues, so if we get an error, first retry using a
* single TX queue before giving up */
if (status != 0 && mgp->dev->real_num_tx_queues > 1) {
mgp->dev->real_num_tx_queues = 1;
cmd.data0 = mgp->num_slices;
cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
status = myri10ge_send_cmd(mgp,
MXGEFW_CMD_ENABLE_RSS_QUEUES,
&cmd, 0);
}
if (status != 0) { if (status != 0) {
dev_err(&mgp->pdev->dev, dev_err(&mgp->pdev->dev,
"failed to set number of slices\n"); "failed to set number of slices\n");
@ -1317,6 +1343,7 @@ myri10ge_tx_done(struct myri10ge_slice_state *ss, int mcp_index)
{ {
struct pci_dev *pdev = ss->mgp->pdev; struct pci_dev *pdev = ss->mgp->pdev;
struct myri10ge_tx_buf *tx = &ss->tx; struct myri10ge_tx_buf *tx = &ss->tx;
struct netdev_queue *dev_queue;
struct sk_buff *skb; struct sk_buff *skb;
int idx, len; int idx, len;
@ -1350,11 +1377,31 @@ myri10ge_tx_done(struct myri10ge_slice_state *ss, int mcp_index)
PCI_DMA_TODEVICE); PCI_DMA_TODEVICE);
} }
} }
dev_queue = netdev_get_tx_queue(ss->dev, ss - ss->mgp->ss);
/*
* Make a minimal effort to prevent the NIC from polling an
* idle tx queue. If we can't get the lock we leave the queue
* active. In this case, either a thread was about to start
* using the queue anyway, or we lost a race and the NIC will
* waste some of its resources polling an inactive queue for a
* while.
*/
if ((ss->mgp->dev->real_num_tx_queues > 1) &&
__netif_tx_trylock(dev_queue)) {
if (tx->req == tx->done) {
tx->queue_active = 0;
put_be32(htonl(1), tx->send_stop);
}
__netif_tx_unlock(dev_queue);
}
/* start the queue if we've stopped it */ /* start the queue if we've stopped it */
if (netif_queue_stopped(ss->dev) if (netif_tx_queue_stopped(dev_queue)
&& tx->req - tx->done < (tx->mask >> 1)) { && tx->req - tx->done < (tx->mask >> 1)) {
tx->wake_queue++; tx->wake_queue++;
netif_wake_queue(ss->dev); netif_tx_wake_queue(dev_queue);
} }
} }
@ -1482,9 +1529,9 @@ static irqreturn_t myri10ge_intr(int irq, void *arg)
u32 send_done_count; u32 send_done_count;
int i; int i;
/* an interrupt on a non-zero slice is implicitly valid /* an interrupt on a non-zero receive-only slice is implicitly
* since MSI-X irqs are not shared */ * valid since MSI-X irqs are not shared */
if (ss != mgp->ss) { if ((mgp->dev->real_num_tx_queues == 1) && (ss != mgp->ss)) {
netif_rx_schedule(ss->dev, &ss->napi); netif_rx_schedule(ss->dev, &ss->napi);
return (IRQ_HANDLED); return (IRQ_HANDLED);
} }
@ -1526,7 +1573,9 @@ static irqreturn_t myri10ge_intr(int irq, void *arg)
barrier(); barrier();
} }
myri10ge_check_statblock(mgp); /* Only slice 0 updates stats */
if (ss == mgp->ss)
myri10ge_check_statblock(mgp);
put_be32(htonl(3), ss->irq_claim + 1); put_be32(htonl(3), ss->irq_claim + 1);
return (IRQ_HANDLED); return (IRQ_HANDLED);
@ -1884,6 +1933,7 @@ static int myri10ge_allocate_rings(struct myri10ge_slice_state *ss)
/* ensure req_list entries are aligned to 8 bytes */ /* ensure req_list entries are aligned to 8 bytes */
ss->tx.req_list = (struct mcp_kreq_ether_send *) ss->tx.req_list = (struct mcp_kreq_ether_send *)
ALIGN((unsigned long)ss->tx.req_bytes, 8); ALIGN((unsigned long)ss->tx.req_bytes, 8);
ss->tx.queue_active = 0;
bytes = rx_ring_entries * sizeof(*ss->rx_small.shadow); bytes = rx_ring_entries * sizeof(*ss->rx_small.shadow);
ss->rx_small.shadow = kzalloc(bytes, GFP_KERNEL); ss->rx_small.shadow = kzalloc(bytes, GFP_KERNEL);
@ -2201,11 +2251,14 @@ static int myri10ge_get_txrx(struct myri10ge_priv *mgp, int slice)
int status; int status;
ss = &mgp->ss[slice]; ss = &mgp->ss[slice];
cmd.data0 = 0; /* single slice for now */ status = 0;
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET, &cmd, 0); if (slice == 0 || (mgp->dev->real_num_tx_queues > 1)) {
ss->tx.lanai = (struct mcp_kreq_ether_send __iomem *) cmd.data0 = slice;
(mgp->sram + cmd.data0); status = myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SEND_OFFSET,
&cmd, 0);
ss->tx.lanai = (struct mcp_kreq_ether_send __iomem *)
(mgp->sram + cmd.data0);
}
cmd.data0 = slice; cmd.data0 = slice;
status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET, status |= myri10ge_send_cmd(mgp, MXGEFW_CMD_GET_SMALL_RX_OFFSET,
&cmd, 0); &cmd, 0);
@ -2217,6 +2270,10 @@ static int myri10ge_get_txrx(struct myri10ge_priv *mgp, int slice)
ss->rx_big.lanai = (struct mcp_kreq_ether_recv __iomem *) ss->rx_big.lanai = (struct mcp_kreq_ether_recv __iomem *)
(mgp->sram + cmd.data0); (mgp->sram + cmd.data0);
ss->tx.send_go = (__iomem __be32 *)
(mgp->sram + MXGEFW_ETH_SEND_GO + 64 * slice);
ss->tx.send_stop = (__iomem __be32 *)
(mgp->sram + MXGEFW_ETH_SEND_STOP + 64 * slice);
return status; return status;
} }
@ -2230,7 +2287,7 @@ static int myri10ge_set_stats(struct myri10ge_priv *mgp, int slice)
ss = &mgp->ss[slice]; ss = &mgp->ss[slice];
cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->fw_stats_bus); cmd.data0 = MYRI10GE_LOWPART_TO_U32(ss->fw_stats_bus);
cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->fw_stats_bus); cmd.data1 = MYRI10GE_HIGHPART_TO_U32(ss->fw_stats_bus);
cmd.data2 = sizeof(struct mcp_irq_data); cmd.data2 = sizeof(struct mcp_irq_data) | (slice << 16);
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd, 0); status = myri10ge_send_cmd(mgp, MXGEFW_CMD_SET_STATS_DMA_V2, &cmd, 0);
if (status == -ENOSYS) { if (status == -ENOSYS) {
dma_addr_t bus = ss->fw_stats_bus; dma_addr_t bus = ss->fw_stats_bus;
@ -2271,7 +2328,9 @@ static int myri10ge_open(struct net_device *dev)
if (mgp->num_slices > 1) { if (mgp->num_slices > 1) {
cmd.data0 = mgp->num_slices; cmd.data0 = mgp->num_slices;
cmd.data1 = 1; /* use MSI-X */ cmd.data1 = MXGEFW_SLICE_INTR_MODE_ONE_PER_SLICE;
if (mgp->dev->real_num_tx_queues > 1)
cmd.data1 |= MXGEFW_SLICE_ENABLE_MULTIPLE_TX_QUEUES;
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES, status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ENABLE_RSS_QUEUES,
&cmd, 0); &cmd, 0);
if (status != 0) { if (status != 0) {
@ -2292,6 +2351,7 @@ static int myri10ge_open(struct net_device *dev)
printk(KERN_ERR printk(KERN_ERR
"myri10ge: %s: failed to setup rss tables\n", "myri10ge: %s: failed to setup rss tables\n",
dev->name); dev->name);
goto abort_with_nothing;
} }
/* just enable an identity mapping */ /* just enable an identity mapping */
@ -2362,7 +2422,11 @@ static int myri10ge_open(struct net_device *dev)
status = myri10ge_allocate_rings(ss); status = myri10ge_allocate_rings(ss);
if (status != 0) if (status != 0)
goto abort_with_rings; goto abort_with_rings;
if (slice == 0)
/* only firmware which supports multiple TX queues
* supports setting up the tx stats on non-zero
* slices */
if (slice == 0 || mgp->dev->real_num_tx_queues > 1)
status = myri10ge_set_stats(mgp, slice); status = myri10ge_set_stats(mgp, slice);
if (status) { if (status) {
printk(KERN_ERR printk(KERN_ERR
@ -2428,7 +2492,8 @@ static int myri10ge_open(struct net_device *dev)
mgp->running = MYRI10GE_ETH_RUNNING; mgp->running = MYRI10GE_ETH_RUNNING;
mgp->watchdog_timer.expires = jiffies + myri10ge_watchdog_timeout * HZ; mgp->watchdog_timer.expires = jiffies + myri10ge_watchdog_timeout * HZ;
add_timer(&mgp->watchdog_timer); add_timer(&mgp->watchdog_timer);
netif_wake_queue(dev); netif_tx_wake_all_queues(dev);
return 0; return 0;
abort_with_rings: abort_with_rings:
@ -2461,7 +2526,8 @@ static int myri10ge_close(struct net_device *dev)
napi_disable(&mgp->ss[i].napi); napi_disable(&mgp->ss[i].napi);
} }
netif_carrier_off(dev); netif_carrier_off(dev);
netif_stop_queue(dev);
netif_tx_stop_all_queues(dev);
old_down_cnt = mgp->down_cnt; old_down_cnt = mgp->down_cnt;
mb(); mb();
status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd, 0); status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_DOWN, &cmd, 0);
@ -2566,18 +2632,23 @@ static int myri10ge_xmit(struct sk_buff *skb, struct net_device *dev)
struct mcp_kreq_ether_send *req; struct mcp_kreq_ether_send *req;
struct myri10ge_tx_buf *tx; struct myri10ge_tx_buf *tx;
struct skb_frag_struct *frag; struct skb_frag_struct *frag;
struct netdev_queue *netdev_queue;
dma_addr_t bus; dma_addr_t bus;
u32 low; u32 low;
__be32 high_swapped; __be32 high_swapped;
unsigned int len; unsigned int len;
int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments; int idx, last_idx, avail, frag_cnt, frag_idx, count, mss, max_segments;
u16 pseudo_hdr_offset, cksum_offset; u16 pseudo_hdr_offset, cksum_offset, queue;
int cum_len, seglen, boundary, rdma_count; int cum_len, seglen, boundary, rdma_count;
u8 flags, odd_flag; u8 flags, odd_flag;
/* always transmit through slot 0 */ queue = skb_get_queue_mapping(skb);
ss = mgp->ss; queue &= (mgp->num_slices - 1);
ss = &mgp->ss[queue];
netdev_queue = netdev_get_tx_queue(mgp->dev, queue);
tx = &ss->tx; tx = &ss->tx;
again: again:
req = tx->req_list; req = tx->req_list;
avail = tx->mask - 1 - (tx->req - tx->done); avail = tx->mask - 1 - (tx->req - tx->done);
@ -2593,7 +2664,7 @@ again:
if ((unlikely(avail < max_segments))) { if ((unlikely(avail < max_segments))) {
/* we are out of transmit resources */ /* we are out of transmit resources */
tx->stop_queue++; tx->stop_queue++;
netif_stop_queue(dev); netif_tx_stop_queue(netdev_queue);
return 1; return 1;
} }
@ -2786,10 +2857,16 @@ again:
idx = ((count - 1) + tx->req) & tx->mask; idx = ((count - 1) + tx->req) & tx->mask;
tx->info[idx].last = 1; tx->info[idx].last = 1;
myri10ge_submit_req(tx, tx->req_list, count); myri10ge_submit_req(tx, tx->req_list, count);
/* if using multiple tx queues, make sure NIC polls the
* current slice */
if ((mgp->dev->real_num_tx_queues > 1) && tx->queue_active == 0) {
tx->queue_active = 1;
put_be32(htonl(1), tx->send_go);
}
tx->pkt_start++; tx->pkt_start++;
if ((avail - count) < MXGEFW_MAX_SEND_DESC) { if ((avail - count) < MXGEFW_MAX_SEND_DESC) {
tx->stop_queue++; tx->stop_queue++;
netif_stop_queue(dev); netif_tx_stop_queue(netdev_queue);
} }
dev->trans_start = jiffies; dev->trans_start = jiffies;
return 0; return 0;
@ -3367,20 +3444,21 @@ static void myri10ge_watchdog(struct work_struct *work)
for (i = 0; i < mgp->num_slices; i++) { for (i = 0; i < mgp->num_slices; i++) {
tx = &mgp->ss[i].tx; tx = &mgp->ss[i].tx;
printk(KERN_INFO printk(KERN_INFO
"myri10ge: %s: (%d): %d %d %d %d %d\n", "myri10ge: %s: (%d): %d %d %d %d %d %d\n",
mgp->dev->name, i, tx->req, tx->done, mgp->dev->name, i, tx->queue_active, tx->req,
tx->pkt_start, tx->pkt_done, tx->done, tx->pkt_start, tx->pkt_done,
(int)ntohl(mgp->ss[i].fw_stats-> (int)ntohl(mgp->ss[i].fw_stats->
send_done_count)); send_done_count));
msleep(2000); msleep(2000);
printk(KERN_INFO printk(KERN_INFO
"myri10ge: %s: (%d): %d %d %d %d %d\n", "myri10ge: %s: (%d): %d %d %d %d %d %d\n",
mgp->dev->name, i, tx->req, tx->done, mgp->dev->name, i, tx->queue_active, tx->req,
tx->pkt_start, tx->pkt_done, tx->done, tx->pkt_start, tx->pkt_done,
(int)ntohl(mgp->ss[i].fw_stats-> (int)ntohl(mgp->ss[i].fw_stats->
send_done_count)); send_done_count));
} }
} }
rtnl_lock(); rtnl_lock();
myri10ge_close(mgp->dev); myri10ge_close(mgp->dev);
status = myri10ge_load_firmware(mgp, 1); status = myri10ge_load_firmware(mgp, 1);
@ -3435,10 +3513,14 @@ static void myri10ge_watchdog_timer(unsigned long arg)
/* nic seems like it might be stuck.. */ /* nic seems like it might be stuck.. */
if (rx_pause_cnt != mgp->watchdog_pause) { if (rx_pause_cnt != mgp->watchdog_pause) {
if (net_ratelimit()) if (net_ratelimit())
printk(KERN_WARNING "myri10ge %s:" printk(KERN_WARNING
"myri10ge %s slice %d:"
"TX paused, check link partner\n", "TX paused, check link partner\n",
mgp->dev->name); mgp->dev->name, i);
} else { } else {
printk(KERN_WARNING
"myri10ge %s slice %d stuck:",
mgp->dev->name, i);
reset_needed = 1; reset_needed = 1;
} }
} }
@ -3653,7 +3735,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
int status = -ENXIO; int status = -ENXIO;
int dac_enabled; int dac_enabled;
netdev = alloc_etherdev(sizeof(*mgp)); netdev = alloc_etherdev_mq(sizeof(*mgp), MYRI10GE_MAX_SLICES);
if (netdev == NULL) { if (netdev == NULL) {
dev_err(dev, "Could not allocate ethernet device\n"); dev_err(dev, "Could not allocate ethernet device\n");
return -ENOMEM; return -ENOMEM;
@ -3758,7 +3840,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
dev_err(&pdev->dev, "failed to alloc slice state\n"); dev_err(&pdev->dev, "failed to alloc slice state\n");
goto abort_with_firmware; goto abort_with_firmware;
} }
netdev->real_num_tx_queues = mgp->num_slices;
status = myri10ge_reset(mgp); status = myri10ge_reset(mgp);
if (status != 0) { if (status != 0) {
dev_err(&pdev->dev, "failed reset\n"); dev_err(&pdev->dev, "failed reset\n");
@ -3782,6 +3864,7 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
netdev->set_multicast_list = myri10ge_set_multicast_list; netdev->set_multicast_list = myri10ge_set_multicast_list;
netdev->set_mac_address = myri10ge_set_mac_address; netdev->set_mac_address = myri10ge_set_mac_address;
netdev->features = mgp->features; netdev->features = mgp->features;
if (dac_enabled) if (dac_enabled)
netdev->features |= NETIF_F_HIGHDMA; netdev->features |= NETIF_F_HIGHDMA;
@ -3937,8 +4020,7 @@ static __init int myri10ge_init_module(void)
printk(KERN_INFO "%s: Version %s\n", myri10ge_driver.name, printk(KERN_INFO "%s: Version %s\n", myri10ge_driver.name,
MYRI10GE_VERSION_STR); MYRI10GE_VERSION_STR);
if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_SRC_PORT || if (myri10ge_rss_hash > MXGEFW_RSS_HASH_TYPE_MAX) {
myri10ge_rss_hash < MXGEFW_RSS_HASH_TYPE_IPV4) {
printk(KERN_ERR printk(KERN_ERR
"%s: Illegal rssh hash type %d, defaulting to source port\n", "%s: Illegal rssh hash type %d, defaulting to source port\n",
myri10ge_driver.name, myri10ge_rss_hash); myri10ge_driver.name, myri10ge_rss_hash);
@ -3947,6 +4029,8 @@ static __init int myri10ge_init_module(void)
#if (defined CONFIG_DCA) || (defined CONFIG_DCA_MODULE) #if (defined CONFIG_DCA) || (defined CONFIG_DCA_MODULE)
dca_register_notify(&myri10ge_dca_notifier); dca_register_notify(&myri10ge_dca_notifier);
#endif #endif
if (myri10ge_max_slices > MYRI10GE_MAX_SLICES)
myri10ge_max_slices = MYRI10GE_MAX_SLICES;
return pci_register_driver(&myri10ge_driver); return pci_register_driver(&myri10ge_driver);
} }