From da2e5ae56164b86823c1bff5b4d28430ca4a7108 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Mon, 13 Jul 2015 08:07:08 -0400 Subject: [PATCH 1/8] NTB: Fix ntb_transport out-of-order RX update It was possible for a synchronous update of the RX index in the error case to get ahead of the asynchronous RX index update in the normal case. Change the RX processing to preserve an RX completion order. There were two error cases. First, if a buffer is not present to receive data, there would be no queue entry to preserve the RX completion order. Instead of dropping the RX frame, leave the RX frame in the ring. Schedule RX processing when RX entries are enqueued, in case there are RX frames waiting in the ring to be received. Second, if a buffer is too small to receive data, drop the frame in the ring, mark the RX entry as done, and indicate the error in the RX entry length. Check for a negative length in the receive callback in ntb_netdev, and count occurrences as rx_length_errors. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/net/ntb_netdev.c | 7 ++ drivers/ntb/ntb_transport.c | 175 +++++++++++++++++++++--------------- 2 files changed, 110 insertions(+), 72 deletions(-) diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index 3cc316cb7e6b..5f1ee7c05f68 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -102,6 +102,12 @@ static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data, netdev_dbg(ndev, "%s: %d byte payload received\n", __func__, len); + if (len < 0) { + ndev->stats.rx_errors++; + ndev->stats.rx_length_errors++; + goto enqueue_again; + } + skb_put(skb, len); skb->protocol = eth_type_trans(skb, ndev); skb->ip_summed = CHECKSUM_NONE; @@ -121,6 +127,7 @@ static void ntb_netdev_rx_handler(struct ntb_transport_qp *qp, void *qp_data, return; } +enqueue_again: rc = ntb_transport_rx_enqueue(qp, skb, skb->data, ndev->mtu + ETH_HLEN); if (rc) { dev_kfree_skb(skb); diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index efe3ad4122f2..98e58c765f2e 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -142,10 +142,11 @@ struct ntb_transport_qp { void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data, void *data, int len); + struct list_head rx_post_q; struct list_head rx_pend_q; struct list_head rx_free_q; - spinlock_t ntb_rx_pend_q_lock; - spinlock_t ntb_rx_free_q_lock; + /* ntb_rx_q_lock: synchronize access to rx_XXXX_q */ + spinlock_t ntb_rx_q_lock; void *rx_buff; unsigned int rx_index; unsigned int rx_max_entry; @@ -534,6 +535,27 @@ out: return entry; } +static struct ntb_queue_entry *ntb_list_mv(spinlock_t *lock, + struct list_head *list, + struct list_head *to_list) +{ + struct ntb_queue_entry *entry; + unsigned long flags; + + spin_lock_irqsave(lock, flags); + + if (list_empty(list)) { + entry = NULL; + } else { + entry = list_first_entry(list, struct ntb_queue_entry, entry); + list_move_tail(&entry->entry, to_list); + } + + spin_unlock_irqrestore(lock, flags); + + return entry; +} + static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, unsigned int qp_num) { @@ -941,10 +963,10 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt, INIT_DELAYED_WORK(&qp->link_work, ntb_qp_link_work); INIT_WORK(&qp->link_cleanup, ntb_qp_link_cleanup_work); - spin_lock_init(&qp->ntb_rx_pend_q_lock); - spin_lock_init(&qp->ntb_rx_free_q_lock); + spin_lock_init(&qp->ntb_rx_q_lock); spin_lock_init(&qp->ntb_tx_free_q_lock); + INIT_LIST_HEAD(&qp->rx_post_q); INIT_LIST_HEAD(&qp->rx_pend_q); INIT_LIST_HEAD(&qp->rx_free_q); INIT_LIST_HEAD(&qp->tx_free_q); @@ -1107,22 +1129,47 @@ static void ntb_transport_free(struct ntb_client *self, struct ntb_dev *ndev) kfree(nt); } +static void ntb_complete_rxc(struct ntb_transport_qp *qp) +{ + struct ntb_queue_entry *entry; + void *cb_data; + unsigned int len; + unsigned long irqflags; + + spin_lock_irqsave(&qp->ntb_rx_q_lock, irqflags); + + while (!list_empty(&qp->rx_post_q)) { + entry = list_first_entry(&qp->rx_post_q, + struct ntb_queue_entry, entry); + if (!(entry->flags & DESC_DONE_FLAG)) + break; + + entry->rx_hdr->flags = 0; + iowrite32(entry->index, &qp->rx_info->entry); + + cb_data = entry->cb_data; + len = entry->len; + + list_move_tail(&entry->entry, &qp->rx_free_q); + + spin_unlock_irqrestore(&qp->ntb_rx_q_lock, irqflags); + + if (qp->rx_handler && qp->client_ready) + qp->rx_handler(qp, qp->cb_data, cb_data, len); + + spin_lock_irqsave(&qp->ntb_rx_q_lock, irqflags); + } + + spin_unlock_irqrestore(&qp->ntb_rx_q_lock, irqflags); +} + static void ntb_rx_copy_callback(void *data) { struct ntb_queue_entry *entry = data; - struct ntb_transport_qp *qp = entry->qp; - void *cb_data = entry->cb_data; - unsigned int len = entry->len; - struct ntb_payload_header *hdr = entry->rx_hdr; - hdr->flags = 0; + entry->flags |= DESC_DONE_FLAG; - iowrite32(entry->index, &qp->rx_info->entry); - - ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry, &qp->rx_free_q); - - if (qp->rx_handler && qp->client_ready) - qp->rx_handler(qp, qp->cb_data, cb_data, len); + ntb_complete_rxc(entry->qp); } static void ntb_memcpy_rx(struct ntb_queue_entry *entry, void *offset) @@ -1138,19 +1185,18 @@ static void ntb_memcpy_rx(struct ntb_queue_entry *entry, void *offset) ntb_rx_copy_callback(entry); } -static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset, - size_t len) +static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset) { struct dma_async_tx_descriptor *txd; struct ntb_transport_qp *qp = entry->qp; struct dma_chan *chan = qp->dma_chan; struct dma_device *device; - size_t pay_off, buff_off; + size_t pay_off, buff_off, len; struct dmaengine_unmap_data *unmap; dma_cookie_t cookie; void *buf = entry->buf; - entry->len = len; + len = entry->len; if (!chan) goto err; @@ -1226,7 +1272,6 @@ static int ntb_process_rxc(struct ntb_transport_qp *qp) struct ntb_payload_header *hdr; struct ntb_queue_entry *entry; void *offset; - int rc; offset = qp->rx_buff + qp->rx_max_frame * qp->rx_index; hdr = offset + qp->rx_max_frame - sizeof(struct ntb_payload_header); @@ -1255,65 +1300,43 @@ static int ntb_process_rxc(struct ntb_transport_qp *qp) return -EIO; } - entry = ntb_list_rm(&qp->ntb_rx_pend_q_lock, &qp->rx_pend_q); + entry = ntb_list_mv(&qp->ntb_rx_q_lock, &qp->rx_pend_q, &qp->rx_post_q); if (!entry) { dev_dbg(&qp->ndev->pdev->dev, "no receive buffer\n"); qp->rx_err_no_buf++; - - rc = -ENOMEM; - goto err; + return -EAGAIN; } + entry->rx_hdr = hdr; + entry->index = qp->rx_index; + if (hdr->len > entry->len) { dev_dbg(&qp->ndev->pdev->dev, "receive buffer overflow! Wanted %d got %d\n", hdr->len, entry->len); qp->rx_err_oflow++; - rc = -EIO; - goto err; + entry->len = -EIO; + entry->flags |= DESC_DONE_FLAG; + + ntb_complete_rxc(qp); + } else { + dev_dbg(&qp->ndev->pdev->dev, + "RX OK index %u ver %u size %d into buf size %d\n", + qp->rx_index, hdr->ver, hdr->len, entry->len); + + qp->rx_bytes += hdr->len; + qp->rx_pkts++; + + entry->len = hdr->len; + + ntb_async_rx(entry, offset); } - dev_dbg(&qp->ndev->pdev->dev, - "RX OK index %u ver %u size %d into buf size %d\n", - qp->rx_index, hdr->ver, hdr->len, entry->len); - - qp->rx_bytes += hdr->len; - qp->rx_pkts++; - - entry->index = qp->rx_index; - entry->rx_hdr = hdr; - - ntb_async_rx(entry, offset, hdr->len); - qp->rx_index++; qp->rx_index %= qp->rx_max_entry; return 0; - -err: - /* FIXME: if this syncrhonous update of the rx_index gets ahead of - * asyncrhonous ntb_rx_copy_callback of previous entry, there are three - * scenarios: - * - * 1) The peer might miss this update, but observe the update - * from the memcpy completion callback. In this case, the buffer will - * not be freed on the peer to be reused for a different packet. The - * successful rx of a later packet would clear the condition, but the - * condition could persist if several rx fail in a row. - * - * 2) The peer may observe this update before the asyncrhonous copy of - * prior packets is completed. The peer may overwrite the buffers of - * the prior packets before they are copied. - * - * 3) Both: the peer may observe the update, and then observe the index - * decrement by the asynchronous completion callback. Who knows what - * badness that will cause. - */ - hdr->flags = 0; - iowrite32(qp->rx_index, &qp->rx_info->entry); - - return rc; } static void ntb_transport_rxc_db(unsigned long data) @@ -1333,7 +1356,7 @@ static void ntb_transport_rxc_db(unsigned long data) break; } - if (qp->dma_chan) + if (i && qp->dma_chan) dma_async_issue_pending(qp->dma_chan); if (i == qp->rx_max_entry) { @@ -1609,7 +1632,7 @@ ntb_transport_create_queue(void *data, struct device *client_dev, goto err1; entry->qp = qp; - ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry, + ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q); } @@ -1634,7 +1657,7 @@ err2: while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q))) kfree(entry); err1: - while ((entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q))) + while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q))) kfree(entry); if (qp->dma_chan) dma_release_channel(qp->dma_chan); @@ -1689,11 +1712,16 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp) qp->tx_handler = NULL; qp->event_handler = NULL; - while ((entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q))) + while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q))) kfree(entry); - while ((entry = ntb_list_rm(&qp->ntb_rx_pend_q_lock, &qp->rx_pend_q))) { - dev_warn(&pdev->dev, "Freeing item from a non-empty queue\n"); + while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q))) { + dev_warn(&pdev->dev, "Freeing item from non-empty rx_pend_q\n"); + kfree(entry); + } + + while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_post_q))) { + dev_warn(&pdev->dev, "Freeing item from non-empty rx_post_q\n"); kfree(entry); } @@ -1724,14 +1752,14 @@ void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsigned int *len) if (!qp || qp->client_ready) return NULL; - entry = ntb_list_rm(&qp->ntb_rx_pend_q_lock, &qp->rx_pend_q); + entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q); if (!entry) return NULL; buf = entry->cb_data; *len = entry->len; - ntb_list_add(&qp->ntb_rx_free_q_lock, &entry->entry, &qp->rx_free_q); + ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_free_q); return buf; } @@ -1757,15 +1785,18 @@ int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, if (!qp) return -EINVAL; - entry = ntb_list_rm(&qp->ntb_rx_free_q_lock, &qp->rx_free_q); + entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q); if (!entry) return -ENOMEM; entry->cb_data = cb; entry->buf = data; entry->len = len; + entry->flags = 0; - ntb_list_add(&qp->ntb_rx_pend_q_lock, &entry->entry, &qp->rx_pend_q); + ntb_list_add(&qp->ntb_rx_q_lock, &entry->entry, &qp->rx_pend_q); + + tasklet_schedule(&qp->rxc_db_work); return 0; } From c8650fd03d320e9c39f44435a583933cacea5259 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Jul 2015 08:07:09 -0400 Subject: [PATCH 2/8] NTB: Fix transport stats for multiple devices Currently the debugfs does not have files for all NTB transport queue pairs. When there are multiple NTBs present in a system, the QP names of the last transport clobber the names of previously added transport QPs. Only the last added QPs can be observed via debugfs. Create a directory per NTB transport to associate the QPs with that transport. Name the directory the same as the PCI device. Signed-off-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 98e58c765f2e..25e973ff64cf 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -212,6 +212,8 @@ struct ntb_transport_ctx { bool link_is_up; struct delayed_work link_work; struct work_struct link_cleanup; + + struct dentry *debugfs_node_dir; }; enum { @@ -945,12 +947,12 @@ static int ntb_transport_init_queue(struct ntb_transport_ctx *nt, qp->tx_max_frame = min(transport_mtu, tx_size / 2); qp->tx_max_entry = tx_size / qp->tx_max_frame; - if (nt_debugfs_dir) { + if (nt->debugfs_node_dir) { char debugfs_name[4]; snprintf(debugfs_name, 4, "qp%d", qp_num); qp->debugfs_dir = debugfs_create_dir(debugfs_name, - nt_debugfs_dir); + nt->debugfs_node_dir); qp->debugfs_stats = debugfs_create_file("stats", S_IRUSR, qp->debugfs_dir, qp, @@ -1053,6 +1055,12 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev) goto err2; } + if (nt_debugfs_dir) { + nt->debugfs_node_dir = + debugfs_create_dir(pci_name(ndev->pdev), + nt_debugfs_dir); + } + for (i = 0; i < qp_count; i++) { rc = ntb_transport_init_queue(nt, i); if (rc) From da4eb27a2c2efd034bdd645650114b82c479329c Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Jul 2015 08:07:10 -0400 Subject: [PATCH 3/8] NTB: ntb_netdev not covering all receive errors ntb_netdev is allowing the link to come up even when -ENOMEM is returned from ntb_transport_rx_enqueue. Fix to cover all possible errors. Signed-off-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/net/ntb_netdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ntb_netdev.c b/drivers/net/ntb_netdev.c index 5f1ee7c05f68..d8757bf9ad75 100644 --- a/drivers/net/ntb_netdev.c +++ b/drivers/net/ntb_netdev.c @@ -191,7 +191,7 @@ static int ntb_netdev_open(struct net_device *ndev) rc = ntb_transport_rx_enqueue(dev->qp, skb, skb->data, ndev->mtu + ETH_HLEN); - if (rc == -EINVAL) { + if (rc) { dev_kfree_skb(skb); goto err; } From 260bee9451b4f0f5f9845c5b3024f0bfb8de8f22 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 13 Jul 2015 08:07:11 -0400 Subject: [PATCH 4/8] NTB: Fix oops in debugfs when transport is half-up When the remote side is not up, we do not have all the context for the transport, and that causes NULL ptr access. Have the debugfs reads check to see if transport is up before we make access. Signed-off-by: Dave Jiang Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 25e973ff64cf..a049f96fab8d 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -439,13 +439,17 @@ static ssize_t debugfs_read(struct file *filp, char __user *ubuf, size_t count, char *buf; ssize_t ret, out_offset, out_count; + qp = filp->private_data; + + if (!qp || !qp->link_is_up) + return 0; + out_count = 1000; buf = kmalloc(out_count, GFP_KERNEL); if (!buf) return -ENOMEM; - qp = filp->private_data; out_offset = 0; out_offset += snprintf(buf + out_offset, out_count - out_offset, "NTB QP stats\n"); From 8b5a22d8f18496f5921ccb92554a7051cbfd9b0c Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Mon, 13 Jul 2015 08:07:12 -0400 Subject: [PATCH 5/8] NTB: Schedule to receive on QP link up Schedule to receive on QP link up, to make sure that the doorbell is properly cleared for interrupts. Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index a049f96fab8d..b82171e3e07d 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -895,6 +895,8 @@ static void ntb_qp_link_work(struct work_struct *work) if (qp->event_handler) qp->event_handler(qp->cb_data, qp->link_is_up); + + tasklet_schedule(&qp->rxc_db_work); } else if (nt->link_is_up) schedule_delayed_work(&qp->link_work, msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT)); From 8c9edf63e75f036b42afb4502deb20bbfb5004b4 Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Mon, 13 Jul 2015 08:07:13 -0400 Subject: [PATCH 6/8] NTB: Fix zero size or integer overflow in ntb_set_mw A plain 32 bit integer will overflow for values over 4GiB. Change the plain integer size to the appropriate size type in ntb_set_mw. Change the type of the size parameter and two local variables used for size. Even if there is no overflow, a size of zero is invalid here. Reported-by: Juyoung Jung Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index b82171e3e07d..bc556e2d7f62 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -629,13 +629,16 @@ static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw) } static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, - unsigned int size) + resource_size_t size) { struct ntb_transport_mw *mw = &nt->mw_vec[num_mw]; struct pci_dev *pdev = nt->ndev->pdev; - unsigned int xlat_size, buff_size; + size_t xlat_size, buff_size; int rc; + if (!size) + return -EINVAL; + xlat_size = round_up(size, mw->xlat_align_size); buff_size = round_up(size, mw->xlat_align); @@ -655,7 +658,7 @@ static int ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, if (!mw->virt_addr) { mw->xlat_size = 0; mw->buff_size = 0; - dev_err(&pdev->dev, "Unable to alloc MW buff of size %d\n", + dev_err(&pdev->dev, "Unable to alloc MW buff of size %zu\n", buff_size); return -ENOMEM; } From 30a4bb1e5a9d7e283af6e29da09362104b67d7aa Mon Sep 17 00:00:00 2001 From: Allen Hubbe Date: Mon, 13 Jul 2015 08:07:14 -0400 Subject: [PATCH 7/8] NTB: Fix dereference before check Remove early dereference of a pointer that is checked later in the code. Reported-by: Dan Carpenter Signed-off-by: Allen Hubbe Signed-off-by: Jon Mason --- drivers/ntb/ntb_transport.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index bc556e2d7f62..1c6386d5f79c 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -1692,7 +1692,6 @@ EXPORT_SYMBOL_GPL(ntb_transport_create_queue); */ void ntb_transport_free_queue(struct ntb_transport_qp *qp) { - struct ntb_transport_ctx *nt = qp->transport; struct pci_dev *pdev; struct ntb_queue_entry *entry; u64 qp_bit; @@ -1745,7 +1744,7 @@ void ntb_transport_free_queue(struct ntb_transport_qp *qp) while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q))) kfree(entry); - nt->qp_bitmap_free |= qp_bit; + qp->transport->qp_bitmap_free |= qp_bit; dev_info(&pdev->dev, "NTB Transport QP %d freed\n", qp->qp_num); } From e15f940908e474da03349cb55a107fe89310a02c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 24 Jul 2015 16:35:59 -0700 Subject: [PATCH 8/8] ntb: avoid format string in dev_set_name Avoid any chance of format string expansion when calling dev_set_name. Signed-off-by: Kees Cook Signed-off-by: Jon Mason --- drivers/ntb/ntb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/ntb/ntb.c b/drivers/ntb/ntb.c index 23435f2a5486..2e2530743831 100644 --- a/drivers/ntb/ntb.c +++ b/drivers/ntb/ntb.c @@ -114,7 +114,7 @@ int ntb_register_device(struct ntb_dev *ntb) ntb->dev.bus = &ntb_bus; ntb->dev.parent = &ntb->pdev->dev; ntb->dev.release = ntb_dev_release; - dev_set_name(&ntb->dev, pci_name(ntb->pdev)); + dev_set_name(&ntb->dev, "%s", pci_name(ntb->pdev)); ntb->ctx = NULL; ntb->ctx_ops = NULL;