From 6f4abbaa1bd3c87f3190a6c5c07ee5e55d7ea322 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Wed, 19 Jan 2022 09:05:51 +0800 Subject: [PATCH 01/33] drivers/virtio: Enable virtio mem for ARM64 This enables virtio-mem device support by allowing to enable the corresponding kernel config option (CONFIG_VIRTIO_MEM) on the architecture. Signed-off-by: Gavin Shan Acked-by: David Hildenbrand Acked-by: Jonathan Cameron Acked-by: Michael S. Tsirkin Link: https://lore.kernel.org/r/20220119010551.181405-1-gshan@redhat.com Signed-off-by: Michael S. Tsirkin Signed-off-by: Gavin Shan Acked-by: David Hildenbrand Acked-by: Jonathan Cameron Acked-by: Michael S. Tsirkin --- drivers/virtio/Kconfig | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index 492fc26f0b65..b5adf6abd241 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -105,7 +105,7 @@ config VIRTIO_BALLOON config VIRTIO_MEM tristate "Virtio mem driver" - depends on X86_64 + depends on X86_64 || ARM64 depends on VIRTIO depends on MEMORY_HOTPLUG depends on MEMORY_HOTREMOVE @@ -115,8 +115,9 @@ config VIRTIO_MEM This driver provides access to virtio-mem paravirtualized memory devices, allowing to hotplug and hotunplug memory. - This driver was only tested under x86-64, but should theoretically - work on all architectures that support memory hotplug and hotremove. + This driver was only tested under x86-64 and arm64, but should + theoretically work on all architectures that support memory hotplug + and hotremove. If unsure, say M. From d3bb267bbdcba199568f1325743d9d501dea0560 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 21 Jan 2022 16:31:08 +0100 Subject: [PATCH 02/33] vhost: cache avail index in vhost_enable_notify() In vhost_enable_notify() we enable the notifications and we read the avail index to check if new buffers have become available in the meantime. 
We are not caching the avail index, so when the device calls vhost_get_vq_desc(), it finds the old value in the cache and reads the avail index again. It would be better to refresh the cache every time we read the avail index, so let's change vhost_enable_notify() to cache the value in `avail_idx` and compare it with `last_avail_idx` to check if there are new buffers available. We don't expect a significant performance boost because the above path is not very common; indeed, vhost_enable_notify() is often called with unlikely(), expecting that the avail index has not been updated. We ran virtio-test/vhost-test and noticed a minimal improvement, as expected. To stress the patch more, we modified vhost_test.ko to call vhost_enable_notify()/vhost_disable_notify() on every cycle when calling vhost_get_vq_desc(); in this case we observed a more evident improvement, with a reduction of the test execution time of about 3.7%. Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20220121153108.187291-1-sgarzare@redhat.com Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Stefan Hajnoczi --- drivers/vhost/vhost.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 1768362115c6..d02173fb290c 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2550,8 +2550,9 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) &vq->avail->idx, r); return false; } + vq->avail_idx = vhost16_to_cpu(vq, avail_idx); - return vhost16_to_cpu(vq, avail_idx) != vq->avail_idx; + return vq->avail_idx != vq->last_avail_idx; } EXPORT_SYMBOL_GPL(vhost_enable_notify); From 8897d6d0fcc9ce54f53c0d14de2ada35e9527e8b Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Tue, 22 Feb 2022 19:54:24 +0800 Subject: [PATCH 03/33] vDPA/ifcvf: make use of virtio pci modern IO helpers in ifcvf This commit discards ifcvf_ioreadX()/writeX(), use virtio pci modern IO helpers instead Signed-off-by: Zhu Lingshan Link: https://lore.kernel.org/r/20220222115428.998334-2-lingshan.zhu@intel.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/ifcvf/ifcvf_base.c | 104 +++++++++++--------------------- drivers/vdpa/ifcvf/ifcvf_base.h | 1 + drivers/vdpa/ifcvf/ifcvf_main.c | 2 +- 3 files changed, 36 insertions(+), 71 deletions(-) diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c index 7d41dfe48ade..b9fdc5258611 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.c +++ b/drivers/vdpa/ifcvf/ifcvf_base.c @@ -10,42 +10,6 @@ #include "ifcvf_base.h" -static inline u8 ifc_ioread8(u8 __iomem *addr) -{ - return ioread8(addr); -} -static inline u16 ifc_ioread16 (__le16 __iomem *addr) -{ - return ioread16(addr); -} - -static inline u32 ifc_ioread32(__le32 __iomem *addr) -{ - return ioread32(addr); -} - -static inline void ifc_iowrite8(u8 value, u8 __iomem *addr) -{ - iowrite8(value, addr); -} - -static inline void ifc_iowrite16(u16 value, __le16 __iomem *addr) -{ - iowrite16(value, addr); -} - -static inline void ifc_iowrite32(u32 value, __le32 __iomem *addr) -{ - iowrite32(value, addr); -} - -static void ifc_iowrite64_twopart(u64 val, - __le32 __iomem *lo, __le32 __iomem *hi) -{ - ifc_iowrite32((u32)val, lo); - ifc_iowrite32(val >> 32, hi); -} - struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw) { return container_of(hw, struct ifcvf_adapter, vf); @@ -158,11 +122,11 @@ next: return -EIO; } - hw->nr_vring = ifc_ioread16(&hw->common_cfg->num_queues); + hw->nr_vring = vp_ioread16(&hw->common_cfg->num_queues); for (i = 0; i < hw->nr_vring; i++) { - ifc_iowrite16(i, &hw->common_cfg->queue_select); - notify_off = ifc_ioread16(&hw->common_cfg->queue_notify_off); + vp_iowrite16(i, &hw->common_cfg->queue_select); + notify_off = vp_ioread16(&hw->common_cfg->queue_notify_off); hw->vring[i].notify_addr = hw->notify_base + notify_off * hw->notify_off_multiplier; hw->vring[i].notify_pa = hw->notify_base_pa + @@ -181,12 +145,12 @@ next: u8 ifcvf_get_status(struct ifcvf_hw *hw) { - return ifc_ioread8(&hw->common_cfg->device_status); + return 
vp_ioread8(&hw->common_cfg->device_status); } void ifcvf_set_status(struct ifcvf_hw *hw, u8 status) { - ifc_iowrite8(status, &hw->common_cfg->device_status); + vp_iowrite8(status, &hw->common_cfg->device_status); } void ifcvf_reset(struct ifcvf_hw *hw) @@ -214,11 +178,11 @@ u64 ifcvf_get_hw_features(struct ifcvf_hw *hw) u32 features_lo, features_hi; u64 features; - ifc_iowrite32(0, &cfg->device_feature_select); - features_lo = ifc_ioread32(&cfg->device_feature); + vp_iowrite32(0, &cfg->device_feature_select); + features_lo = vp_ioread32(&cfg->device_feature); - ifc_iowrite32(1, &cfg->device_feature_select); - features_hi = ifc_ioread32(&cfg->device_feature); + vp_iowrite32(1, &cfg->device_feature_select); + features_hi = vp_ioread32(&cfg->device_feature); features = ((u64)features_hi << 32) | features_lo; @@ -271,12 +235,12 @@ void ifcvf_read_dev_config(struct ifcvf_hw *hw, u64 offset, WARN_ON(offset + length > hw->config_size); do { - old_gen = ifc_ioread8(&hw->common_cfg->config_generation); + old_gen = vp_ioread8(&hw->common_cfg->config_generation); p = dst; for (i = 0; i < length; i++) - *p++ = ifc_ioread8(hw->dev_cfg + offset + i); + *p++ = vp_ioread8(hw->dev_cfg + offset + i); - new_gen = ifc_ioread8(&hw->common_cfg->config_generation); + new_gen = vp_ioread8(&hw->common_cfg->config_generation); } while (old_gen != new_gen); } @@ -289,18 +253,18 @@ void ifcvf_write_dev_config(struct ifcvf_hw *hw, u64 offset, p = src; WARN_ON(offset + length > hw->config_size); for (i = 0; i < length; i++) - ifc_iowrite8(*p++, hw->dev_cfg + offset + i); + vp_iowrite8(*p++, hw->dev_cfg + offset + i); } static void ifcvf_set_features(struct ifcvf_hw *hw, u64 features) { struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg; - ifc_iowrite32(0, &cfg->guest_feature_select); - ifc_iowrite32((u32)features, &cfg->guest_feature); + vp_iowrite32(0, &cfg->guest_feature_select); + vp_iowrite32((u32)features, &cfg->guest_feature); - ifc_iowrite32(1, &cfg->guest_feature_select); - 
ifc_iowrite32(features >> 32, &cfg->guest_feature); + vp_iowrite32(1, &cfg->guest_feature_select); + vp_iowrite32(features >> 32, &cfg->guest_feature); } static int ifcvf_config_features(struct ifcvf_hw *hw) @@ -329,7 +293,7 @@ u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid) ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg; q_pair_id = qid / hw->nr_vring; avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2]; - last_avail_idx = ifc_ioread16(avail_idx_addr); + last_avail_idx = vp_ioread16(avail_idx_addr); return last_avail_idx; } @@ -344,7 +308,7 @@ int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num) q_pair_id = qid / hw->nr_vring; avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2]; hw->vring[qid].last_avail_idx = num; - ifc_iowrite16(num, avail_idx_addr); + vp_iowrite16(num, avail_idx_addr); return 0; } @@ -357,9 +321,9 @@ static int ifcvf_hw_enable(struct ifcvf_hw *hw) ifcvf = vf_to_adapter(hw); cfg = hw->common_cfg; - ifc_iowrite16(IFCVF_MSI_CONFIG_OFF, &cfg->msix_config); + vp_iowrite16(IFCVF_MSI_CONFIG_OFF, &cfg->msix_config); - if (ifc_ioread16(&cfg->msix_config) == VIRTIO_MSI_NO_VECTOR) { + if (vp_ioread16(&cfg->msix_config) == VIRTIO_MSI_NO_VECTOR) { IFCVF_ERR(ifcvf->pdev, "No msix vector for device config\n"); return -EINVAL; } @@ -368,17 +332,17 @@ static int ifcvf_hw_enable(struct ifcvf_hw *hw) if (!hw->vring[i].ready) break; - ifc_iowrite16(i, &cfg->queue_select); - ifc_iowrite64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo, + vp_iowrite16(i, &cfg->queue_select); + vp_iowrite64_twopart(hw->vring[i].desc, &cfg->queue_desc_lo, &cfg->queue_desc_hi); - ifc_iowrite64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo, + vp_iowrite64_twopart(hw->vring[i].avail, &cfg->queue_avail_lo, &cfg->queue_avail_hi); - ifc_iowrite64_twopart(hw->vring[i].used, &cfg->queue_used_lo, + vp_iowrite64_twopart(hw->vring[i].used, &cfg->queue_used_lo, &cfg->queue_used_hi); - ifc_iowrite16(hw->vring[i].size, &cfg->queue_size); - 
ifc_iowrite16(i + IFCVF_MSI_QUEUE_OFF, &cfg->queue_msix_vector); + vp_iowrite16(hw->vring[i].size, &cfg->queue_size); + vp_iowrite16(i + IFCVF_MSI_QUEUE_OFF, &cfg->queue_msix_vector); - if (ifc_ioread16(&cfg->queue_msix_vector) == + if (vp_ioread16(&cfg->queue_msix_vector) == VIRTIO_MSI_NO_VECTOR) { IFCVF_ERR(ifcvf->pdev, "No msix vector for queue %u\n", i); @@ -386,7 +350,7 @@ static int ifcvf_hw_enable(struct ifcvf_hw *hw) } ifcvf_set_vq_state(hw, i, hw->vring[i].last_avail_idx); - ifc_iowrite16(1, &cfg->queue_enable); + vp_iowrite16(1, &cfg->queue_enable); } return 0; @@ -398,14 +362,14 @@ static void ifcvf_hw_disable(struct ifcvf_hw *hw) u32 i; cfg = hw->common_cfg; - ifc_iowrite16(VIRTIO_MSI_NO_VECTOR, &cfg->msix_config); + vp_iowrite16(VIRTIO_MSI_NO_VECTOR, &cfg->msix_config); for (i = 0; i < hw->nr_vring; i++) { - ifc_iowrite16(i, &cfg->queue_select); - ifc_iowrite16(VIRTIO_MSI_NO_VECTOR, &cfg->queue_msix_vector); + vp_iowrite16(i, &cfg->queue_select); + vp_iowrite16(VIRTIO_MSI_NO_VECTOR, &cfg->queue_msix_vector); } - ifc_ioread16(&cfg->queue_msix_vector); + vp_ioread16(&cfg->queue_msix_vector); } int ifcvf_start_hw(struct ifcvf_hw *hw) @@ -433,5 +397,5 @@ void ifcvf_stop_hw(struct ifcvf_hw *hw) void ifcvf_notify_queue(struct ifcvf_hw *hw, u16 qid) { - ifc_iowrite16(qid, hw->vring[qid].notify_addr); + vp_iowrite16(qid, hw->vring[qid].notify_addr); } diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h index c486873f370a..25c591a3eae2 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.h +++ b/drivers/vdpa/ifcvf/ifcvf_base.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index d1a6b5ab543c..43b7180256c6 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -348,7 +348,7 @@ static u32 ifcvf_vdpa_get_generation(struct vdpa_device *vdpa_dev) { struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - return 
ioread8(&vf->common_cfg->config_generation); + return vp_ioread8(&vf->common_cfg->config_generation); } static u32 ifcvf_vdpa_get_device_id(struct vdpa_device *vdpa_dev) From cce0ab2b2a39072d81f98017f7b076f3410ef740 Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Tue, 22 Feb 2022 19:54:25 +0800 Subject: [PATCH 04/33] vhost_vdpa: don't setup irq offloading when irq_num < 0 When irq number is negative(e.g., -EINVAL), the virtqueue may be disabled or the virtqueues are sharing a device irq. In such case, we should not setup irq offloading for a virtqueue. Signed-off-by: Zhu Lingshan Link: https://lore.kernel.org/r/20220222115428.998334-3-lingshan.zhu@intel.com Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vdpa.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index ec5249e8c32d..05f5fd2af58f 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -97,8 +97,11 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid) return; irq = ops->get_vq_irq(vdpa, qid); + if (irq < 0) + return; + irq_bypass_unregister_producer(&vq->call_ctx.producer); - if (!vq->call_ctx.ctx || irq < 0) + if (!vq->call_ctx.ctx) return; vq->call_ctx.producer.token = vq->call_ctx.ctx; From ad5c5690de57f0bd3888ecade4685d4181a4e85c Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Tue, 22 Feb 2022 19:54:26 +0800 Subject: [PATCH 05/33] vDPA/ifcvf: implement device MSIX vector allocator This commit implements a MSIX vector allocation helper for vqs and config interrupts. Signed-off-by: Zhu Lingshan Link: https://lore.kernel.org/r/20220222115428.998334-4-lingshan.zhu@intel.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/ifcvf/ifcvf_main.c | 31 ++++++++++++++++++++++++++----- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index 43b7180256c6..964f7ac142ba 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -58,23 +58,44 @@ static void ifcvf_free_irq(struct ifcvf_adapter *adapter, int queues) ifcvf_free_irq_vectors(pdev); } -static int ifcvf_request_irq(struct ifcvf_adapter *adapter) +/* ifcvf MSIX vectors allocator, this helper tries to allocate + * vectors for all virtqueues and the config interrupt. + * It returns the number of allocated vectors, negative + * return value when fails. + */ +static int ifcvf_alloc_vectors(struct ifcvf_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; struct ifcvf_hw *vf = &adapter->vf; - int vector, i, ret, irq; - u16 max_intr; + int max_intr, ret; /* all queues and config interrupt */ max_intr = vf->nr_vring + 1; + ret = pci_alloc_irq_vectors(pdev, 1, max_intr, PCI_IRQ_MSIX | PCI_IRQ_AFFINITY); - ret = pci_alloc_irq_vectors(pdev, max_intr, - max_intr, PCI_IRQ_MSIX); if (ret < 0) { IFCVF_ERR(pdev, "Failed to alloc IRQ vectors\n"); return ret; } + if (ret < max_intr) + IFCVF_INFO(pdev, + "Requested %u vectors, however only %u allocated, lower performance\n", + max_intr, ret); + + return ret; +} + +static int ifcvf_request_irq(struct ifcvf_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + struct ifcvf_hw *vf = &adapter->vf; + int vector, nvectors, i, ret, irq; + + nvectors = ifcvf_alloc_vectors(adapter); + if (nvectors <= 0) + return -EFAULT; + snprintf(vf->config_msix_name, 256, "ifcvf[%s]-config\n", pci_name(pdev)); vector = 0; From 9b3e814834009a7d197ab6f93d6e061c0c4ee7e6 Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Tue, 22 Feb 2022 19:54:27 +0800 Subject: [PATCH 06/33] vDPA/ifcvf: implement shared IRQ feature On some platforms/devices, there may not be enough MSI vectors allocated for 
the virtqueues and config changes. In such a case, the interrupt sources (virtqueues, config changes) must share an IRQ/vector to avoid initialization failures and keep the device functional. This commit handles three cases: (1) the number of allocated vectors == the number of virtqueues + 1 (config changes): every virtqueue and the config interrupt has a separate vector/IRQ. This is the best and the most likely case. (2) the number of allocated vectors is less than in the best case, but greater than 1. In this case, all virtqueues share a vector/IRQ, and the config interrupt has a separate vector/IRQ. (3) only one vector is allocated. In this case, the virtqueues and the config interrupt share a vector/IRQ. This is the worst and most unlikely case. Otherwise, it needs to fail. This commit introduces some helper functions: ifcvf_set_vq_vector() and ifcvf_set_config_vector() set the virtqueue vector and the config vector in the device config space, so that the device can send interrupt DMA. Signed-off-by: Zhu Lingshan Link: https://lore.kernel.org/r/20220222115428.998334-5-lingshan.zhu@intel.com Signed-off-by: Tom Rix Link: https://lore.kernel.org/r/20220315124130.1710030-1-trix@redhat.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/ifcvf/ifcvf_base.c | 48 +++--- drivers/vdpa/ifcvf/ifcvf_base.h | 15 +- drivers/vdpa/ifcvf/ifcvf_main.c | 294 ++++++++++++++++++++++++++++---- 3 files changed, 300 insertions(+), 57 deletions(-) diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c index b9fdc5258611..8aba3ab4a2f3 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.c +++ b/drivers/vdpa/ifcvf/ifcvf_base.c @@ -15,6 +15,26 @@ struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw) return container_of(hw, struct ifcvf_adapter, vf); } +u16 ifcvf_set_vq_vector(struct ifcvf_hw *hw, u16 qid, int vector) +{ + struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg; + + vp_iowrite16(qid, &cfg->queue_select); + vp_iowrite16(vector, &cfg->queue_msix_vector); + + return vp_ioread16(&cfg->queue_msix_vector); +} + +u16 ifcvf_set_config_vector(struct ifcvf_hw *hw, int vector) +{ + struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg; + + cfg = hw->common_cfg; + vp_iowrite16(vector, &cfg->msix_config); + + return vp_ioread16(&cfg->msix_config); +} + static void __iomem *get_cap_addr(struct ifcvf_hw *hw, struct virtio_pci_cap *cap) { @@ -131,6 +151,7 @@ next: notify_off * hw->notify_off_multiplier; hw->vring[i].notify_pa = hw->notify_base_pa + notify_off * hw->notify_off_multiplier; + hw->vring[i].irq = -EINVAL; } hw->lm_cfg = hw->base[IFCVF_LM_BAR]; @@ -140,6 +161,9 @@ next: hw->common_cfg, hw->notify_base, hw->isr, hw->dev_cfg, hw->notify_off_multiplier); + hw->vqs_reused_irq = -EINVAL; + hw->config_irq = -EINVAL; + return 0; } @@ -321,13 +345,6 @@ static int ifcvf_hw_enable(struct ifcvf_hw *hw) ifcvf = vf_to_adapter(hw); cfg = hw->common_cfg; - vp_iowrite16(IFCVF_MSI_CONFIG_OFF, &cfg->msix_config); - - if (vp_ioread16(&cfg->msix_config) == VIRTIO_MSI_NO_VECTOR) { - IFCVF_ERR(ifcvf->pdev, "No msix vector for device config\n"); - return -EINVAL; - } - for (i = 0; i < hw->nr_vring; i++) { if (!hw->vring[i].ready) break; @@ -340,15 +357,6 @@ static int 
ifcvf_hw_enable(struct ifcvf_hw *hw) vp_iowrite64_twopart(hw->vring[i].used, &cfg->queue_used_lo, &cfg->queue_used_hi); vp_iowrite16(hw->vring[i].size, &cfg->queue_size); - vp_iowrite16(i + IFCVF_MSI_QUEUE_OFF, &cfg->queue_msix_vector); - - if (vp_ioread16(&cfg->queue_msix_vector) == - VIRTIO_MSI_NO_VECTOR) { - IFCVF_ERR(ifcvf->pdev, - "No msix vector for queue %u\n", i); - return -EINVAL; - } - ifcvf_set_vq_state(hw, i, hw->vring[i].last_avail_idx); vp_iowrite16(1, &cfg->queue_enable); } @@ -362,14 +370,10 @@ static void ifcvf_hw_disable(struct ifcvf_hw *hw) u32 i; cfg = hw->common_cfg; - vp_iowrite16(VIRTIO_MSI_NO_VECTOR, &cfg->msix_config); - + ifcvf_set_config_vector(hw, VIRTIO_MSI_NO_VECTOR); for (i = 0; i < hw->nr_vring; i++) { - vp_iowrite16(i, &cfg->queue_select); - vp_iowrite16(VIRTIO_MSI_NO_VECTOR, &cfg->queue_msix_vector); + ifcvf_set_vq_vector(hw, i, VIRTIO_MSI_NO_VECTOR); } - - vp_ioread16(&cfg->queue_msix_vector); } int ifcvf_start_hw(struct ifcvf_hw *hw) diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h index 25c591a3eae2..dcd31accfce5 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.h +++ b/drivers/vdpa/ifcvf/ifcvf_base.h @@ -28,8 +28,6 @@ #define IFCVF_QUEUE_ALIGNMENT PAGE_SIZE #define IFCVF_QUEUE_MAX 32768 -#define IFCVF_MSI_CONFIG_OFF 0 -#define IFCVF_MSI_QUEUE_OFF 1 #define IFCVF_PCI_MAX_RESOURCE 6 #define IFCVF_LM_CFG_SIZE 0x40 @@ -43,6 +41,13 @@ #define ifcvf_private_to_vf(adapter) \ (&((struct ifcvf_adapter *)adapter)->vf) +/* all vqs and config interrupt has its own vector */ +#define MSIX_VECTOR_PER_VQ_AND_CONFIG 1 +/* all vqs share a vector, and config interrupt has a separate vector */ +#define MSIX_VECTOR_SHARED_VQ_AND_CONFIG 2 +/* all vqs and config interrupt share a vector */ +#define MSIX_VECTOR_DEV_SHARED 3 + struct vring_info { u64 desc; u64 avail; @@ -77,9 +82,11 @@ struct ifcvf_hw { void __iomem * const *base; char config_msix_name[256]; struct vdpa_callback config_cb; - unsigned int config_irq; + int 
config_irq; + int vqs_reused_irq; /* virtio-net or virtio-blk device config size */ u32 config_size; + u8 msix_vector_status; }; struct ifcvf_adapter { @@ -124,4 +131,6 @@ int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num); struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw); int ifcvf_probed_virtio_net(struct ifcvf_hw *hw); u32 ifcvf_get_config_size(struct ifcvf_hw *hw); +u16 ifcvf_set_vq_vector(struct ifcvf_hw *hw, u16 qid, int vector); +u16 ifcvf_set_config_vector(struct ifcvf_hw *hw, int vector); #endif /* _IFCVF_H_ */ diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index 964f7ac142ba..4366320fb68d 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -27,7 +27,7 @@ static irqreturn_t ifcvf_config_changed(int irq, void *arg) return IRQ_HANDLED; } -static irqreturn_t ifcvf_intr_handler(int irq, void *arg) +static irqreturn_t ifcvf_vq_intr_handler(int irq, void *arg) { struct vring_info *vring = arg; @@ -37,24 +37,98 @@ static irqreturn_t ifcvf_intr_handler(int irq, void *arg) return IRQ_HANDLED; } +static irqreturn_t ifcvf_vqs_reused_intr_handler(int irq, void *arg) +{ + struct ifcvf_hw *vf = arg; + struct vring_info *vring; + int i; + + for (i = 0; i < vf->nr_vring; i++) { + vring = &vf->vring[i]; + if (vring->cb.callback) + vring->cb.callback(vring->cb.private); + } + + return IRQ_HANDLED; +} + +static irqreturn_t ifcvf_dev_intr_handler(int irq, void *arg) +{ + struct ifcvf_hw *vf = arg; + u8 isr; + + isr = vp_ioread8(vf->isr); + if (isr & VIRTIO_PCI_ISR_CONFIG) + ifcvf_config_changed(irq, arg); + + return ifcvf_vqs_reused_intr_handler(irq, arg); +} + static void ifcvf_free_irq_vectors(void *data) { pci_free_irq_vectors(data); } -static void ifcvf_free_irq(struct ifcvf_adapter *adapter, int queues) +static void ifcvf_free_per_vq_irq(struct ifcvf_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; struct ifcvf_hw *vf = &adapter->vf; int i; + for (i = 0; i < vf->nr_vring; i++) { 
+ if (vf->vring[i].irq != -EINVAL) { + devm_free_irq(&pdev->dev, vf->vring[i].irq, &vf->vring[i]); + vf->vring[i].irq = -EINVAL; + } + } +} - for (i = 0; i < queues; i++) { - devm_free_irq(&pdev->dev, vf->vring[i].irq, &vf->vring[i]); - vf->vring[i].irq = -EINVAL; +static void ifcvf_free_vqs_reused_irq(struct ifcvf_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + struct ifcvf_hw *vf = &adapter->vf; + + if (vf->vqs_reused_irq != -EINVAL) { + devm_free_irq(&pdev->dev, vf->vqs_reused_irq, vf); + vf->vqs_reused_irq = -EINVAL; } - devm_free_irq(&pdev->dev, vf->config_irq, vf); +} + +static void ifcvf_free_vq_irq(struct ifcvf_adapter *adapter) +{ + struct ifcvf_hw *vf = &adapter->vf; + + if (vf->msix_vector_status == MSIX_VECTOR_PER_VQ_AND_CONFIG) + ifcvf_free_per_vq_irq(adapter); + else + ifcvf_free_vqs_reused_irq(adapter); +} + +static void ifcvf_free_config_irq(struct ifcvf_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + struct ifcvf_hw *vf = &adapter->vf; + + if (vf->config_irq == -EINVAL) + return; + + /* If the irq is shared by all vqs and the config interrupt, + * it is already freed in ifcvf_free_vq_irq, so here only + * need to free config irq when msix_vector_status != MSIX_VECTOR_DEV_SHARED + */ + if (vf->msix_vector_status != MSIX_VECTOR_DEV_SHARED) { + devm_free_irq(&pdev->dev, vf->config_irq, vf); + vf->config_irq = -EINVAL; + } +} + +static void ifcvf_free_irq(struct ifcvf_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + + ifcvf_free_vq_irq(adapter); + ifcvf_free_config_irq(adapter); ifcvf_free_irq_vectors(pdev); } @@ -86,47 +160,200 @@ static int ifcvf_alloc_vectors(struct ifcvf_adapter *adapter) return ret; } -static int ifcvf_request_irq(struct ifcvf_adapter *adapter) +static int ifcvf_request_per_vq_irq(struct ifcvf_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; struct ifcvf_hw *vf = &adapter->vf; - int vector, nvectors, i, ret, irq; + int i, vector, ret, irq; - nvectors = 
ifcvf_alloc_vectors(adapter); - if (nvectors <= 0) - return -EFAULT; + vf->vqs_reused_irq = -EINVAL; + for (i = 0; i < vf->nr_vring; i++) { + snprintf(vf->vring[i].msix_name, 256, "ifcvf[%s]-%d\n", pci_name(pdev), i); + vector = i; + irq = pci_irq_vector(pdev, vector); + ret = devm_request_irq(&pdev->dev, irq, + ifcvf_vq_intr_handler, 0, + vf->vring[i].msix_name, + &vf->vring[i]); + if (ret) { + IFCVF_ERR(pdev, "Failed to request irq for vq %d\n", i); + goto err; + } + + vf->vring[i].irq = irq; + ret = ifcvf_set_vq_vector(vf, i, vector); + if (ret == VIRTIO_MSI_NO_VECTOR) { + IFCVF_ERR(pdev, "No msix vector for vq %u\n", i); + goto err; + } + } + + return 0; +err: + ifcvf_free_irq(adapter); + + return -EFAULT; +} + +static int ifcvf_request_vqs_reused_irq(struct ifcvf_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + struct ifcvf_hw *vf = &adapter->vf; + int i, vector, ret, irq; + + vector = 0; + snprintf(vf->vring[0].msix_name, 256, "ifcvf[%s]-vqs-reused-irq\n", pci_name(pdev)); + irq = pci_irq_vector(pdev, vector); + ret = devm_request_irq(&pdev->dev, irq, + ifcvf_vqs_reused_intr_handler, 0, + vf->vring[0].msix_name, vf); + if (ret) { + IFCVF_ERR(pdev, "Failed to request reused irq for the device\n"); + goto err; + } + + vf->vqs_reused_irq = irq; + for (i = 0; i < vf->nr_vring; i++) { + vf->vring[i].irq = -EINVAL; + ret = ifcvf_set_vq_vector(vf, i, vector); + if (ret == VIRTIO_MSI_NO_VECTOR) { + IFCVF_ERR(pdev, "No msix vector for vq %u\n", i); + goto err; + } + } + + return 0; +err: + ifcvf_free_irq(adapter); + + return -EFAULT; +} + +static int ifcvf_request_dev_irq(struct ifcvf_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + struct ifcvf_hw *vf = &adapter->vf; + int i, vector, ret, irq; + + vector = 0; + snprintf(vf->vring[0].msix_name, 256, "ifcvf[%s]-dev-irq\n", pci_name(pdev)); + irq = pci_irq_vector(pdev, vector); + ret = devm_request_irq(&pdev->dev, irq, + ifcvf_dev_intr_handler, 0, + vf->vring[0].msix_name, vf); + if (ret) { + 
IFCVF_ERR(pdev, "Failed to request irq for the device\n"); + goto err; + } + + vf->vqs_reused_irq = irq; + for (i = 0; i < vf->nr_vring; i++) { + vf->vring[i].irq = -EINVAL; + ret = ifcvf_set_vq_vector(vf, i, vector); + if (ret == VIRTIO_MSI_NO_VECTOR) { + IFCVF_ERR(pdev, "No msix vector for vq %u\n", i); + goto err; + } + } + + vf->config_irq = irq; + ret = ifcvf_set_config_vector(vf, vector); + if (ret == VIRTIO_MSI_NO_VECTOR) { + IFCVF_ERR(pdev, "No msix vector for device config\n"); + goto err; + } + + return 0; +err: + ifcvf_free_irq(adapter); + + return -EFAULT; + +} + +static int ifcvf_request_vq_irq(struct ifcvf_adapter *adapter) +{ + struct ifcvf_hw *vf = &adapter->vf; + int ret; + + if (vf->msix_vector_status == MSIX_VECTOR_PER_VQ_AND_CONFIG) + ret = ifcvf_request_per_vq_irq(adapter); + else + ret = ifcvf_request_vqs_reused_irq(adapter); + + return ret; +} + +static int ifcvf_request_config_irq(struct ifcvf_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + struct ifcvf_hw *vf = &adapter->vf; + int config_vector, ret; + + if (vf->msix_vector_status == MSIX_VECTOR_DEV_SHARED) + return 0; + + if (vf->msix_vector_status == MSIX_VECTOR_PER_VQ_AND_CONFIG) + /* vector 0 ~ vf->nr_vring for vqs, num vf->nr_vring vector for config interrupt */ + config_vector = vf->nr_vring; + + if (vf->msix_vector_status == MSIX_VECTOR_SHARED_VQ_AND_CONFIG) + /* vector 0 for vqs and 1 for config interrupt */ + config_vector = 1; snprintf(vf->config_msix_name, 256, "ifcvf[%s]-config\n", pci_name(pdev)); - vector = 0; - vf->config_irq = pci_irq_vector(pdev, vector); + vf->config_irq = pci_irq_vector(pdev, config_vector); ret = devm_request_irq(&pdev->dev, vf->config_irq, ifcvf_config_changed, 0, vf->config_msix_name, vf); if (ret) { IFCVF_ERR(pdev, "Failed to request config irq\n"); + goto err; + } + + ret = ifcvf_set_config_vector(vf, config_vector); + if (ret == VIRTIO_MSI_NO_VECTOR) { + IFCVF_ERR(pdev, "No msix vector for device config\n"); + goto err; + } + + return 
0; +err: + ifcvf_free_irq(adapter); + + return -EFAULT; +} + +static int ifcvf_request_irq(struct ifcvf_adapter *adapter) +{ + struct ifcvf_hw *vf = &adapter->vf; + int nvectors, ret, max_intr; + + nvectors = ifcvf_alloc_vectors(adapter); + if (nvectors <= 0) + return -EFAULT; + + vf->msix_vector_status = MSIX_VECTOR_PER_VQ_AND_CONFIG; + max_intr = vf->nr_vring + 1; + if (nvectors < max_intr) + vf->msix_vector_status = MSIX_VECTOR_SHARED_VQ_AND_CONFIG; + + if (nvectors == 1) { + vf->msix_vector_status = MSIX_VECTOR_DEV_SHARED; + ret = ifcvf_request_dev_irq(adapter); + return ret; } - for (i = 0; i < vf->nr_vring; i++) { - snprintf(vf->vring[i].msix_name, 256, "ifcvf[%s]-%d\n", - pci_name(pdev), i); - vector = i + IFCVF_MSI_QUEUE_OFF; - irq = pci_irq_vector(pdev, vector); - ret = devm_request_irq(&pdev->dev, irq, - ifcvf_intr_handler, 0, - vf->vring[i].msix_name, - &vf->vring[i]); - if (ret) { - IFCVF_ERR(pdev, - "Failed to request irq for vq %d\n", i); - ifcvf_free_irq(adapter, i); + ret = ifcvf_request_vq_irq(adapter); + if (ret) + return ret; - return ret; - } + ret = ifcvf_request_config_irq(adapter); - vf->vring[i].irq = irq; - } + if (ret) + return ret; return 0; } @@ -284,7 +511,7 @@ static int ifcvf_vdpa_reset(struct vdpa_device *vdpa_dev) if (status_old & VIRTIO_CONFIG_S_DRIVER_OK) { ifcvf_stop_datapath(adapter); - ifcvf_free_irq(adapter, vf->nr_vring); + ifcvf_free_irq(adapter); } ifcvf_reset_vring(adapter); @@ -431,7 +658,10 @@ static int ifcvf_vdpa_get_vq_irq(struct vdpa_device *vdpa_dev, { struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev); - return vf->vring[qid].irq; + if (vf->vqs_reused_irq < 0) + return vf->vring[qid].irq; + else + return -EINVAL; } static struct vdpa_notification_area ifcvf_get_vq_notification(struct vdpa_device *vdpa_dev, From 6f84622db395456f071910a1851fda66325855bd Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Tue, 22 Feb 2022 19:54:28 +0800 Subject: [PATCH 07/33] vDPA/ifcvf: cacheline alignment for ifcvf_hw This commit introduces a 
new cacheline aligned layout for ifcvf_hw. Signed-off-by: Zhu Lingshan Link: https://lore.kernel.org/r/20220222115428.998334-6-lingshan.zhu@intel.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/ifcvf/ifcvf_base.c | 4 ---- drivers/vdpa/ifcvf/ifcvf_base.h | 10 +++++----- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/vdpa/ifcvf/ifcvf_base.c b/drivers/vdpa/ifcvf/ifcvf_base.c index 8aba3ab4a2f3..48c4dadb0c7c 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.c +++ b/drivers/vdpa/ifcvf/ifcvf_base.c @@ -340,10 +340,8 @@ int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num) static int ifcvf_hw_enable(struct ifcvf_hw *hw) { struct virtio_pci_common_cfg __iomem *cfg; - struct ifcvf_adapter *ifcvf; u32 i; - ifcvf = vf_to_adapter(hw); cfg = hw->common_cfg; for (i = 0; i < hw->nr_vring; i++) { if (!hw->vring[i].ready) @@ -366,10 +364,8 @@ static int ifcvf_hw_enable(struct ifcvf_hw *hw) static void ifcvf_hw_disable(struct ifcvf_hw *hw) { - struct virtio_pci_common_cfg __iomem *cfg; u32 i; - cfg = hw->common_cfg; ifcvf_set_config_vector(hw, VIRTIO_MSI_NO_VECTOR); for (i = 0; i < hw->nr_vring; i++) { ifcvf_set_vq_vector(hw, i, VIRTIO_MSI_NO_VECTOR); diff --git a/drivers/vdpa/ifcvf/ifcvf_base.h b/drivers/vdpa/ifcvf/ifcvf_base.h index dcd31accfce5..115b61f4924b 100644 --- a/drivers/vdpa/ifcvf/ifcvf_base.h +++ b/drivers/vdpa/ifcvf/ifcvf_base.h @@ -66,16 +66,18 @@ struct ifcvf_hw { u8 __iomem *isr; /* Live migration */ u8 __iomem *lm_cfg; - u16 nr_vring; /* Notification bar number */ u8 notify_bar; + u8 msix_vector_status; + /* virtio-net or virtio-blk device config size */ + u32 config_size; /* Notificaiton bar address */ void __iomem *notify_base; phys_addr_t notify_base_pa; u32 notify_off_multiplier; + u32 dev_type; u64 req_features; u64 hw_features; - u32 dev_type; struct virtio_pci_common_cfg __iomem *common_cfg; void __iomem *dev_cfg; struct vring_info vring[IFCVF_MAX_QUEUES]; @@ -84,9 +86,7 @@ struct ifcvf_hw { struct vdpa_callback config_cb; int 
config_irq; int vqs_reused_irq; - /* virtio-net or virtio-blk device config size */ - u32 config_size; - u8 msix_vector_status; + u16 nr_vring; }; struct ifcvf_adapter { From 504c1cabe325df65c18ef38365ddd1a41c6b591b Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 25 Jan 2022 21:22:21 +0800 Subject: [PATCH 08/33] mm/balloon_compaction: make balloon page compaction callbacks static Since commit b1123ea6d3b3 ("mm: balloon: use general non-lru movable page feature"), these functions are called via balloon_aops callbacks. They're not called directly outside this file. So make them static and clean up the relevant code. Signed-off-by: Miaohe Lin Link: https://lore.kernel.org/r/20220125132221.2220-1-linmiaohe@huawei.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Muchun Song --- include/linux/balloon_compaction.h | 22 ---------------------- mm/balloon_compaction.c | 6 +++--- 2 files changed, 3 insertions(+), 25 deletions(-) diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index 338aa27e4773..edb7f6d41faa 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -80,12 +80,6 @@ static inline void balloon_devinfo_init(struct balloon_dev_info *balloon) #ifdef CONFIG_BALLOON_COMPACTION extern const struct address_space_operations balloon_aops; -extern bool balloon_page_isolate(struct page *page, - isolate_mode_t mode); -extern void balloon_page_putback(struct page *page); -extern int balloon_page_migrate(struct address_space *mapping, - struct page *newpage, - struct page *page, enum migrate_mode mode); /* * balloon_page_insert - insert a page into the balloon's page list and make @@ -155,22 +149,6 @@ static inline void balloon_page_delete(struct page *page) list_del(&page->lru); } -static inline bool balloon_page_isolate(struct page *page) -{ - return false; -} - -static inline void balloon_page_putback(struct page *page) -{ - return; -} - -static inline int balloon_page_migrate(struct page 
*newpage, - struct page *page, enum migrate_mode mode) -{ - return 0; -} - static inline gfp_t balloon_mapping_gfp_mask(void) { return GFP_HIGHUSER; diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c index 907fefde2572..4b8eab4b3f45 100644 --- a/mm/balloon_compaction.c +++ b/mm/balloon_compaction.c @@ -203,7 +203,7 @@ EXPORT_SYMBOL_GPL(balloon_page_dequeue); #ifdef CONFIG_BALLOON_COMPACTION -bool balloon_page_isolate(struct page *page, isolate_mode_t mode) +static bool balloon_page_isolate(struct page *page, isolate_mode_t mode) { struct balloon_dev_info *b_dev_info = balloon_page_device(page); @@ -217,7 +217,7 @@ bool balloon_page_isolate(struct page *page, isolate_mode_t mode) return true; } -void balloon_page_putback(struct page *page) +static void balloon_page_putback(struct page *page) { struct balloon_dev_info *b_dev_info = balloon_page_device(page); unsigned long flags; @@ -230,7 +230,7 @@ void balloon_page_putback(struct page *page) /* move_to_new_page() counterpart for a ballooned page */ -int balloon_page_migrate(struct address_space *mapping, +static int balloon_page_migrate(struct address_space *mapping, struct page *newpage, struct page *page, enum migrate_mode mode) { From 90a6951b58e935124eeb7ecd9fbc2426f841ac0c Mon Sep 17 00:00:00 2001 From: Gautam Dawar Date: Tue, 15 Feb 2022 11:04:29 +0530 Subject: [PATCH 09/33] Add definition of VIRTIO_F_IN_ORDER feature bit This patch adds the definition of VIRTIO_F_IN_ORDER feature bit in the relevant header file to make it available in QEMU's linux standard header file virtio_config.h, which is updated using scripts/update-linux-headers.sh Signed-off-by: Gautam Dawar Link: https://lore.kernel.org/r/20220215053430.24650-1-gdawar@xilinx.com Signed-off-by: Michael S. 
Tsirkin Acked-by: Jason Wang --- include/uapi/linux/virtio_config.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index b5eda06f0d57..f0fb0ae021c0 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -82,6 +82,12 @@ /* This feature indicates support for the packed virtqueue layout. */ #define VIRTIO_F_RING_PACKED 34 +/* + * Inorder feature indicates that all buffers are used by the device + * in the same order in which they have been made available. + */ +#define VIRTIO_F_IN_ORDER 35 + /* * This feature indicates that memory accesses by the driver and the * device are ordered in a way described by the platform. From 13d640a3e9a3ac7ec694843d3d3b785e85fb8cb8 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Wed, 2 Mar 2022 11:39:14 +0800 Subject: [PATCH 10/33] virtio_crypto: Introduce VIRTIO_CRYPTO_NOSPC Based on the latest virtio crypto spec, define VIRTIO_CRYPTO_NOSPC. Reviewed-by: Gonglei Signed-off-by: zhenwei pi Link: https://lore.kernel.org/r/20220302033917.1295334-2-pizhenwei@bytedance.com Signed-off-by: Michael S. 
Tsirkin --- include/uapi/linux/virtio_crypto.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/virtio_crypto.h b/include/uapi/linux/virtio_crypto.h index a03932f10565..1166a49084b0 100644 --- a/include/uapi/linux/virtio_crypto.h +++ b/include/uapi/linux/virtio_crypto.h @@ -408,6 +408,7 @@ struct virtio_crypto_op_data_req { #define VIRTIO_CRYPTO_BADMSG 2 #define VIRTIO_CRYPTO_NOTSUPP 3 #define VIRTIO_CRYPTO_INVSESS 4 /* Invalid session id */ +#define VIRTIO_CRYPTO_NOSPC 5 /* no free session ID */ /* The accelerator hardware is ready */ #define VIRTIO_CRYPTO_S_HW_READY (1 << 0) From 24e19590628b58578748eeaec8140bf9c9dc00d9 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Wed, 2 Mar 2022 11:39:15 +0800 Subject: [PATCH 11/33] virtio-crypto: introduce akcipher service Introduce asymmetric service definition, asymmetric operations and several well known algorithms. Co-developed-by: lei he Signed-off-by: lei he Signed-off-by: zhenwei pi Link: https://lore.kernel.org/r/20220302033917.1295334-3-pizhenwei@bytedance.com Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Gonglei --- include/uapi/linux/virtio_crypto.h | 81 +++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/virtio_crypto.h b/include/uapi/linux/virtio_crypto.h index 1166a49084b0..71a54a6849ca 100644 --- a/include/uapi/linux/virtio_crypto.h +++ b/include/uapi/linux/virtio_crypto.h @@ -37,6 +37,7 @@ #define VIRTIO_CRYPTO_SERVICE_HASH 1 #define VIRTIO_CRYPTO_SERVICE_MAC 2 #define VIRTIO_CRYPTO_SERVICE_AEAD 3 +#define VIRTIO_CRYPTO_SERVICE_AKCIPHER 4 #define VIRTIO_CRYPTO_OPCODE(service, op) (((service) << 8) | (op)) @@ -57,6 +58,10 @@ struct virtio_crypto_ctrl_header { VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x02) #define VIRTIO_CRYPTO_AEAD_DESTROY_SESSION \ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x03) +#define VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x04) +#define VIRTIO_CRYPTO_AKCIPHER_DESTROY_SESSION \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x05) __le32 opcode; __le32 algo; __le32 flag; @@ -180,6 +185,58 @@ struct virtio_crypto_aead_create_session_req { __u8 padding[32]; }; +struct virtio_crypto_rsa_session_para { +#define VIRTIO_CRYPTO_RSA_RAW_PADDING 0 +#define VIRTIO_CRYPTO_RSA_PKCS1_PADDING 1 + __le32 padding_algo; + +#define VIRTIO_CRYPTO_RSA_NO_HASH 0 +#define VIRTIO_CRYPTO_RSA_MD2 1 +#define VIRTIO_CRYPTO_RSA_MD3 2 +#define VIRTIO_CRYPTO_RSA_MD4 3 +#define VIRTIO_CRYPTO_RSA_MD5 4 +#define VIRTIO_CRYPTO_RSA_SHA1 5 +#define VIRTIO_CRYPTO_RSA_SHA256 6 +#define VIRTIO_CRYPTO_RSA_SHA384 7 +#define VIRTIO_CRYPTO_RSA_SHA512 8 +#define VIRTIO_CRYPTO_RSA_SHA224 9 + __le32 hash_algo; +}; + +struct virtio_crypto_ecdsa_session_para { +#define VIRTIO_CRYPTO_CURVE_UNKNOWN 0 +#define VIRTIO_CRYPTO_CURVE_NIST_P192 1 +#define VIRTIO_CRYPTO_CURVE_NIST_P224 2 +#define VIRTIO_CRYPTO_CURVE_NIST_P256 3 +#define VIRTIO_CRYPTO_CURVE_NIST_P384 4 +#define VIRTIO_CRYPTO_CURVE_NIST_P521 5 + __le32 curve_id; + __le32 
padding; +}; + +struct virtio_crypto_akcipher_session_para { +#define VIRTIO_CRYPTO_NO_AKCIPHER 0 +#define VIRTIO_CRYPTO_AKCIPHER_RSA 1 +#define VIRTIO_CRYPTO_AKCIPHER_DSA 2 +#define VIRTIO_CRYPTO_AKCIPHER_ECDSA 3 + __le32 algo; + +#define VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PUBLIC 1 +#define VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PRIVATE 2 + __le32 keytype; + __le32 keylen; + + union { + struct virtio_crypto_rsa_session_para rsa; + struct virtio_crypto_ecdsa_session_para ecdsa; + } u; +}; + +struct virtio_crypto_akcipher_create_session_req { + struct virtio_crypto_akcipher_session_para para; + __u8 padding[36]; +}; + struct virtio_crypto_alg_chain_session_para { #define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER 1 #define VIRTIO_CRYPTO_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH 2 @@ -247,6 +304,8 @@ struct virtio_crypto_op_ctrl_req { mac_create_session; struct virtio_crypto_aead_create_session_req aead_create_session; + struct virtio_crypto_akcipher_create_session_req + akcipher_create_session; struct virtio_crypto_destroy_session_req destroy_session; __u8 padding[56]; @@ -266,6 +325,14 @@ struct virtio_crypto_op_header { VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x00) #define VIRTIO_CRYPTO_AEAD_DECRYPT \ VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AEAD, 0x01) +#define VIRTIO_CRYPTO_AKCIPHER_ENCRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x00) +#define VIRTIO_CRYPTO_AKCIPHER_DECRYPT \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x01) +#define VIRTIO_CRYPTO_AKCIPHER_SIGN \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x02) +#define VIRTIO_CRYPTO_AKCIPHER_VERIFY \ + VIRTIO_CRYPTO_OPCODE(VIRTIO_CRYPTO_SERVICE_AKCIPHER, 0x03) __le32 opcode; /* algo should be service-specific algorithms */ __le32 algo; @@ -390,6 +457,16 @@ struct virtio_crypto_aead_data_req { __u8 padding[32]; }; +struct virtio_crypto_akcipher_para { + __le32 src_data_len; + __le32 dst_data_len; +}; + +struct virtio_crypto_akcipher_data_req { + struct 
virtio_crypto_akcipher_para para; + __u8 padding[40]; +}; + /* The request of the data virtqueue's packet */ struct virtio_crypto_op_data_req { struct virtio_crypto_op_header header; @@ -399,6 +476,7 @@ struct virtio_crypto_op_data_req { struct virtio_crypto_hash_data_req hash_req; struct virtio_crypto_mac_data_req mac_req; struct virtio_crypto_aead_data_req aead_req; + struct virtio_crypto_akcipher_data_req akcipher_req; __u8 padding[48]; } u; }; @@ -409,6 +487,7 @@ struct virtio_crypto_op_data_req { #define VIRTIO_CRYPTO_NOTSUPP 3 #define VIRTIO_CRYPTO_INVSESS 4 /* Invalid session id */ #define VIRTIO_CRYPTO_NOSPC 5 /* no free session ID */ +#define VIRTIO_CRYPTO_KEY_REJECTED 6 /* Signature verification failed */ /* The accelerator hardware is ready */ #define VIRTIO_CRYPTO_S_HW_READY (1 << 0) @@ -439,7 +518,7 @@ struct virtio_crypto_config { __le32 max_cipher_key_len; /* Maximum length of authenticated key */ __le32 max_auth_key_len; - __le32 reserve; + __le32 akcipher_algo; /* Maximum size of each crypto request's content */ __le64 max_size; }; From 59ca6c93387d325e96577d8bd4c23c78c1491c11 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Wed, 2 Mar 2022 11:39:16 +0800 Subject: [PATCH 12/33] virtio-crypto: implement RSA algorithm Support rsa & pkcs1pad(rsa,sha1) with priority 150. Test with QEMU built-in backend, it works fine. 1, The self-test framework of crypto layer works fine in guest kernel 2, Test with Linux guest(with asym support), the following script test(note that pkey_XXX is supported only in a newer version of keyutils): - both public key & private key - create/close session - encrypt/decrypt/sign/verify basic driver operation - also test with kernel crypto layer(pkey add/query) All the cases work fine. 
rm -rf *.der *.pem *.pfx modprobe pkcs8_key_parser # if CONFIG_PKCS8_PRIVATE_KEY_PARSER=m rm -rf /tmp/data dd if=/dev/random of=/tmp/data count=1 bs=226 openssl req -nodes -x509 -newkey rsa:2048 -keyout key.pem -out cert.pem -subj "/C=CN/ST=BJ/L=HD/O=qemu/OU=dev/CN=qemu/emailAddress=qemu@qemu.org" openssl pkcs8 -in key.pem -topk8 -nocrypt -outform DER -out key.der openssl x509 -in cert.pem -inform PEM -outform DER -out cert.der PRIV_KEY_ID=`cat key.der | keyctl padd asymmetric test_priv_key @s` echo "priv key id = "$PRIV_KEY_ID PUB_KEY_ID=`cat cert.der | keyctl padd asymmetric test_pub_key @s` echo "pub key id = "$PUB_KEY_ID keyctl pkey_query $PRIV_KEY_ID 0 keyctl pkey_query $PUB_KEY_ID 0 echo "Enc with priv key..." keyctl pkey_encrypt $PRIV_KEY_ID 0 /tmp/data enc=pkcs1 >/tmp/enc.priv echo "Dec with pub key..." keyctl pkey_decrypt $PRIV_KEY_ID 0 /tmp/enc.priv enc=pkcs1 >/tmp/dec cmp /tmp/data /tmp/dec echo "Sign with priv key..." keyctl pkey_sign $PRIV_KEY_ID 0 /tmp/data enc=pkcs1 hash=sha1 > /tmp/sig echo "Verify with pub key..." keyctl pkey_verify $PRIV_KEY_ID 0 /tmp/data /tmp/sig enc=pkcs1 hash=sha1 echo "Enc with pub key..." keyctl pkey_encrypt $PUB_KEY_ID 0 /tmp/data enc=pkcs1 >/tmp/enc.pub echo "Dec with priv key..." keyctl pkey_decrypt $PRIV_KEY_ID 0 /tmp/enc.pub enc=pkcs1 >/tmp/dec cmp /tmp/data /tmp/dec echo "Verify with pub key..." keyctl pkey_verify $PUB_KEY_ID 0 /tmp/data /tmp/sig enc=pkcs1 hash=sha1 [1 compiling warning during development] Reported-by: kernel test robot Co-developed-by: lei he Signed-off-by: lei he Signed-off-by: zhenwei pi Link: https://lore.kernel.org/r/20220302033917.1295334-4-pizhenwei@bytedance.com Reviewed-by: Gonglei Signed-off-by: Nathan Chancellor #Kconfig tweaks Link: https://lore.kernel.org/r/20220308205309.2192502-1-nathan@kernel.org Signed-off-by: Michael S. 
Tsirkin --- drivers/crypto/virtio/Kconfig | 3 + drivers/crypto/virtio/Makefile | 1 + .../virtio/virtio_crypto_akcipher_algs.c | 585 ++++++++++++++++++ drivers/crypto/virtio/virtio_crypto_common.h | 3 + drivers/crypto/virtio/virtio_crypto_core.c | 6 +- drivers/crypto/virtio/virtio_crypto_mgr.c | 11 + 6 files changed, 608 insertions(+), 1 deletion(-) create mode 100644 drivers/crypto/virtio/virtio_crypto_akcipher_algs.c diff --git a/drivers/crypto/virtio/Kconfig b/drivers/crypto/virtio/Kconfig index b894e3a8be4f..5f8915f4a9ff 100644 --- a/drivers/crypto/virtio/Kconfig +++ b/drivers/crypto/virtio/Kconfig @@ -3,8 +3,11 @@ config CRYPTO_DEV_VIRTIO tristate "VirtIO crypto driver" depends on VIRTIO select CRYPTO_AEAD + select CRYPTO_AKCIPHER2 select CRYPTO_SKCIPHER select CRYPTO_ENGINE + select CRYPTO_RSA + select MPILIB help This driver provides support for virtio crypto device. If you choose 'M' here, this module will be called virtio_crypto. diff --git a/drivers/crypto/virtio/Makefile b/drivers/crypto/virtio/Makefile index cbfccccfa135..f2b839473d61 100644 --- a/drivers/crypto/virtio/Makefile +++ b/drivers/crypto/virtio/Makefile @@ -2,5 +2,6 @@ obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio_crypto.o virtio_crypto-objs := \ virtio_crypto_algs.o \ + virtio_crypto_akcipher_algs.o \ virtio_crypto_mgr.o \ virtio_crypto_core.o diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c new file mode 100644 index 000000000000..f3ec9420215e --- /dev/null +++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c @@ -0,0 +1,585 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + /* Asymmetric algorithms supported by virtio crypto device + * + * Authors: zhenwei pi + * lei he + * + * Copyright 2022 Bytedance CO., LTD. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "virtio_crypto_common.h" + +struct virtio_crypto_rsa_ctx { + MPI n; +}; + +struct virtio_crypto_akcipher_ctx { + struct crypto_engine_ctx enginectx; + struct virtio_crypto *vcrypto; + struct crypto_akcipher *tfm; + bool session_valid; + __u64 session_id; + union { + struct virtio_crypto_rsa_ctx rsa_ctx; + }; +}; + +struct virtio_crypto_akcipher_request { + struct virtio_crypto_request base; + struct virtio_crypto_akcipher_ctx *akcipher_ctx; + struct akcipher_request *akcipher_req; + void *src_buf; + void *dst_buf; + uint32_t opcode; +}; + +struct virtio_crypto_akcipher_algo { + uint32_t algonum; + uint32_t service; + unsigned int active_devs; + struct akcipher_alg algo; +}; + +static DEFINE_MUTEX(algs_lock); + +static void virtio_crypto_akcipher_finalize_req( + struct virtio_crypto_akcipher_request *vc_akcipher_req, + struct akcipher_request *req, int err) +{ + virtcrypto_clear_request(&vc_akcipher_req->base); + + crypto_finalize_akcipher_request(vc_akcipher_req->base.dataq->engine, req, err); +} + +static void virtio_crypto_dataq_akcipher_callback(struct virtio_crypto_request *vc_req, int len) +{ + struct virtio_crypto_akcipher_request *vc_akcipher_req = + container_of(vc_req, struct virtio_crypto_akcipher_request, base); + struct akcipher_request *akcipher_req; + int error; + + switch (vc_req->status) { + case VIRTIO_CRYPTO_OK: + error = 0; + break; + case VIRTIO_CRYPTO_INVSESS: + case VIRTIO_CRYPTO_ERR: + error = -EINVAL; + break; + case VIRTIO_CRYPTO_BADMSG: + error = -EBADMSG; + break; + + case VIRTIO_CRYPTO_KEY_REJECTED: + error = -EKEYREJECTED; + break; + + default: + error = -EIO; + break; + } + + akcipher_req = vc_akcipher_req->akcipher_req; + if (vc_akcipher_req->opcode != VIRTIO_CRYPTO_AKCIPHER_VERIFY) + sg_copy_from_buffer(akcipher_req->dst, sg_nents(akcipher_req->dst), + vc_akcipher_req->dst_buf, akcipher_req->dst_len); + 
virtio_crypto_akcipher_finalize_req(vc_akcipher_req, akcipher_req, error); +} + +static int virtio_crypto_alg_akcipher_init_session(struct virtio_crypto_akcipher_ctx *ctx, + struct virtio_crypto_ctrl_header *header, void *para, + const uint8_t *key, unsigned int keylen) +{ + struct scatterlist outhdr_sg, key_sg, inhdr_sg, *sgs[3]; + struct virtio_crypto *vcrypto = ctx->vcrypto; + uint8_t *pkey; + unsigned int inlen; + int err; + unsigned int num_out = 0, num_in = 0; + + pkey = kmemdup(key, keylen, GFP_ATOMIC); + if (!pkey) + return -ENOMEM; + + spin_lock(&vcrypto->ctrl_lock); + memcpy(&vcrypto->ctrl.header, header, sizeof(vcrypto->ctrl.header)); + memcpy(&vcrypto->ctrl.u, para, sizeof(vcrypto->ctrl.u)); + vcrypto->input.status = cpu_to_le32(VIRTIO_CRYPTO_ERR); + + sg_init_one(&outhdr_sg, &vcrypto->ctrl, sizeof(vcrypto->ctrl)); + sgs[num_out++] = &outhdr_sg; + + sg_init_one(&key_sg, pkey, keylen); + sgs[num_out++] = &key_sg; + + sg_init_one(&inhdr_sg, &vcrypto->input, sizeof(vcrypto->input)); + sgs[num_out + num_in++] = &inhdr_sg; + + err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, num_out, num_in, vcrypto, GFP_ATOMIC); + if (err < 0) + goto out; + + virtqueue_kick(vcrypto->ctrl_vq); + while (!virtqueue_get_buf(vcrypto->ctrl_vq, &inlen) && + !virtqueue_is_broken(vcrypto->ctrl_vq)) + cpu_relax(); + + if (le32_to_cpu(vcrypto->input.status) != VIRTIO_CRYPTO_OK) { + err = -EINVAL; + goto out; + } + + ctx->session_id = le64_to_cpu(vcrypto->input.session_id); + ctx->session_valid = true; + err = 0; + +out: + spin_unlock(&vcrypto->ctrl_lock); + kfree_sensitive(pkey); + + if (err < 0) + pr_err("virtio_crypto: Create session failed status: %u\n", + le32_to_cpu(vcrypto->input.status)); + + return err; +} + +static int virtio_crypto_alg_akcipher_close_session(struct virtio_crypto_akcipher_ctx *ctx) +{ + struct scatterlist outhdr_sg, inhdr_sg, *sgs[2]; + struct virtio_crypto_destroy_session_req *destroy_session; + struct virtio_crypto *vcrypto = ctx->vcrypto; + unsigned int 
num_out = 0, num_in = 0, inlen; + int err; + + spin_lock(&vcrypto->ctrl_lock); + if (!ctx->session_valid) { + err = 0; + goto out; + } + vcrypto->ctrl_status.status = VIRTIO_CRYPTO_ERR; + vcrypto->ctrl.header.opcode = cpu_to_le32(VIRTIO_CRYPTO_AKCIPHER_DESTROY_SESSION); + vcrypto->ctrl.header.queue_id = 0; + + destroy_session = &vcrypto->ctrl.u.destroy_session; + destroy_session->session_id = cpu_to_le64(ctx->session_id); + + sg_init_one(&outhdr_sg, &vcrypto->ctrl, sizeof(vcrypto->ctrl)); + sgs[num_out++] = &outhdr_sg; + + sg_init_one(&inhdr_sg, &vcrypto->ctrl_status.status, sizeof(vcrypto->ctrl_status.status)); + sgs[num_out + num_in++] = &inhdr_sg; + + err = virtqueue_add_sgs(vcrypto->ctrl_vq, sgs, num_out, num_in, vcrypto, GFP_ATOMIC); + if (err < 0) + goto out; + + virtqueue_kick(vcrypto->ctrl_vq); + while (!virtqueue_get_buf(vcrypto->ctrl_vq, &inlen) && + !virtqueue_is_broken(vcrypto->ctrl_vq)) + cpu_relax(); + + if (vcrypto->ctrl_status.status != VIRTIO_CRYPTO_OK) { + err = -EINVAL; + goto out; + } + + err = 0; + ctx->session_valid = false; + +out: + spin_unlock(&vcrypto->ctrl_lock); + if (err < 0) { + pr_err("virtio_crypto: Close session failed status: %u, session_id: 0x%llx\n", + vcrypto->ctrl_status.status, destroy_session->session_id); + } + + return err; +} + +static int __virtio_crypto_akcipher_do_req(struct virtio_crypto_akcipher_request *vc_akcipher_req, + struct akcipher_request *req, struct data_queue *data_vq) +{ + struct virtio_crypto_akcipher_ctx *ctx = vc_akcipher_req->akcipher_ctx; + struct virtio_crypto_request *vc_req = &vc_akcipher_req->base; + struct virtio_crypto *vcrypto = ctx->vcrypto; + struct virtio_crypto_op_data_req *req_data = vc_req->req_data; + struct scatterlist *sgs[4], outhdr_sg, inhdr_sg, srcdata_sg, dstdata_sg; + void *src_buf = NULL, *dst_buf = NULL; + unsigned int num_out = 0, num_in = 0; + int node = dev_to_node(&vcrypto->vdev->dev); + unsigned long flags; + int ret = -ENOMEM; + bool verify = vc_akcipher_req->opcode == 
VIRTIO_CRYPTO_AKCIPHER_VERIFY; + unsigned int src_len = verify ? req->src_len + req->dst_len : req->src_len; + + /* out header */ + sg_init_one(&outhdr_sg, req_data, sizeof(*req_data)); + sgs[num_out++] = &outhdr_sg; + + /* src data */ + src_buf = kcalloc_node(src_len, 1, GFP_KERNEL, node); + if (!src_buf) + goto err; + + if (verify) { + /* for verify operation, both src and dst data work as OUT direction */ + sg_copy_to_buffer(req->src, sg_nents(req->src), src_buf, src_len); + sg_init_one(&srcdata_sg, src_buf, src_len); + sgs[num_out++] = &srcdata_sg; + } else { + sg_copy_to_buffer(req->src, sg_nents(req->src), src_buf, src_len); + sg_init_one(&srcdata_sg, src_buf, src_len); + sgs[num_out++] = &srcdata_sg; + + /* dst data */ + dst_buf = kcalloc_node(req->dst_len, 1, GFP_KERNEL, node); + if (!dst_buf) + goto err; + + sg_init_one(&dstdata_sg, dst_buf, req->dst_len); + sgs[num_out + num_in++] = &dstdata_sg; + } + + vc_akcipher_req->src_buf = src_buf; + vc_akcipher_req->dst_buf = dst_buf; + + /* in header */ + sg_init_one(&inhdr_sg, &vc_req->status, sizeof(vc_req->status)); + sgs[num_out + num_in++] = &inhdr_sg; + + spin_lock_irqsave(&data_vq->lock, flags); + ret = virtqueue_add_sgs(data_vq->vq, sgs, num_out, num_in, vc_req, GFP_ATOMIC); + virtqueue_kick(data_vq->vq); + spin_unlock_irqrestore(&data_vq->lock, flags); + if (ret) + goto err; + + return 0; + +err: + kfree(src_buf); + kfree(dst_buf); + + return -ENOMEM; +} + +static int virtio_crypto_rsa_do_req(struct crypto_engine *engine, void *vreq) +{ + struct akcipher_request *req = container_of(vreq, struct akcipher_request, base); + struct virtio_crypto_akcipher_request *vc_akcipher_req = akcipher_request_ctx(req); + struct virtio_crypto_request *vc_req = &vc_akcipher_req->base; + struct virtio_crypto_akcipher_ctx *ctx = vc_akcipher_req->akcipher_ctx; + struct virtio_crypto *vcrypto = ctx->vcrypto; + struct data_queue *data_vq = vc_req->dataq; + struct virtio_crypto_op_header *header; + struct 
virtio_crypto_akcipher_data_req *akcipher_req; + int ret; + + vc_req->sgs = NULL; + vc_req->req_data = kzalloc_node(sizeof(*vc_req->req_data), + GFP_KERNEL, dev_to_node(&vcrypto->vdev->dev)); + if (!vc_req->req_data) + return -ENOMEM; + + /* build request header */ + header = &vc_req->req_data->header; + header->opcode = cpu_to_le32(vc_akcipher_req->opcode); + header->algo = cpu_to_le32(VIRTIO_CRYPTO_AKCIPHER_RSA); + header->session_id = cpu_to_le64(ctx->session_id); + + /* build request akcipher data */ + akcipher_req = &vc_req->req_data->u.akcipher_req; + akcipher_req->para.src_data_len = cpu_to_le32(req->src_len); + akcipher_req->para.dst_data_len = cpu_to_le32(req->dst_len); + + ret = __virtio_crypto_akcipher_do_req(vc_akcipher_req, req, data_vq); + if (ret < 0) { + kfree_sensitive(vc_req->req_data); + vc_req->req_data = NULL; + return ret; + } + + return 0; +} + +static int virtio_crypto_rsa_req(struct akcipher_request *req, uint32_t opcode) +{ + struct crypto_akcipher *atfm = crypto_akcipher_reqtfm(req); + struct virtio_crypto_akcipher_ctx *ctx = akcipher_tfm_ctx(atfm); + struct virtio_crypto_akcipher_request *vc_akcipher_req = akcipher_request_ctx(req); + struct virtio_crypto_request *vc_req = &vc_akcipher_req->base; + struct virtio_crypto *vcrypto = ctx->vcrypto; + /* Use the first data virtqueue as default */ + struct data_queue *data_vq = &vcrypto->data_vq[0]; + + vc_req->dataq = data_vq; + vc_req->alg_cb = virtio_crypto_dataq_akcipher_callback; + vc_akcipher_req->akcipher_ctx = ctx; + vc_akcipher_req->akcipher_req = req; + vc_akcipher_req->opcode = opcode; + + return crypto_transfer_akcipher_request_to_engine(data_vq->engine, req); +} + +static int virtio_crypto_rsa_encrypt(struct akcipher_request *req) +{ + return virtio_crypto_rsa_req(req, VIRTIO_CRYPTO_AKCIPHER_ENCRYPT); +} + +static int virtio_crypto_rsa_decrypt(struct akcipher_request *req) +{ + return virtio_crypto_rsa_req(req, VIRTIO_CRYPTO_AKCIPHER_DECRYPT); +} + +static int 
virtio_crypto_rsa_sign(struct akcipher_request *req) +{ + return virtio_crypto_rsa_req(req, VIRTIO_CRYPTO_AKCIPHER_SIGN); +} + +static int virtio_crypto_rsa_verify(struct akcipher_request *req) +{ + return virtio_crypto_rsa_req(req, VIRTIO_CRYPTO_AKCIPHER_VERIFY); +} + +static int virtio_crypto_rsa_set_key(struct crypto_akcipher *tfm, + const void *key, + unsigned int keylen, + bool private, + int padding_algo, + int hash_algo) +{ + struct virtio_crypto_akcipher_ctx *ctx = akcipher_tfm_ctx(tfm); + struct virtio_crypto_rsa_ctx *rsa_ctx = &ctx->rsa_ctx; + struct virtio_crypto *vcrypto; + struct virtio_crypto_ctrl_header header; + struct virtio_crypto_akcipher_session_para para; + struct rsa_key rsa_key = {0}; + int node = virtio_crypto_get_current_node(); + uint32_t keytype; + int ret; + + /* mpi_free will test n, just free it. */ + mpi_free(rsa_ctx->n); + rsa_ctx->n = NULL; + + if (private) { + keytype = VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PRIVATE; + ret = rsa_parse_priv_key(&rsa_key, key, keylen); + } else { + keytype = VIRTIO_CRYPTO_AKCIPHER_KEY_TYPE_PUBLIC; + ret = rsa_parse_pub_key(&rsa_key, key, keylen); + } + + if (ret) + return ret; + + rsa_ctx->n = mpi_read_raw_data(rsa_key.n, rsa_key.n_sz); + if (!rsa_ctx->n) + return -ENOMEM; + + if (!ctx->vcrypto) { + vcrypto = virtcrypto_get_dev_node(node, VIRTIO_CRYPTO_SERVICE_AKCIPHER, + VIRTIO_CRYPTO_AKCIPHER_RSA); + if (!vcrypto) { + pr_err("virtio_crypto: Could not find a virtio device in the system or unsupported algo\n"); + return -ENODEV; + } + + ctx->vcrypto = vcrypto; + } else { + virtio_crypto_alg_akcipher_close_session(ctx); + } + + /* set ctrl header */ + header.opcode = cpu_to_le32(VIRTIO_CRYPTO_AKCIPHER_CREATE_SESSION); + header.algo = cpu_to_le32(VIRTIO_CRYPTO_AKCIPHER_RSA); + header.queue_id = 0; + + /* set RSA para */ + para.algo = cpu_to_le32(VIRTIO_CRYPTO_AKCIPHER_RSA); + para.keytype = cpu_to_le32(keytype); + para.keylen = cpu_to_le32(keylen); + para.u.rsa.padding_algo = cpu_to_le32(padding_algo); + 
para.u.rsa.hash_algo = cpu_to_le32(hash_algo); + + return virtio_crypto_alg_akcipher_init_session(ctx, &header, &para, key, keylen); +} + +static int virtio_crypto_rsa_raw_set_priv_key(struct crypto_akcipher *tfm, + const void *key, + unsigned int keylen) +{ + return virtio_crypto_rsa_set_key(tfm, key, keylen, 1, + VIRTIO_CRYPTO_RSA_RAW_PADDING, + VIRTIO_CRYPTO_RSA_NO_HASH); +} + + +static int virtio_crypto_p1pad_rsa_sha1_set_priv_key(struct crypto_akcipher *tfm, + const void *key, + unsigned int keylen) +{ + return virtio_crypto_rsa_set_key(tfm, key, keylen, 1, + VIRTIO_CRYPTO_RSA_PKCS1_PADDING, + VIRTIO_CRYPTO_RSA_SHA1); +} + +static int virtio_crypto_rsa_raw_set_pub_key(struct crypto_akcipher *tfm, + const void *key, + unsigned int keylen) +{ + return virtio_crypto_rsa_set_key(tfm, key, keylen, 0, + VIRTIO_CRYPTO_RSA_RAW_PADDING, + VIRTIO_CRYPTO_RSA_NO_HASH); +} + +static int virtio_crypto_p1pad_rsa_sha1_set_pub_key(struct crypto_akcipher *tfm, + const void *key, + unsigned int keylen) +{ + return virtio_crypto_rsa_set_key(tfm, key, keylen, 0, + VIRTIO_CRYPTO_RSA_PKCS1_PADDING, + VIRTIO_CRYPTO_RSA_SHA1); +} + +static unsigned int virtio_crypto_rsa_max_size(struct crypto_akcipher *tfm) +{ + struct virtio_crypto_akcipher_ctx *ctx = akcipher_tfm_ctx(tfm); + struct virtio_crypto_rsa_ctx *rsa_ctx = &ctx->rsa_ctx; + + return mpi_get_size(rsa_ctx->n); +} + +static int virtio_crypto_rsa_init_tfm(struct crypto_akcipher *tfm) +{ + struct virtio_crypto_akcipher_ctx *ctx = akcipher_tfm_ctx(tfm); + + ctx->tfm = tfm; + ctx->enginectx.op.do_one_request = virtio_crypto_rsa_do_req; + ctx->enginectx.op.prepare_request = NULL; + ctx->enginectx.op.unprepare_request = NULL; + + return 0; +} + +static void virtio_crypto_rsa_exit_tfm(struct crypto_akcipher *tfm) +{ + struct virtio_crypto_akcipher_ctx *ctx = akcipher_tfm_ctx(tfm); + struct virtio_crypto_rsa_ctx *rsa_ctx = &ctx->rsa_ctx; + + virtio_crypto_alg_akcipher_close_session(ctx); + virtcrypto_dev_put(ctx->vcrypto); + 
mpi_free(rsa_ctx->n); + rsa_ctx->n = NULL; +} + +static struct virtio_crypto_akcipher_algo virtio_crypto_akcipher_algs[] = { + { + .algonum = VIRTIO_CRYPTO_AKCIPHER_RSA, + .service = VIRTIO_CRYPTO_SERVICE_AKCIPHER, + .algo = { + .encrypt = virtio_crypto_rsa_encrypt, + .decrypt = virtio_crypto_rsa_decrypt, + .set_pub_key = virtio_crypto_rsa_raw_set_pub_key, + .set_priv_key = virtio_crypto_rsa_raw_set_priv_key, + .max_size = virtio_crypto_rsa_max_size, + .init = virtio_crypto_rsa_init_tfm, + .exit = virtio_crypto_rsa_exit_tfm, + .reqsize = sizeof(struct virtio_crypto_akcipher_request), + .base = { + .cra_name = "rsa", + .cra_driver_name = "virtio-crypto-rsa", + .cra_priority = 150, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct virtio_crypto_akcipher_ctx), + }, + }, + }, + { + .algonum = VIRTIO_CRYPTO_AKCIPHER_RSA, + .service = VIRTIO_CRYPTO_SERVICE_AKCIPHER, + .algo = { + .encrypt = virtio_crypto_rsa_encrypt, + .decrypt = virtio_crypto_rsa_decrypt, + .sign = virtio_crypto_rsa_sign, + .verify = virtio_crypto_rsa_verify, + .set_pub_key = virtio_crypto_p1pad_rsa_sha1_set_pub_key, + .set_priv_key = virtio_crypto_p1pad_rsa_sha1_set_priv_key, + .max_size = virtio_crypto_rsa_max_size, + .init = virtio_crypto_rsa_init_tfm, + .exit = virtio_crypto_rsa_exit_tfm, + .reqsize = sizeof(struct virtio_crypto_akcipher_request), + .base = { + .cra_name = "pkcs1pad(rsa,sha1)", + .cra_driver_name = "virtio-pkcs1-rsa-with-sha1", + .cra_priority = 150, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct virtio_crypto_akcipher_ctx), + }, + }, + }, +}; + +int virtio_crypto_akcipher_algs_register(struct virtio_crypto *vcrypto) +{ + int ret = 0; + int i = 0; + + mutex_lock(&algs_lock); + + for (i = 0; i < ARRAY_SIZE(virtio_crypto_akcipher_algs); i++) { + uint32_t service = virtio_crypto_akcipher_algs[i].service; + uint32_t algonum = virtio_crypto_akcipher_algs[i].algonum; + + if (!virtcrypto_algo_is_supported(vcrypto, service, algonum)) + continue; + + if 
(virtio_crypto_akcipher_algs[i].active_devs == 0) { + ret = crypto_register_akcipher(&virtio_crypto_akcipher_algs[i].algo); + if (ret) + goto unlock; + } + + virtio_crypto_akcipher_algs[i].active_devs++; + dev_info(&vcrypto->vdev->dev, "Registered akcipher algo %s\n", + virtio_crypto_akcipher_algs[i].algo.base.cra_name); + } + +unlock: + mutex_unlock(&algs_lock); + return ret; +} + +void virtio_crypto_akcipher_algs_unregister(struct virtio_crypto *vcrypto) +{ + int i = 0; + + mutex_lock(&algs_lock); + + for (i = 0; i < ARRAY_SIZE(virtio_crypto_akcipher_algs); i++) { + uint32_t service = virtio_crypto_akcipher_algs[i].service; + uint32_t algonum = virtio_crypto_akcipher_algs[i].algonum; + + if (virtio_crypto_akcipher_algs[i].active_devs == 0 || + !virtcrypto_algo_is_supported(vcrypto, service, algonum)) + continue; + + if (virtio_crypto_akcipher_algs[i].active_devs == 1) + crypto_unregister_akcipher(&virtio_crypto_akcipher_algs[i].algo); + + virtio_crypto_akcipher_algs[i].active_devs--; + } + + mutex_unlock(&algs_lock); +} diff --git a/drivers/crypto/virtio/virtio_crypto_common.h b/drivers/crypto/virtio/virtio_crypto_common.h index a24f85c589e7..214f9a6fcf84 100644 --- a/drivers/crypto/virtio/virtio_crypto_common.h +++ b/drivers/crypto/virtio/virtio_crypto_common.h @@ -56,6 +56,7 @@ struct virtio_crypto { u32 mac_algo_l; u32 mac_algo_h; u32 aead_algo; + u32 akcipher_algo; /* Maximum length of cipher key */ u32 max_cipher_key_len; @@ -131,5 +132,7 @@ static inline int virtio_crypto_get_current_node(void) int virtio_crypto_algs_register(struct virtio_crypto *vcrypto); void virtio_crypto_algs_unregister(struct virtio_crypto *vcrypto); +int virtio_crypto_akcipher_algs_register(struct virtio_crypto *vcrypto); +void virtio_crypto_akcipher_algs_unregister(struct virtio_crypto *vcrypto); #endif /* _VIRTIO_CRYPTO_COMMON_H */ diff --git a/drivers/crypto/virtio/virtio_crypto_core.c b/drivers/crypto/virtio/virtio_crypto_core.c index 8e977b7627cb..c6f482db0bc0 100644 --- 
a/drivers/crypto/virtio/virtio_crypto_core.c +++ b/drivers/crypto/virtio/virtio_crypto_core.c @@ -297,6 +297,7 @@ static int virtcrypto_probe(struct virtio_device *vdev) u32 mac_algo_l = 0; u32 mac_algo_h = 0; u32 aead_algo = 0; + u32 akcipher_algo = 0; u32 crypto_services = 0; if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) @@ -348,6 +349,9 @@ static int virtcrypto_probe(struct virtio_device *vdev) mac_algo_h, &mac_algo_h); virtio_cread_le(vdev, struct virtio_crypto_config, aead_algo, &aead_algo); + if (crypto_services & (1 << VIRTIO_CRYPTO_SERVICE_AKCIPHER)) + virtio_cread_le(vdev, struct virtio_crypto_config, + akcipher_algo, &akcipher_algo); /* Add virtio crypto device to global table */ err = virtcrypto_devmgr_add_dev(vcrypto); @@ -374,7 +378,7 @@ static int virtcrypto_probe(struct virtio_device *vdev) vcrypto->mac_algo_h = mac_algo_h; vcrypto->hash_algo = hash_algo; vcrypto->aead_algo = aead_algo; - + vcrypto->akcipher_algo = akcipher_algo; dev_info(&vdev->dev, "max_queues: %u, max_cipher_key_len: %u, max_auth_key_len: %u, max_size 0x%llx\n", diff --git a/drivers/crypto/virtio/virtio_crypto_mgr.c b/drivers/crypto/virtio/virtio_crypto_mgr.c index 6860f8180c7c..1cb92418b321 100644 --- a/drivers/crypto/virtio/virtio_crypto_mgr.c +++ b/drivers/crypto/virtio/virtio_crypto_mgr.c @@ -242,6 +242,12 @@ int virtcrypto_dev_start(struct virtio_crypto *vcrypto) return -EFAULT; } + if (virtio_crypto_akcipher_algs_register(vcrypto)) { + pr_err("virtio_crypto: Failed to register crypto akcipher algs\n"); + virtio_crypto_algs_unregister(vcrypto); + return -EFAULT; + } + return 0; } @@ -258,6 +264,7 @@ int virtcrypto_dev_start(struct virtio_crypto *vcrypto) void virtcrypto_dev_stop(struct virtio_crypto *vcrypto) { virtio_crypto_algs_unregister(vcrypto); + virtio_crypto_akcipher_algs_unregister(vcrypto); } /* @@ -312,6 +319,10 @@ bool virtcrypto_algo_is_supported(struct virtio_crypto *vcrypto, case VIRTIO_CRYPTO_SERVICE_AEAD: algo_mask = vcrypto->aead_algo; break; + + case 
VIRTIO_CRYPTO_SERVICE_AKCIPHER: + algo_mask = vcrypto->akcipher_algo; + break; } if (!(algo_mask & (1u << algo))) From ea993de113b85557ed34da8f7b4af0629550e023 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Wed, 2 Mar 2022 11:39:17 +0800 Subject: [PATCH 13/33] virtio-crypto: rename skcipher algs Suggested by Gonglei, rename virtio_crypto_algs.c to virtio_crypto_skcipher_algs.c. Also minor changes for function name. Thus the function of source files get clear: skcipher services in virtio_crypto_skcipher_algs.c and akcipher services in virtio_crypto_akcipher_algs.c. Signed-off-by: zhenwei pi Link: https://lore.kernel.org/r/20220302033917.1295334-5-pizhenwei@bytedance.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Gonglei --- drivers/crypto/virtio/Makefile | 2 +- drivers/crypto/virtio/virtio_crypto_common.h | 4 ++-- drivers/crypto/virtio/virtio_crypto_mgr.c | 8 ++++---- ...virtio_crypto_algs.c => virtio_crypto_skcipher_algs.c} | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) rename drivers/crypto/virtio/{virtio_crypto_algs.c => virtio_crypto_skcipher_algs.c} (99%) diff --git a/drivers/crypto/virtio/Makefile b/drivers/crypto/virtio/Makefile index f2b839473d61..bfa6cbae342e 100644 --- a/drivers/crypto/virtio/Makefile +++ b/drivers/crypto/virtio/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio_crypto.o virtio_crypto-objs := \ - virtio_crypto_algs.o \ + virtio_crypto_skcipher_algs.o \ virtio_crypto_akcipher_algs.o \ virtio_crypto_mgr.o \ virtio_crypto_core.o diff --git a/drivers/crypto/virtio/virtio_crypto_common.h b/drivers/crypto/virtio/virtio_crypto_common.h index 214f9a6fcf84..e693d4ee83a6 100644 --- a/drivers/crypto/virtio/virtio_crypto_common.h +++ b/drivers/crypto/virtio/virtio_crypto_common.h @@ -130,8 +130,8 @@ static inline int virtio_crypto_get_current_node(void) return node; } -int virtio_crypto_algs_register(struct virtio_crypto *vcrypto); -void virtio_crypto_algs_unregister(struct 
virtio_crypto *vcrypto); +int virtio_crypto_skcipher_algs_register(struct virtio_crypto *vcrypto); +void virtio_crypto_skcipher_algs_unregister(struct virtio_crypto *vcrypto); int virtio_crypto_akcipher_algs_register(struct virtio_crypto *vcrypto); void virtio_crypto_akcipher_algs_unregister(struct virtio_crypto *vcrypto); diff --git a/drivers/crypto/virtio/virtio_crypto_mgr.c b/drivers/crypto/virtio/virtio_crypto_mgr.c index 1cb92418b321..70e778aac0f2 100644 --- a/drivers/crypto/virtio/virtio_crypto_mgr.c +++ b/drivers/crypto/virtio/virtio_crypto_mgr.c @@ -237,14 +237,14 @@ struct virtio_crypto *virtcrypto_get_dev_node(int node, uint32_t service, */ int virtcrypto_dev_start(struct virtio_crypto *vcrypto) { - if (virtio_crypto_algs_register(vcrypto)) { - pr_err("virtio_crypto: Failed to register crypto algs\n"); + if (virtio_crypto_skcipher_algs_register(vcrypto)) { + pr_err("virtio_crypto: Failed to register crypto skcipher algs\n"); return -EFAULT; } if (virtio_crypto_akcipher_algs_register(vcrypto)) { pr_err("virtio_crypto: Failed to register crypto akcipher algs\n"); - virtio_crypto_algs_unregister(vcrypto); + virtio_crypto_skcipher_algs_unregister(vcrypto); return -EFAULT; } @@ -263,7 +263,7 @@ int virtcrypto_dev_start(struct virtio_crypto *vcrypto) */ void virtcrypto_dev_stop(struct virtio_crypto *vcrypto) { - virtio_crypto_algs_unregister(vcrypto); + virtio_crypto_skcipher_algs_unregister(vcrypto); virtio_crypto_akcipher_algs_unregister(vcrypto); } diff --git a/drivers/crypto/virtio/virtio_crypto_algs.c b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c similarity index 99% rename from drivers/crypto/virtio/virtio_crypto_algs.c rename to drivers/crypto/virtio/virtio_crypto_skcipher_algs.c index 583c0b535d13..a618c46a52b8 100644 --- a/drivers/crypto/virtio/virtio_crypto_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_skcipher_algs.c @@ -613,7 +613,7 @@ static struct virtio_crypto_algo virtio_crypto_algs[] = { { }, } }; -int 
virtio_crypto_algs_register(struct virtio_crypto *vcrypto) +int virtio_crypto_skcipher_algs_register(struct virtio_crypto *vcrypto) { int ret = 0; int i = 0; @@ -644,7 +644,7 @@ unlock: return ret; } -void virtio_crypto_algs_unregister(struct virtio_crypto *vcrypto) +void virtio_crypto_skcipher_algs_unregister(struct virtio_crypto *vcrypto) { int i = 0; From 1e00e821e4ca6355c7fff9f69a4c5ecd78e348a0 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 21 Feb 2022 14:19:27 +0200 Subject: [PATCH 14/33] net/mlx5: Add support for configuring max device MTU Allow an admin creating a vdpa device to specify the max MTU for the net device. For example, to create a device with max MTU of 1000, the following command can be used: $ vdpa dev add name vdpa-a mgmtdev auxiliary/mlx5_core.sf.1 mtu 1000 This configuration mechanism assumes that vdpa is the sole real user of the function. mlx5_core could theoretically change the mtu of the function using the ip command on the mlx5_core net device but this should not be done. Reviewed-by: Si-Wei Liu Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220221121927.194728-1-elic@nvidia.com Signed-off-by: Michael S. 
Tsirkin Acked-by: Jason Wang --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 32 ++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index d0f91078600e..5d7f3e8000c6 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -2565,6 +2565,28 @@ static int event_handler(struct notifier_block *nb, unsigned long event, void *p return ret; } +static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1); + MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu, + mtu + MLX5V_ETH_HARD_MTU); + MLX5_SET(modify_nic_vport_context_in, in, opcode, + MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); + + err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in); + + kvfree(in); + return err; +} + static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, const struct vdpa_dev_set_config *add_config) { @@ -2624,6 +2646,13 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name, init_mvqs(ndev); mutex_init(&ndev->reslock); config = &ndev->config; + + if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) { + err = config_func_mtu(mdev, add_config->net.mtu); + if (err) + goto err_mtu; + } + err = query_mtu(mdev, &mtu); if (err) goto err_mtu; @@ -2741,7 +2770,8 @@ static int mlx5v_probe(struct auxiliary_device *adev, mgtdev->mgtdev.device = mdev->device; mgtdev->mgtdev.id_table = id_table; mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) | - BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP); + BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) | + BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU); mgtdev->mgtdev.max_supported_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1; 
mgtdev->mgtdev.supported_features = get_supported_features(mdev); From d80dc15bb6e76a6c6b838f683361ceb68950dbbd Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Thu, 24 Feb 2022 19:03:54 +0800 Subject: [PATCH 15/33] virtio_ring: rename vring_unmap_state_packed() to vring_unmap_extra_packed() The actual parameter handled by vring_unmap_state_packed() is that vring_desc_extra, so this function should use "extra" instead of "state". Signed-off-by: Xuan Zhuo Link: https://lore.kernel.org/r/20220224110402.108161-2-xuanzhuo@linux.alibaba.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 962f1477b1fa..7cf3ae057833 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -984,24 +984,24 @@ static struct virtqueue *vring_create_virtqueue_split( * Packed ring specific functions - *_packed(). */ -static void vring_unmap_state_packed(const struct vring_virtqueue *vq, - struct vring_desc_extra *state) +static void vring_unmap_extra_packed(const struct vring_virtqueue *vq, + struct vring_desc_extra *extra) { u16 flags; if (!vq->use_dma_api) return; - flags = state->flags; + flags = extra->flags; if (flags & VRING_DESC_F_INDIRECT) { dma_unmap_single(vring_dma_dev(vq), - state->addr, state->len, + extra->addr, extra->len, (flags & VRING_DESC_F_WRITE) ? DMA_FROM_DEVICE : DMA_TO_DEVICE); } else { dma_unmap_page(vring_dma_dev(vq), - state->addr, state->len, + extra->addr, extra->len, (flags & VRING_DESC_F_WRITE) ? 
DMA_FROM_DEVICE : DMA_TO_DEVICE); } @@ -1303,8 +1303,7 @@ unmap_release: for (n = 0; n < total_sg; n++) { if (i == err_idx) break; - vring_unmap_state_packed(vq, - &vq->packed.desc_extra[curr]); + vring_unmap_extra_packed(vq, &vq->packed.desc_extra[curr]); curr = vq->packed.desc_extra[curr].next; i++; if (i >= vq->packed.vring.num) @@ -1383,8 +1382,8 @@ static void detach_buf_packed(struct vring_virtqueue *vq, if (unlikely(vq->use_dma_api)) { curr = id; for (i = 0; i < state->num; i++) { - vring_unmap_state_packed(vq, - &vq->packed.desc_extra[curr]); + vring_unmap_extra_packed(vq, + &vq->packed.desc_extra[curr]); curr = vq->packed.desc_extra[curr].next; } } From b4282ebc71aacfa69fc998173ca93b09235ce71f Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Thu, 24 Feb 2022 19:03:55 +0800 Subject: [PATCH 16/33] virtio_ring: remove flags check for unmap split indirect desc When calling vring_unmap_one_split_indirect(), it will not encounter the situation that the flags contains VRING_DESC_F_INDIRECT. So remove this logic. Signed-off-by: Xuan Zhuo Link: https://lore.kernel.org/r/20220224110402.108161-3-xuanzhuo@linux.alibaba.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 7cf3ae057833..fadd0a7503e9 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -379,19 +379,11 @@ static void vring_unmap_one_split_indirect(const struct vring_virtqueue *vq, flags = virtio16_to_cpu(vq->vq.vdev, desc->flags); - if (flags & VRING_DESC_F_INDIRECT) { - dma_unmap_single(vring_dma_dev(vq), - virtio64_to_cpu(vq->vq.vdev, desc->addr), - virtio32_to_cpu(vq->vq.vdev, desc->len), - (flags & VRING_DESC_F_WRITE) ? 
- DMA_FROM_DEVICE : DMA_TO_DEVICE); - } else { - dma_unmap_page(vring_dma_dev(vq), - virtio64_to_cpu(vq->vq.vdev, desc->addr), - virtio32_to_cpu(vq->vq.vdev, desc->len), - (flags & VRING_DESC_F_WRITE) ? - DMA_FROM_DEVICE : DMA_TO_DEVICE); - } + dma_unmap_page(vring_dma_dev(vq), + virtio64_to_cpu(vq->vq.vdev, desc->addr), + virtio32_to_cpu(vq->vq.vdev, desc->len), + (flags & VRING_DESC_F_WRITE) ? + DMA_FROM_DEVICE : DMA_TO_DEVICE); } static unsigned int vring_unmap_one_split(const struct vring_virtqueue *vq, From 920379a465da775ae0bf84d44c16f5432b5be575 Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Thu, 24 Feb 2022 19:03:56 +0800 Subject: [PATCH 17/33] virtio_ring: remove flags check for unmap packed indirect desc When calling vring_unmap_desc_packed(), it will not encounter the situation that the flags contains VRING_DESC_F_INDIRECT. So remove this logic. Signed-off-by: Xuan Zhuo Link: https://lore.kernel.org/r/20220224110402.108161-4-xuanzhuo@linux.alibaba.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_ring.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index fadd0a7503e9..cfb028ca238e 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1009,19 +1009,11 @@ static void vring_unmap_desc_packed(const struct vring_virtqueue *vq, flags = le16_to_cpu(desc->flags); - if (flags & VRING_DESC_F_INDIRECT) { - dma_unmap_single(vring_dma_dev(vq), - le64_to_cpu(desc->addr), - le32_to_cpu(desc->len), - (flags & VRING_DESC_F_WRITE) ? - DMA_FROM_DEVICE : DMA_TO_DEVICE); - } else { - dma_unmap_page(vring_dma_dev(vq), - le64_to_cpu(desc->addr), - le32_to_cpu(desc->len), - (flags & VRING_DESC_F_WRITE) ? - DMA_FROM_DEVICE : DMA_TO_DEVICE); - } + dma_unmap_page(vring_dma_dev(vq), + le64_to_cpu(desc->addr), + le32_to_cpu(desc->len), + (flags & VRING_DESC_F_WRITE) ? 
+ DMA_FROM_DEVICE : DMA_TO_DEVICE); } static struct vring_packed_desc *alloc_indirect_packed(unsigned int total_sg, From 06f05bc52269e1f8286ce0fba03b565528fb1456 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 4 Mar 2022 12:09:18 -0500 Subject: [PATCH 18/33] tools/virtio: fix after premapped buf support Signed-off-by: Michael S. Tsirkin --- tools/virtio/linux/dma-mapping.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h index 8f41cd6bd5c0..834a90bd3270 100644 --- a/tools/virtio/linux/dma-mapping.h +++ b/tools/virtio/linux/dma-mapping.h @@ -26,8 +26,8 @@ enum dma_data_direction { #define dma_map_single(d, p, s, dir) (virt_to_phys(p)) #define dma_mapping_error(...) (0) -#define dma_unmap_single(...) do { } while (0) -#define dma_unmap_page(...) do { } while (0) +#define dma_unmap_single(d, a, s, r) do { (void)(d); (void)(a); (void)(s); (void)(r); } while (0) +#define dma_unmap_page(d, a, s, r) do { (void)(d); (void)(a); (void)(s); (void)(r); } while (0) #define dma_max_mapping_size(...) SIZE_MAX From f03560a57c1f60db6ac23ffd9714e1c69e2f95c7 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 20 Mar 2022 07:02:14 -0400 Subject: [PATCH 19/33] tools/virtio: compile with -pthread When using pthreads, one has to compile and link with -pthread, otherwise e.g. glibc is not guaranteed to be reentrant. This replaces -lpthread. Reported-by: Matthew Wilcox Signed-off-by: Michael S. Tsirkin --- tools/virtio/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile index 0d7bbe49359d..1b25cc7c64bb 100644 --- a/tools/virtio/Makefile +++ b/tools/virtio/Makefile @@ -5,7 +5,8 @@ virtio_test: virtio_ring.o virtio_test.o vringh_test: vringh_test.o vringh.o virtio_ring.o CFLAGS += -g -O2 -Werror -Wno-maybe-uninitialized -Wall -I.
-I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h -LDFLAGS += -lpthread +CFLAGS += -pthread +LDFLAGS += -pthread vpath %.c ../../drivers/virtio ../../drivers/vhost mod: ${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test V=${V} From 8d65bc9a5be3f23c5e2ab36b6b8ef40095165b18 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 22 Mar 2022 12:43:13 +0100 Subject: [PATCH 20/33] virtio: use virtio_device_ready() in virtio_device_restore() After waking up a suspended VM, the kernel prints the following trace for virtio drivers which do not directly call virtio_device_ready() in the .restore: PM: suspend exit irq 22: nobody cared (try booting with the "irqpoll" option) Call Trace: dump_stack_lvl+0x38/0x49 dump_stack+0x10/0x12 __report_bad_irq+0x3a/0xaf note_interrupt.cold+0xb/0x60 handle_irq_event+0x71/0x80 handle_fasteoi_irq+0x95/0x1e0 __common_interrupt+0x6b/0x110 common_interrupt+0x63/0xe0 asm_common_interrupt+0x1e/0x40 ? __do_softirq+0x75/0x2f3 irq_exit_rcu+0x93/0xe0 sysvec_apic_timer_interrupt+0xac/0xd0 asm_sysvec_apic_timer_interrupt+0x12/0x20 arch_cpu_idle+0x12/0x20 default_idle_call+0x39/0xf0 do_idle+0x1b5/0x210 cpu_startup_entry+0x20/0x30 start_secondary+0xf3/0x100 secondary_startup_64_no_verify+0xc3/0xcb handlers: [<000000008f9bac49>] vp_interrupt [<000000008f9bac49>] vp_interrupt Disabling IRQ #22 This happens because we don't invoke .enable_cbs callback in virtio_device_restore(). That callback is used by some transports (e.g. virtio-pci) to enable interrupts. Let's fix it, by calling virtio_device_ready() as we do in virtio_dev_probe(). This function calls .enable_cbs callback and sets DRIVER_OK status bit. This fix also avoids setting DRIVER_OK twice for those drivers that call virtio_device_ready() in the .restore.
Fixes: d50497eb4e55 ("virtio_config: introduce a new .enable_cbs method") Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20220322114313.116516-1-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index 22f15f444f75..75c8d560bbd3 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -526,8 +526,9 @@ int virtio_device_restore(struct virtio_device *dev) goto err; } - /* Finally, tell the device we're all set */ - virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); + /* If restore didn't do it, mark device DRIVER_OK ourselves. */ + if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK)) + virtio_device_ready(dev); virtio_config_enable(dev); From c1ddc42da2b2632545b76be884faf0ca363b3246 Mon Sep 17 00:00:00 2001 From: Andrew Melnychenko Date: Mon, 28 Mar 2022 20:53:33 +0300 Subject: [PATCH 21/33] drivers/net/virtio_net: Fixed padded vheader to use v1 with hash. The header v1 provides additional info about RSS. Added changes to computing proper header length. In the next patches, the header may contain RSS hash info for the hash population. Signed-off-by: Andrew Melnychenko Link: https://lore.kernel.org/r/20220328175336.10802-2-andrew@daynix.com Signed-off-by: Michael S. Tsirkin --- drivers/net/virtio_net.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index a801ea40908f..b9ed7c55d9a0 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -242,13 +242,13 @@ struct virtnet_info { }; struct padded_vnet_hdr { - struct virtio_net_hdr_mrg_rxbuf hdr; + struct virtio_net_hdr_v1_hash hdr; /* * hdr is in a separate sg buffer, and data sg buffer shares same page * with this header sg. This padding makes next sg 16 byte aligned * after the header. 
*/ - char padding[4]; + char padding[12]; }; static bool is_xdp_frame(void *ptr) @@ -396,7 +396,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, hdr_len = vi->hdr_len; if (vi->mergeable_rx_bufs) - hdr_padded_len = sizeof(*hdr); + hdr_padded_len = hdr_len; else hdr_padded_len = sizeof(struct padded_vnet_hdr); @@ -1266,7 +1266,8 @@ static unsigned int get_mergeable_buf_len(struct receive_queue *rq, struct ewma_pkt_len *avg_pkt_len, unsigned int room) { - const size_t hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); + struct virtnet_info *vi = rq->vq->vdev->priv; + const size_t hdr_len = vi->hdr_len; unsigned int len; if (room) @@ -2851,7 +2852,7 @@ static void virtnet_del_vqs(struct virtnet_info *vi) */ static unsigned int mergeable_min_buf_len(struct virtnet_info *vi, struct virtqueue *vq) { - const unsigned int hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); + const unsigned int hdr_len = vi->hdr_len; unsigned int rq_size = virtqueue_get_vring_size(vq); unsigned int packet_len = vi->big_packets ? IP_MAX_MTU : vi->dev->max_mtu; unsigned int buf_len = hdr_len + ETH_HLEN + VLAN_HLEN + packet_len; From c7114b1249fa3b5f3a434606ba4cc89c4a27d618 Mon Sep 17 00:00:00 2001 From: Andrew Melnychenko Date: Mon, 28 Mar 2022 20:53:34 +0300 Subject: [PATCH 22/33] drivers/net/virtio_net: Added basic RSS support. Added features for RSS. Added initialization, RXHASH feature and ethtool ops. By default RSS/RXHASH is disabled. Virtio RSS "IPv6 extensions" hashes disabled. Added ethtools ops to set key and indirection table. Signed-off-by: Andrew Melnychenko Link: https://lore.kernel.org/r/20220328175336.10802-3-andrew@daynix.com Signed-off-by: Michael S. 
Tsirkin --- drivers/net/virtio_net.c | 192 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 186 insertions(+), 6 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index b9ed7c55d9a0..b5f2bb426a7b 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -169,6 +169,24 @@ struct receive_queue { struct xdp_rxq_info xdp_rxq; }; +/* This structure can contain rss message with maximum settings for indirection table and keysize + * Note, that default structure that describes RSS configuration virtio_net_rss_config + * contains same info but can't handle table values. + * In any case, structure would be passed to virtio hw through sg_buf split by parts + * because table sizes may be differ according to the device configuration. + */ +#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 +#define VIRTIO_NET_RSS_MAX_TABLE_LEN 128 +struct virtio_net_ctrl_rss { + u32 hash_types; + u16 indirection_table_mask; + u16 unclassified_queue; + u16 indirection_table[VIRTIO_NET_RSS_MAX_TABLE_LEN]; + u16 max_tx_vq; + u8 hash_key_length; + u8 key[VIRTIO_NET_RSS_MAX_KEY_SIZE]; +}; + /* Control VQ buffers: protected by the rtnl lock */ struct control_buf { struct virtio_net_ctrl_hdr hdr; @@ -178,6 +196,7 @@ struct control_buf { u8 allmulti; __virtio16 vid; __virtio64 offloads; + struct virtio_net_ctrl_rss rss; }; struct virtnet_info { @@ -206,6 +225,12 @@ struct virtnet_info { /* Host will merge rx buffers for big packets (shake it! shake it!) 
*/ bool mergeable_rx_bufs; + /* Host supports rss and/or hash report */ + bool has_rss; + u8 rss_key_size; + u16 rss_indir_table_size; + u32 rss_hash_types_supported; + /* Has control virtqueue */ bool has_cvq; @@ -2184,6 +2209,57 @@ static void virtnet_get_ringparam(struct net_device *dev, ring->tx_pending = ring->tx_max_pending; } +static bool virtnet_commit_rss_command(struct virtnet_info *vi) +{ + struct net_device *dev = vi->dev; + struct scatterlist sgs[4]; + unsigned int sg_buf_size; + + /* prepare sgs */ + sg_init_table(sgs, 4); + + sg_buf_size = offsetof(struct virtio_net_ctrl_rss, indirection_table); + sg_set_buf(&sgs[0], &vi->ctrl->rss, sg_buf_size); + + sg_buf_size = sizeof(uint16_t) * (vi->ctrl->rss.indirection_table_mask + 1); + sg_set_buf(&sgs[1], vi->ctrl->rss.indirection_table, sg_buf_size); + + sg_buf_size = offsetof(struct virtio_net_ctrl_rss, key) + - offsetof(struct virtio_net_ctrl_rss, max_tx_vq); + sg_set_buf(&sgs[2], &vi->ctrl->rss.max_tx_vq, sg_buf_size); + + sg_buf_size = vi->rss_key_size; + sg_set_buf(&sgs[3], vi->ctrl->rss.key, sg_buf_size); + + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, + VIRTIO_NET_CTRL_MQ_RSS_CONFIG, sgs)) { + dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n"); + return false; + } + return true; +} + +static void virtnet_init_default_rss(struct virtnet_info *vi) +{ + u32 indir_val = 0; + int i = 0; + + vi->ctrl->rss.hash_types = vi->rss_hash_types_supported; + vi->ctrl->rss.indirection_table_mask = vi->rss_indir_table_size + ? 
vi->rss_indir_table_size - 1 : 0; + vi->ctrl->rss.unclassified_queue = 0; + + for (; i < vi->rss_indir_table_size; ++i) { + indir_val = ethtool_rxfh_indir_default(i, vi->curr_queue_pairs); + vi->ctrl->rss.indirection_table[i] = indir_val; + } + + vi->ctrl->rss.max_tx_vq = vi->curr_queue_pairs; + vi->ctrl->rss.hash_key_length = vi->rss_key_size; + + netdev_rss_key_fill(vi->ctrl->rss.key, vi->rss_key_size); +} + static void virtnet_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) @@ -2412,6 +2488,71 @@ static void virtnet_update_settings(struct virtnet_info *vi) vi->duplex = duplex; } +static u32 virtnet_get_rxfh_key_size(struct net_device *dev) +{ + return ((struct virtnet_info *)netdev_priv(dev))->rss_key_size; +} + +static u32 virtnet_get_rxfh_indir_size(struct net_device *dev) +{ + return ((struct virtnet_info *)netdev_priv(dev))->rss_indir_table_size; +} + +static int virtnet_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, u8 *hfunc) +{ + struct virtnet_info *vi = netdev_priv(dev); + int i; + + if (indir) { + for (i = 0; i < vi->rss_indir_table_size; ++i) + indir[i] = vi->ctrl->rss.indirection_table[i]; + } + + if (key) + memcpy(key, vi->ctrl->rss.key, vi->rss_key_size); + + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + + return 0; +} + +static int virtnet_set_rxfh(struct net_device *dev, const u32 *indir, const u8 *key, const u8 hfunc) +{ + struct virtnet_info *vi = netdev_priv(dev); + int i; + + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + + if (indir) { + for (i = 0; i < vi->rss_indir_table_size; ++i) + vi->ctrl->rss.indirection_table[i] = indir[i]; + } + if (key) + memcpy(vi->ctrl->rss.key, key, vi->rss_key_size); + + virtnet_commit_rss_command(vi); + + return 0; +} + +static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, u32 *rule_locs) +{ + struct virtnet_info *vi = netdev_priv(dev); + int rc = 0; + + switch (info->cmd) { + case ETHTOOL_GRXRINGS: + info->data = 
vi->curr_queue_pairs; + break; + default: + rc = -EOPNOTSUPP; + } + + return rc; +} + static const struct ethtool_ops virtnet_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_MAX_FRAMES, .get_drvinfo = virtnet_get_drvinfo, @@ -2427,6 +2568,11 @@ static const struct ethtool_ops virtnet_ethtool_ops = { .set_link_ksettings = virtnet_set_link_ksettings, .set_coalesce = virtnet_set_coalesce, .get_coalesce = virtnet_get_coalesce, + .get_rxfh_key_size = virtnet_get_rxfh_key_size, + .get_rxfh_indir_size = virtnet_get_rxfh_indir_size, + .get_rxfh = virtnet_get_rxfh, + .set_rxfh = virtnet_set_rxfh, + .get_rxnfc = virtnet_get_rxnfc, }; static void virtnet_freeze_down(struct virtio_device *vdev) @@ -2679,6 +2825,16 @@ static int virtnet_set_features(struct net_device *dev, vi->guest_offloads = offloads; } + if ((dev->features ^ features) & NETIF_F_RXHASH) { + if (features & NETIF_F_RXHASH) + vi->ctrl->rss.hash_types = vi->rss_hash_types_supported; + else + vi->ctrl->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE; + + if (!virtnet_commit_rss_command(vi)) + return -EINVAL; + } + return 0; } @@ -3073,6 +3229,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev) "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_MQ, "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, + "VIRTIO_NET_F_CTRL_VQ") || + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, "VIRTIO_NET_F_CTRL_VQ"))) { return false; } @@ -3113,13 +3271,14 @@ static int virtnet_probe(struct virtio_device *vdev) u16 max_queue_pairs; int mtu; - /* Find if host supports multiqueue virtio_net device */ - err = virtio_cread_feature(vdev, VIRTIO_NET_F_MQ, - struct virtio_net_config, - max_virtqueue_pairs, &max_queue_pairs); + /* Find if host supports multiqueue/rss virtio_net device */ + max_queue_pairs = 1; + if (virtio_has_feature(vdev, VIRTIO_NET_F_MQ) || virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) + max_queue_pairs = + virtio_cread16(vdev, offsetof(struct virtio_net_config, 
max_virtqueue_pairs)); /* We need at least 2 queue's */ - if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || + if (max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) max_queue_pairs = 1; @@ -3207,6 +3366,23 @@ static int virtnet_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) vi->mergeable_rx_bufs = true; + if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { + vi->has_rss = true; + vi->rss_indir_table_size = + virtio_cread16(vdev, offsetof(struct virtio_net_config, + rss_max_indirection_table_length)); + vi->rss_key_size = + virtio_cread8(vdev, offsetof(struct virtio_net_config, rss_max_key_size)); + + vi->rss_hash_types_supported = + virtio_cread32(vdev, offsetof(struct virtio_net_config, supported_hash_types)); + vi->rss_hash_types_supported &= + ~(VIRTIO_NET_RSS_HASH_TYPE_IP_EX | + VIRTIO_NET_RSS_HASH_TYPE_TCP_EX | + VIRTIO_NET_RSS_HASH_TYPE_UDP_EX); + + dev->hw_features |= NETIF_F_RXHASH; + } if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); @@ -3275,6 +3451,9 @@ static int virtnet_probe(struct virtio_device *vdev) } } + if (vi->has_rss) + virtnet_init_default_rss(vi); + err = register_netdev(dev); if (err) { pr_debug("virtio_net: registering device failed\n"); @@ -3406,7 +3585,8 @@ static struct virtio_device_id id_table[] = { VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, \ VIRTIO_NET_F_CTRL_MAC_ADDR, \ VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ - VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY + VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ + VIRTIO_NET_F_RSS static unsigned int features[] = { VIRTNET_FEATURES, From 91f41f01d2195d4d059ad7f141e41d40a45e1e1c Mon Sep 17 00:00:00 2001 From: Andrew Melnychenko Date: Mon, 28 Mar 2022 20:53:35 +0300 Subject: [PATCH 23/33] 
drivers/net/virtio_net: Added RSS hash report. Added features for RSS hash report. If hash is provided - it sets to skb. Added checks if rss and/or hash are enabled together. Signed-off-by: Andrew Melnychenko Link: https://lore.kernel.org/r/20220328175336.10802-4-andrew@daynix.com Signed-off-by: Michael S. Tsirkin --- drivers/net/virtio_net.c | 55 +++++++++++++++++++++++++++++++++++----- 1 file changed, 49 insertions(+), 6 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index b5f2bb426a7b..c9472c30e8a2 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -227,6 +227,7 @@ struct virtnet_info { /* Host supports rss and/or hash report */ bool has_rss; + bool has_rss_hash_report; u8 rss_key_size; u16 rss_indir_table_size; u32 rss_hash_types_supported; @@ -1148,6 +1149,35 @@ xdp_xmit: return NULL; } +static void virtio_skb_set_hash(const struct virtio_net_hdr_v1_hash *hdr_hash, + struct sk_buff *skb) +{ + enum pkt_hash_types rss_hash_type; + + if (!hdr_hash || !skb) + return; + + switch ((int)hdr_hash->hash_report) { + case VIRTIO_NET_HASH_REPORT_TCPv4: + case VIRTIO_NET_HASH_REPORT_UDPv4: + case VIRTIO_NET_HASH_REPORT_TCPv6: + case VIRTIO_NET_HASH_REPORT_UDPv6: + case VIRTIO_NET_HASH_REPORT_TCPv6_EX: + case VIRTIO_NET_HASH_REPORT_UDPv6_EX: + rss_hash_type = PKT_HASH_TYPE_L4; + break; + case VIRTIO_NET_HASH_REPORT_IPv4: + case VIRTIO_NET_HASH_REPORT_IPv6: + case VIRTIO_NET_HASH_REPORT_IPv6_EX: + rss_hash_type = PKT_HASH_TYPE_L3; + break; + case VIRTIO_NET_HASH_REPORT_NONE: + default: + rss_hash_type = PKT_HASH_TYPE_NONE; + } + skb_set_hash(skb, (unsigned int)hdr_hash->hash_value, rss_hash_type); +} + static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, void *buf, unsigned int len, void **ctx, unsigned int *xdp_xmit, @@ -1182,6 +1212,8 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, return; hdr = skb_vnet_hdr(skb); + if (dev->features & NETIF_F_RXHASH && 
vi->has_rss_hash_report) + virtio_skb_set_hash((const struct virtio_net_hdr_v1_hash *)hdr, skb); if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID) skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -2232,7 +2264,8 @@ static bool virtnet_commit_rss_command(struct virtnet_info *vi) sg_set_buf(&sgs[3], vi->ctrl->rss.key, sg_buf_size); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, - VIRTIO_NET_CTRL_MQ_RSS_CONFIG, sgs)) { + vi->has_rss ? VIRTIO_NET_CTRL_MQ_RSS_CONFIG + : VIRTIO_NET_CTRL_MQ_HASH_CONFIG, sgs)) { dev_warn(&dev->dev, "VIRTIONET issue with committing RSS sgs\n"); return false; } @@ -3231,6 +3264,8 @@ static bool virtnet_validate_features(struct virtio_device *vdev) VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR, "VIRTIO_NET_F_CTRL_VQ") || VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_RSS, + "VIRTIO_NET_F_CTRL_VQ") || + VIRTNET_FAIL_ON(vdev, VIRTIO_NET_F_HASH_REPORT, "VIRTIO_NET_F_CTRL_VQ"))) { return false; } @@ -3366,8 +3401,13 @@ static int virtnet_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) vi->mergeable_rx_bufs = true; - if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) { + if (virtio_has_feature(vdev, VIRTIO_NET_F_HASH_REPORT)) + vi->has_rss_hash_report = true; + + if (virtio_has_feature(vdev, VIRTIO_NET_F_RSS)) vi->has_rss = true; + + if (vi->has_rss || vi->has_rss_hash_report) { vi->rss_indir_table_size = virtio_cread16(vdev, offsetof(struct virtio_net_config, rss_max_indirection_table_length)); @@ -3383,8 +3423,11 @@ static int virtnet_probe(struct virtio_device *vdev) dev->hw_features |= NETIF_F_RXHASH; } - if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || - virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) + + if (vi->has_rss_hash_report) + vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); + else if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF) || + virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) vi->hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf); else vi->hdr_len = sizeof(struct virtio_net_hdr); @@ -3451,7 
+3494,7 @@ static int virtnet_probe(struct virtio_device *vdev) } } - if (vi->has_rss) + if (vi->has_rss || vi->has_rss_hash_report) virtnet_init_default_rss(vi); err = register_netdev(dev); @@ -3586,7 +3629,7 @@ static struct virtio_device_id id_table[] = { VIRTIO_NET_F_CTRL_MAC_ADDR, \ VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ - VIRTIO_NET_F_RSS + VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT static unsigned int features[] = { VIRTNET_FEATURES, From c11708209df216aee72adf97d13c72c093a4ffce Mon Sep 17 00:00:00 2001 From: Andrew Melnychenko Date: Mon, 28 Mar 2022 20:53:36 +0300 Subject: [PATCH 24/33] drivers/net/virtio_net: Added RSS hash report control. Now it's possible to control supported hashflows. Added hashflow set/get callbacks. Also, disabling RXH_IP_SRC/DST for TCP would disable them for UDP. TCP and UDP support only: ethtool -U eth0 rx-flow-hash tcp4 sd RXH_IP_SRC + RXH_IP_DST ethtool -U eth0 rx-flow-hash tcp4 sdfn RXH_IP_SRC + RXH_IP_DST + RXH_L4_B_0_1 + RXH_L4_B_2_3 Disabling happens because VirtioNET hashtype for IP doesn't check L4 proto, it works for all IP packets (TCP, UDP, ICMP, etc.). For TCP and UDP, it's possible to set IP+PORT hashes. But disabling IP hashes will disable them for TCP and UDP simultaneously. It's possible to set IP+PORT for TCP/UDP and disable/enable IP for everything else (UDP, ICMP, etc.). Signed-off-by: Andrew Melnychenko Link: https://lore.kernel.org/r/20220328175336.10802-5-andrew@daynix.com Signed-off-by: Michael S. 
Tsirkin --- drivers/net/virtio_net.c | 141 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 140 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index c9472c30e8a2..17eeb4f807e3 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -231,6 +231,7 @@ struct virtnet_info { u8 rss_key_size; u16 rss_indir_table_size; u32 rss_hash_types_supported; + u32 rss_hash_types_saved; /* Has control virtqueue */ bool has_cvq; @@ -2278,6 +2279,7 @@ static void virtnet_init_default_rss(struct virtnet_info *vi) int i = 0; vi->ctrl->rss.hash_types = vi->rss_hash_types_supported; + vi->rss_hash_types_saved = vi->rss_hash_types_supported; vi->ctrl->rss.indirection_table_mask = vi->rss_indir_table_size ? vi->rss_indir_table_size - 1 : 0; vi->ctrl->rss.unclassified_queue = 0; @@ -2293,6 +2295,121 @@ static void virtnet_init_default_rss(struct virtnet_info *vi) netdev_rss_key_fill(vi->ctrl->rss.key, vi->rss_key_size); } +static void virtnet_get_hashflow(const struct virtnet_info *vi, struct ethtool_rxnfc *info) +{ + info->data = 0; + switch (info->flow_type) { + case TCP_V4_FLOW: + if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv4) { + info->data = RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3; + } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { + info->data = RXH_IP_SRC | RXH_IP_DST; + } + break; + case TCP_V6_FLOW: + if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_TCPv6) { + info->data = RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3; + } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { + info->data = RXH_IP_SRC | RXH_IP_DST; + } + break; + case UDP_V4_FLOW: + if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv4) { + info->data = RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3; + } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) { + info->data = RXH_IP_SRC | RXH_IP_DST; + } + break; + case UDP_V6_FLOW: + 
if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_UDPv6) { + info->data = RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3; + } else if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) { + info->data = RXH_IP_SRC | RXH_IP_DST; + } + break; + case IPV4_FLOW: + if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv4) + info->data = RXH_IP_SRC | RXH_IP_DST; + + break; + case IPV6_FLOW: + if (vi->rss_hash_types_saved & VIRTIO_NET_RSS_HASH_TYPE_IPv6) + info->data = RXH_IP_SRC | RXH_IP_DST; + + break; + default: + info->data = 0; + break; + } +} + +static bool virtnet_set_hashflow(struct virtnet_info *vi, struct ethtool_rxnfc *info) +{ + u32 new_hashtypes = vi->rss_hash_types_saved; + bool is_disable = info->data & RXH_DISCARD; + bool is_l4 = info->data == (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3); + + /* supports only 'sd', 'sdfn' and 'r' */ + if (!((info->data == (RXH_IP_SRC | RXH_IP_DST)) | is_l4 | is_disable)) + return false; + + switch (info->flow_type) { + case TCP_V4_FLOW: + new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_TCPv4); + if (!is_disable) + new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 + | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_TCPv4 : 0); + break; + case UDP_V4_FLOW: + new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4); + if (!is_disable) + new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv4 + | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv4 : 0); + break; + case IPV4_FLOW: + new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv4; + if (!is_disable) + new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv4; + break; + case TCP_V6_FLOW: + new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_TCPv6); + if (!is_disable) + new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 + | (is_l4 ? 
VIRTIO_NET_RSS_HASH_TYPE_TCPv6 : 0); + break; + case UDP_V6_FLOW: + new_hashtypes &= ~(VIRTIO_NET_RSS_HASH_TYPE_IPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6); + if (!is_disable) + new_hashtypes |= VIRTIO_NET_RSS_HASH_TYPE_IPv6 + | (is_l4 ? VIRTIO_NET_RSS_HASH_TYPE_UDPv6 : 0); + break; + case IPV6_FLOW: + new_hashtypes &= ~VIRTIO_NET_RSS_HASH_TYPE_IPv6; + if (!is_disable) + new_hashtypes = VIRTIO_NET_RSS_HASH_TYPE_IPv6; + break; + default: + /* unsupported flow */ + return false; + } + + /* if unsupported hashtype was set */ + if (new_hashtypes != (new_hashtypes & vi->rss_hash_types_supported)) + return false; + + if (new_hashtypes != vi->rss_hash_types_saved) { + vi->rss_hash_types_saved = new_hashtypes; + vi->ctrl->rss.hash_types = vi->rss_hash_types_saved; + if (vi->dev->features & NETIF_F_RXHASH) + return virtnet_commit_rss_command(vi); + } + + return true; +} static void virtnet_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) @@ -2578,6 +2695,27 @@ static int virtnet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, switch (info->cmd) { case ETHTOOL_GRXRINGS: info->data = vi->curr_queue_pairs; + break; + case ETHTOOL_GRXFH: + virtnet_get_hashflow(vi, info); + break; + default: + rc = -EOPNOTSUPP; + } + + return rc; +} + +static int virtnet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) +{ + struct virtnet_info *vi = netdev_priv(dev); + int rc = 0; + + switch (info->cmd) { + case ETHTOOL_SRXFH: + if (!virtnet_set_hashflow(vi, info)) + rc = -EINVAL; + break; default: rc = -EOPNOTSUPP; @@ -2606,6 +2744,7 @@ static const struct ethtool_ops virtnet_ethtool_ops = { .get_rxfh = virtnet_get_rxfh, .set_rxfh = virtnet_set_rxfh, .get_rxnfc = virtnet_get_rxnfc, + .set_rxnfc = virtnet_set_rxnfc, }; static void virtnet_freeze_down(struct virtio_device *vdev) @@ -2860,7 +2999,7 @@ static int virtnet_set_features(struct net_device *dev, if ((dev->features ^ features) & NETIF_F_RXHASH) { if (features & NETIF_F_RXHASH) - 
vi->ctrl->rss.hash_types = vi->rss_hash_types_supported; + vi->ctrl->rss.hash_types = vi->rss_hash_types_saved; else vi->ctrl->rss.hash_types = VIRTIO_NET_HASH_REPORT_NONE; From 7b79edfb862d6b1ecc66479419ae67a7db2d02e3 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 23 Mar 2022 11:15:23 +0800 Subject: [PATCH 25/33] Revert "virtio-pci: harden INTX interrupts" This reverts commit 080cd7c3ac8701081d143a15ba17dd9475313188. This is needed because the MSI-X interrupt hardening will be reverted in the next patch. We will rework the interrupt hardening in the future. Fixes: 080cd7c3ac87 ("virtio-pci: harden INTX interrupts") Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20220323031524.6555-1-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_common.c | 23 ++--------------------- drivers/virtio/virtio_pci_common.h | 1 - 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index fdbde1db5ec5..3f51fdb7be45 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -30,16 +30,8 @@ void vp_disable_cbs(struct virtio_device *vdev) struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i; - if (vp_dev->intx_enabled) { - /* - * The below synchronize() guarantees that any - * interrupt for this line arriving after - * synchronize_irq() has completed is guaranteed to see - * intx_soft_enabled == false. - */ - WRITE_ONCE(vp_dev->intx_soft_enabled, false); + if (vp_dev->intx_enabled) synchronize_irq(vp_dev->pci_dev->irq); - } for (i = 0; i < vp_dev->msix_vectors; ++i) disable_irq(pci_irq_vector(vp_dev->pci_dev, i)); @@ -51,16 +43,8 @@ void vp_enable_cbs(struct virtio_device *vdev) struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i; - if (vp_dev->intx_enabled) { - disable_irq(vp_dev->pci_dev->irq); - /* - * The above disable_irq() provides TSO ordering and - * as such promotes the below store to store-release. 
- */ - WRITE_ONCE(vp_dev->intx_soft_enabled, true); - enable_irq(vp_dev->pci_dev->irq); + if (vp_dev->intx_enabled) return; - } for (i = 0; i < vp_dev->msix_vectors; ++i) enable_irq(pci_irq_vector(vp_dev->pci_dev, i)); @@ -113,9 +97,6 @@ static irqreturn_t vp_interrupt(int irq, void *opaque) struct virtio_pci_device *vp_dev = opaque; u8 isr; - if (!READ_ONCE(vp_dev->intx_soft_enabled)) - return IRQ_NONE; - /* reading the ISR has the effect of also clearing it so it's very * important to save off the value. */ isr = ioread8(vp_dev->isr); diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 23f6c5c678d5..d3c6f72c7390 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -63,7 +63,6 @@ struct virtio_pci_device { /* MSI-X support */ int msix_enabled; int intx_enabled; - bool intx_soft_enabled; cpumask_var_t *msix_affinity_masks; /* Name strings for interrupts. This size should be enough, * and I'm too lazy to allocate each name separately. */ From eb4cecb453a19b34d5454b49532e09e9cb0c1529 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 23 Mar 2022 11:15:24 +0800 Subject: [PATCH 26/33] Revert "virtio_pci: harden MSI-X interrupts" This reverts commit 9e35276a5344f74d4a3600fc4100b3dd251d5c56. Issues were reported for drivers that use affinity-managed IRQs, where manually toggling the IRQ status is not expected. We also forgot to enable the interrupts in the restore path. In the future, we will rework the interrupt hardening. Fixes: 9e35276a5344 ("virtio_pci: harden MSI-X interrupts") Reported-by: Marc Zyngier Reported-by: Stefano Garzarella Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20220323031524.6555-2-jasowang@redhat.com Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_pci_common.c | 27 ++++++--------------------- drivers/virtio/virtio_pci_common.h | 6 ++---- drivers/virtio/virtio_pci_legacy.c | 5 ++--- drivers/virtio/virtio_pci_modern.c | 6 ++---- 4 files changed, 12 insertions(+), 32 deletions(-) diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 3f51fdb7be45..d724f676608b 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -24,8 +24,8 @@ MODULE_PARM_DESC(force_legacy, "Force legacy mode for transitional virtio 1 devices"); #endif -/* disable irq handlers */ -void vp_disable_cbs(struct virtio_device *vdev) +/* wait for pending irq handlers */ +void vp_synchronize_vectors(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); int i; @@ -34,20 +34,7 @@ void vp_disable_cbs(struct virtio_device *vdev) synchronize_irq(vp_dev->pci_dev->irq); for (i = 0; i < vp_dev->msix_vectors; ++i) - disable_irq(pci_irq_vector(vp_dev->pci_dev, i)); -} - -/* enable irq handlers */ -void vp_enable_cbs(struct virtio_device *vdev) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - int i; - - if (vp_dev->intx_enabled) - return; - - for (i = 0; i < vp_dev->msix_vectors; ++i) - enable_irq(pci_irq_vector(vp_dev->pci_dev, i)); + synchronize_irq(pci_irq_vector(vp_dev->pci_dev, i)); } /* the notify function used when creating a virt queue */ @@ -154,8 +141,7 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-config", name); err = request_irq(pci_irq_vector(vp_dev->pci_dev, v), - vp_config_changed, IRQF_NO_AUTOEN, - vp_dev->msix_names[v], + vp_config_changed, 0, vp_dev->msix_names[v], vp_dev); if (err) goto error; @@ -174,8 +160,7 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names, "%s-virtqueues", name); err = 
request_irq(pci_irq_vector(vp_dev->pci_dev, v), - vp_vring_interrupt, IRQF_NO_AUTOEN, - vp_dev->msix_names[v], + vp_vring_interrupt, 0, vp_dev->msix_names[v], vp_dev); if (err) goto error; @@ -352,7 +337,7 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned nvqs, "%s-%s", dev_name(&vp_dev->vdev.dev), names[i]); err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), - vring_interrupt, IRQF_NO_AUTOEN, + vring_interrupt, 0, vp_dev->msix_names[msix_vec], vqs[i]); if (err) diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index d3c6f72c7390..eb17a29fc7ef 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -101,10 +101,8 @@ static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev) return container_of(vdev, struct virtio_pci_device, vdev); } -/* disable irq handlers */ -void vp_disable_cbs(struct virtio_device *vdev); -/* enable irq handlers */ -void vp_enable_cbs(struct virtio_device *vdev); +/* wait for pending irq handlers */ +void vp_synchronize_vectors(struct virtio_device *vdev); /* the notify function used when creating a virt queue */ bool vp_notify(struct virtqueue *vq); /* the config->del_vqs() implementation */ diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 34141b9abe27..6f4e34ce96b8 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -98,8 +98,8 @@ static void vp_reset(struct virtio_device *vdev) /* Flush out the status write, and flush in device writes, * including MSi-X interrupts, if any. */ vp_legacy_get_status(&vp_dev->ldev); - /* Disable VQ/configuration callbacks. */ - vp_disable_cbs(vdev); + /* Flush pending VQ/configuration callbacks. 
*/ + vp_synchronize_vectors(vdev); } static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) @@ -185,7 +185,6 @@ static void del_vq(struct virtio_pci_vq_info *info) } static const struct virtio_config_ops virtio_pci_config_ops = { - .enable_cbs = vp_enable_cbs, .get = vp_get, .set = vp_set, .get_status = vp_get_status, diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 5455bc041fb6..30654d3a0b41 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -172,8 +172,8 @@ static void vp_reset(struct virtio_device *vdev) */ while (vp_modern_get_status(mdev)) msleep(1); - /* Disable VQ/configuration callbacks. */ - vp_disable_cbs(vdev); + /* Flush pending VQ/configuration callbacks. */ + vp_synchronize_vectors(vdev); } static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector) @@ -380,7 +380,6 @@ static bool vp_get_shm_region(struct virtio_device *vdev, } static const struct virtio_config_ops virtio_pci_config_nodev_ops = { - .enable_cbs = vp_enable_cbs, .get = NULL, .set = NULL, .generation = vp_generation, @@ -398,7 +397,6 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = { }; static const struct virtio_config_ops virtio_pci_config_ops = { - .enable_cbs = vp_enable_cbs, .get = vp_get, .set = vp_set, .generation = vp_generation, From 3f63a1d7f6f500b6891b1003cec3e23ea4996a2e Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Wed, 23 Mar 2022 14:07:27 +0000 Subject: [PATCH 27/33] virtio: pci: check bar values read from virtio config space virtio pci config structures may in future have non-standard bar values in the bar field. We should anticipate this by skipping any structures containing such a reserved value. The bar value should never change: to check for harmful modified values, we re-read it from the config space in vp_modern_map_capability(). Also clean up an existing check to consistently use PCI_STD_NUM_BARS. 
Signed-off-by: Keir Fraser Link: https://lore.kernel.org/r/20220323140727.3499235-1-keirf@google.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_modern.c | 12 +++++++++--- drivers/virtio/virtio_pci_modern_dev.c | 9 ++++++++- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 30654d3a0b41..a2671a20ef77 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -293,7 +293,7 @@ static int virtio_pci_find_shm_cap(struct pci_dev *dev, u8 required_id, for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR); pos > 0; pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) { - u8 type, cap_len, id; + u8 type, cap_len, id, res_bar; u32 tmp32; u64 res_offset, res_length; @@ -315,9 +315,14 @@ static int virtio_pci_find_shm_cap(struct pci_dev *dev, u8 required_id, if (id != required_id) continue; - /* Type, and ID match, looks good */ pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap, - bar), bar); + bar), &res_bar); + if (res_bar >= PCI_STD_NUM_BARS) + continue; + + /* Type and ID match, and the BAR value isn't reserved. + * Looks good. + */ /* Read the lower 32bit of length and offset */ pci_read_config_dword(dev, pos + offsetof(struct virtio_pci_cap, @@ -337,6 +342,7 @@ static int virtio_pci_find_shm_cap(struct pci_dev *dev, u8 required_id, length_hi), &tmp32); res_length |= ((u64)tmp32) << 32; + *bar = res_bar; *offset = res_offset; *len = res_length; diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c index e8b3ff2b9fbc..591738ad3d56 100644 --- a/drivers/virtio/virtio_pci_modern_dev.c +++ b/drivers/virtio/virtio_pci_modern_dev.c @@ -35,6 +35,13 @@ vp_modern_map_capability(struct virtio_pci_modern_device *mdev, int off, pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, length), &length); + /* Check if the BAR may have changed since we requested the region. 
*/ + if (bar >= PCI_STD_NUM_BARS || !(mdev->modern_bars & (1 << bar))) { + dev_err(&dev->dev, + "virtio_pci: bar unexpectedly changed to %u\n", bar); + return NULL; + } + if (length <= start) { dev_err(&dev->dev, "virtio_pci: bad capability len %u (>%u expected)\n", @@ -120,7 +127,7 @@ static inline int virtio_pci_find_capability(struct pci_dev *dev, u8 cfg_type, &bar); /* Ignore structures with reserved BAR values */ - if (bar > 0x5) + if (bar >= PCI_STD_NUM_BARS) continue; if (type == cfg_type) { From f1781bedea8cae7f26aa0f20a00017ab746d4d4c Mon Sep 17 00:00:00 2001 From: Michael Qiu Date: Mon, 28 Mar 2022 01:48:12 -0400 Subject: [PATCH 28/33] vdpa/mlx5: re-create forwarding rules after mac modified When the MAC address is modified in the guest, we only re-add the MAC to MPFS. That is not enough, because the guest network will not work correctly: reply packets from outside go straight to the host VF net interface. This patch recreates the flow rules so that it works correctly. Signed-off-by: Michael Qiu Link: https://lore.kernel.org/r/1648446492-17614-1-git-send-email-08005325@163.com Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Eli Cohen --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 45 ++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 5d7f3e8000c6..c0f0ecb82c6f 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1475,7 +1475,7 @@ static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd) virtio_net_ctrl_ack status = VIRTIO_NET_ERR; struct mlx5_core_dev *pfmdev; size_t read; - u8 mac[ETH_ALEN]; + u8 mac[ETH_ALEN], mac_back[ETH_ALEN]; pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev)); switch (cmd) { @@ -1489,6 +1489,9 @@ static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd) break; } + if (is_zero_ether_addr(mac)) + break; + if (!is_zero_ether_addr(ndev->config.mac)) { if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) { mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n", @@ -1503,7 +1506,47 @@ static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd) break; } + /* backup the original mac address so that if failed to add the forward rules + * we could restore it + */ + memcpy(mac_back, ndev->config.mac, ETH_ALEN); + memcpy(ndev->config.mac, mac, ETH_ALEN); + + /* Need recreate the flow table entry, so that the packet could forward back + */ + remove_fwd_to_tir(ndev); + + if (add_fwd_to_tir(ndev)) { + mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n"); + + /* Although it hardly run here, we still need double check */ + if (is_zero_ether_addr(mac_back)) { + mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n"); + break; + } + + /* Try to restore original mac address to MFPS table, and try to restore + * the forward rule entry. 
+ */ + if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) { + mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n", + ndev->config.mac); + } + + if (mlx5_mpfs_add_mac(pfmdev, mac_back)) { + mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n", + mac_back); + } + + memcpy(ndev->config.mac, mac_back, ETH_ALEN); + + if (add_fwd_to_tir(ndev)) + mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n"); + + break; + } + status = VIRTIO_NET_OK; break; From a61280ddddaa45f95b60dd54c05f8e0e5b6810b7 Mon Sep 17 00:00:00 2001 From: Longpeng Date: Tue, 15 Mar 2022 11:25:51 +0800 Subject: [PATCH 29/33] vdpa: support exposing the config size to userspace - GET_CONFIG_SIZE: return the size of the virtio config space. The size contains the fields which are conditional on feature bits. Acked-by: Jason Wang Signed-off-by: Longpeng Link: https://lore.kernel.org/r/20220315032553.455-2-longpeng2@huawei.com Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Stefano Garzarella --- drivers/vhost/vdpa.c | 17 +++++++++++++++++ include/linux/vdpa.h | 3 ++- include/uapi/linux/vhost.h | 4 ++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 05f5fd2af58f..04375722c0e5 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -358,6 +358,20 @@ static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp) return 0; } +static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp) +{ + struct vdpa_device *vdpa = v->vdpa; + const struct vdpa_config_ops *ops = vdpa->config; + u32 size; + + size = ops->get_config_size(vdpa); + + if (copy_to_user(argp, &size, sizeof(size))) + return -EFAULT; + + return 0; +} + static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, void __user *argp) { @@ -495,6 +509,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, case VHOST_VDPA_GET_IOVA_RANGE: r = vhost_vdpa_get_iova_range(v, argp); break; + case VHOST_VDPA_GET_CONFIG_SIZE: + r = vhost_vdpa_get_config_size(v, argp); + break; default: r = vhost_dev_ioctl(&v->vdev, cmd, argp); if (r == -ENOIOCTLCMD) diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 721089bb4c84..a5269191edda 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -207,7 +207,8 @@ struct vdpa_map_file { * @reset: Reset device * @vdev: vdpa device * Returns integer: success (0) or error (< 0) - * @get_config_size: Get the size of the configuration space + * @get_config_size: Get the size of the configuration space includes + * fields that are conditional on feature bits. 
* @vdev: vdpa device * Returns size_t: configuration size * @get_config: Read from device specific configuration space diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index c998860d7bbc..bc74e95a273a 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -150,4 +150,8 @@ /* Get the valid iova range */ #define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ struct vhost_vdpa_iova_range) + +/* Get the config size */ +#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) + #endif From 81d46d693173a5c86a9b0c648eca1817ad5c0ae5 Mon Sep 17 00:00:00 2001 From: Longpeng Date: Tue, 15 Mar 2022 11:25:52 +0800 Subject: [PATCH 30/33] vdpa: change the type of nvqs to u32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change vdpa_device.nvqs and vhost_vdpa.nvqs to use u32 Signed-off-by: Longpeng Link: https://lore.kernel.org/r/20220315032553.455-3-longpeng2@huawei.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: Longpeng <longpeng2@huawei.com>

Acked-by: Jason Wang <jasowang@redhat.com>
 
Reviewed-by: Stefano Garzarella --- drivers/vdpa/vdpa.c | 6 +++--- drivers/vhost/vdpa.c | 10 ++++++---- include/linux/vdpa.h | 6 +++--- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index 1ea525433a5c..2b75c00b1005 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -232,7 +232,7 @@ static int vdpa_name_match(struct device *dev, const void *data) return (strcmp(dev_name(&vdev->dev), data) == 0); } -static int __vdpa_register_device(struct vdpa_device *vdev, int nvqs) +static int __vdpa_register_device(struct vdpa_device *vdev, u32 nvqs) { struct device *dev; @@ -257,7 +257,7 @@ static int __vdpa_register_device(struct vdpa_device *vdev, int nvqs) * * Return: Returns an error when fail to add device to vDPA bus */ -int _vdpa_register_device(struct vdpa_device *vdev, int nvqs) +int _vdpa_register_device(struct vdpa_device *vdev, u32 nvqs) { if (!vdev->mdev) return -EINVAL; @@ -274,7 +274,7 @@ EXPORT_SYMBOL_GPL(_vdpa_register_device); * * Return: Returns an error when fail to add to vDPA bus */ -int vdpa_register_device(struct vdpa_device *vdev, int nvqs) +int vdpa_register_device(struct vdpa_device *vdev, u32 nvqs) { int err; diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 04375722c0e5..3b7fc2fe45f3 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -42,7 +42,7 @@ struct vhost_vdpa { struct device dev; struct cdev cdev; atomic_t opened; - int nvqs; + u32 nvqs; int virtio_id; int minor; struct eventfd_ctx *config_ctx; @@ -161,7 +161,8 @@ static long vhost_vdpa_set_status(struct vhost_vdpa *v, u8 __user *statusp) struct vdpa_device *vdpa = v->vdpa; const struct vdpa_config_ops *ops = vdpa->config; u8 status, status_old; - int ret, nvqs = v->nvqs; + u32 nvqs = v->nvqs; + int ret; u16 i; if (copy_from_user(&status, statusp, sizeof(status))) @@ -968,7 +969,8 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) struct vhost_vdpa *v; struct vhost_dev *dev; 
struct vhost_virtqueue **vqs; - int nvqs, i, r, opened; + int r, opened; + u32 i, nvqs; v = container_of(inode->i_cdev, struct vhost_vdpa, cdev); @@ -1021,7 +1023,7 @@ err: static void vhost_vdpa_clean_irq(struct vhost_vdpa *v) { - int i; + u32 i; for (i = 0; i < v->nvqs; i++) vhost_vdpa_unsetup_vq_irq(v, i); diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index a5269191edda..8943a209202e 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -83,7 +83,7 @@ struct vdpa_device { unsigned int index; bool features_valid; bool use_va; - int nvqs; + u32 nvqs; struct vdpa_mgmt_dev *mdev; }; @@ -338,10 +338,10 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, dev_struct, member)), name, use_va), \ dev_struct, member) -int vdpa_register_device(struct vdpa_device *vdev, int nvqs); +int vdpa_register_device(struct vdpa_device *vdev, u32 nvqs); void vdpa_unregister_device(struct vdpa_device *vdev); -int _vdpa_register_device(struct vdpa_device *vdev, int nvqs); +int _vdpa_register_device(struct vdpa_device *vdev, u32 nvqs); void _vdpa_unregister_device(struct vdpa_device *vdev); /** From b04d910af330b55e1d5d6eb9ecd53a375a9cf81c Mon Sep 17 00:00:00 2001 From: Longpeng Date: Tue, 15 Mar 2022 11:25:53 +0800 Subject: [PATCH 31/33] vdpa: support exposing the count of vqs to userspace - GET_VQS_COUNT: the count of virtqueues that are exposed Signed-off-by: Longpeng Link: https://lore.kernel.org/r/20220315032553.455-4-longpeng2@huawei.com Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: Longpeng <longpeng2@huawei.com>
Reviewed-by: Stefano Garzarella --- drivers/vhost/vdpa.c | 13 +++++++++++++ include/uapi/linux/vhost.h | 3 +++ 2 files changed, 16 insertions(+) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 3b7fc2fe45f3..4c2f0bd06285 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -373,6 +373,16 @@ static long vhost_vdpa_get_config_size(struct vhost_vdpa *v, u32 __user *argp) return 0; } +static long vhost_vdpa_get_vqs_count(struct vhost_vdpa *v, u32 __user *argp) +{ + struct vdpa_device *vdpa = v->vdpa; + + if (copy_to_user(argp, &vdpa->nvqs, sizeof(vdpa->nvqs))) + return -EFAULT; + + return 0; +} + static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, void __user *argp) { @@ -513,6 +523,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, case VHOST_VDPA_GET_CONFIG_SIZE: r = vhost_vdpa_get_config_size(v, argp); break; + case VHOST_VDPA_GET_VQS_COUNT: + r = vhost_vdpa_get_vqs_count(v, argp); + break; default: r = vhost_dev_ioctl(&v->vdev, cmd, argp); if (r == -ENOIOCTLCMD) diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h index bc74e95a273a..5d99e7c242a2 100644 --- a/include/uapi/linux/vhost.h +++ b/include/uapi/linux/vhost.h @@ -154,4 +154,7 @@ /* Get the config size */ #define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) +/* Get the count of all virtqueues */ +#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) + #endif From 03a91c9af2c42ae14afafb829a4b7e6589ab5892 Mon Sep 17 00:00:00 2001 From: Anirudh Rayabharam Date: Sat, 12 Mar 2022 19:41:21 +0530 Subject: [PATCH 32/33] vhost: handle error while adding split ranges to iotlb vhost_iotlb_add_range_ctx() handles the range [0, ULONG_MAX] by splitting it into two ranges and adding them separately. The return value of adding the first range to the iotlb is currently ignored. Check the return value and bail out in case of an error. 
Signed-off-by: Anirudh Rayabharam Link: https://lore.kernel.org/r/20220312141121.4981-1-mail@anirudhrb.com Signed-off-by: Michael S. Tsirkin Fixes: e2ae38cf3d91 ("vhost: fix hung thread due to erroneous iotlb entries") Reviewed-by: Stefano Garzarella --- drivers/vhost/iotlb.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/iotlb.c b/drivers/vhost/iotlb.c index 40b098320b2a..5829cf2d0552 100644 --- a/drivers/vhost/iotlb.c +++ b/drivers/vhost/iotlb.c @@ -62,8 +62,12 @@ int vhost_iotlb_add_range_ctx(struct vhost_iotlb *iotlb, */ if (start == 0 && last == ULONG_MAX) { u64 mid = last / 2; + int err = vhost_iotlb_add_range_ctx(iotlb, start, mid, addr, + perm, opaque); + + if (err) + return err; - vhost_iotlb_add_range_ctx(iotlb, start, mid, addr, perm, opaque); addr += mid + 1; start = mid + 1; } From ad6dc1daaf29f97f23cc810d60ee01c0e83f4c6b Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 21 Mar 2022 16:13:03 +0200 Subject: [PATCH 33/33] vdpa/mlx5: Avoid processing works if workqueue was destroyed If mlx5_vdpa gets unloaded while a VM is running, the workqueue will be destroyed. However, vhost might still have a reference to the kick function and might attempt to push new work items. This could lead to a NULL pointer dereference. To fix this, set mvdev->wq to NULL just before destroying it and verify that the workqueue is not NULL in mlx5_vdpa_kick_vq before attempting to push a new work item. Fixes: 5262912ef3cf ("vdpa/mlx5: Add support for control VQ and MAC setting") Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20220321141303.9586-1-elic@nvidia.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index c0f0ecb82c6f..2f4fb09f1e89 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1712,7 +1712,7 @@ static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) return; if (unlikely(is_ctrl_vq_idx(mvdev, idx))) { - if (!mvdev->cvq.ready) + if (!mvdev->wq || !mvdev->cvq.ready) return; wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC); @@ -2779,9 +2779,12 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device * struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev); struct mlx5_vdpa_dev *mvdev = to_mvdev(dev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); + struct workqueue_struct *wq; mlx5_notifier_unregister(mvdev->mdev, &ndev->nb); - destroy_workqueue(mvdev->wq); + wq = mvdev->wq; + mvdev->wq = NULL; + destroy_workqueue(wq); _vdpa_unregister_device(dev); mgtdev->ndev = NULL; }