From 8b0a9d42301e45d501d751074a6f767fded680b1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 29 Jun 2015 11:16:25 +0200 Subject: [PATCH 01/10] virtio_net: document VIRTIO_NET_CTRL_GUEST_OFFLOADS Document VIRTIO_NET_CTRL_GUEST_OFFLOADS and the relevant feature bits. Will allow ethtool control of the offloads down the road. Reported-by: Yan Vugenfirer Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_net.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 7bbee79ca293..ec32293a00db 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -34,6 +34,7 @@ /* The feature bitmap for virtio net */ #define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */ #define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */ +#define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 2 /* Dynamic offload configuration. */ #define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */ #define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */ #define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */ @@ -226,4 +227,19 @@ struct virtio_net_ctrl_mq { #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 +/* + * Control network offloads + * + * Reconfigures the network offloads that Guest can handle. + * + * Available with the VIRTIO_NET_F_CTRL_GUEST_OFFLOADS feature bit. + * + * Command data format matches the feature bit mask exactly. + * + * See VIRTIO_NET_F_GUEST_* for the list of offloads + * that can be enabled/disabled. 
+ */ +#define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 +#define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 + #endif /* _LINUX_VIRTIO_NET_H */ From bcfeacab45e6d419c6bafc0e57ea4b1125e23231 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 16 Jun 2015 18:33:35 +0200 Subject: [PATCH 02/10] vhost: use binary search instead of linear in find_region() For default region layouts performance stays the same as linear search i.e. it takes around 210ns average for translate_desc() that inlines find_region(). But it scales better with larger amount of regions, 235ns BS vs 300ns LS with 55 memory regions and it will be about the same values when allowed number of slots is increased to 509 like it has been done in kvm. Signed-off-by: Igor Mammedov Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 9e8e004bb1c3..71bb46813031 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -25,6 +25,7 @@ #include #include #include +#include #include "vhost.h" @@ -663,6 +664,16 @@ int vhost_vq_access_ok(struct vhost_virtqueue *vq) } EXPORT_SYMBOL_GPL(vhost_vq_access_ok); +static int vhost_memory_reg_sort_cmp(const void *p1, const void *p2) +{ + const struct vhost_memory_region *r1 = p1, *r2 = p2; + if (r1->guest_phys_addr < r2->guest_phys_addr) + return 1; + if (r1->guest_phys_addr > r2->guest_phys_addr) + return -1; + return 0; +} + static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) { struct vhost_memory mem, *newmem, *oldmem; @@ -682,9 +693,11 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) memcpy(newmem, &mem, size); if (copy_from_user(newmem->regions, m->regions, mem.nregions * sizeof *m->regions)) { - kfree(newmem); + kvfree(newmem); return -EFAULT; } + sort(newmem->regions, newmem->nregions, sizeof(*newmem->regions), + vhost_memory_reg_sort_cmp, NULL); 
if (!memory_access_ok(d, newmem, 0)) { kfree(newmem); @@ -992,17 +1005,22 @@ EXPORT_SYMBOL_GPL(vhost_dev_ioctl); static const struct vhost_memory_region *find_region(struct vhost_memory *mem, __u64 addr, __u32 len) { - struct vhost_memory_region *reg; - int i; + const struct vhost_memory_region *reg; + int start = 0, end = mem->nregions; - /* linear search is not brilliant, but we really have on the order of 6 - * regions in practice */ - for (i = 0; i < mem->nregions; ++i) { - reg = mem->regions + i; - if (reg->guest_phys_addr <= addr && - reg->guest_phys_addr + reg->memory_size - 1 >= addr) - return reg; + while (start < end) { + int slot = start + (end - start) / 2; + reg = mem->regions + slot; + if (addr >= reg->guest_phys_addr) + end = slot; + else + start = slot + 1; } + + reg = mem->regions + start; /* start == nregions when addr is below every region */ + if (start < mem->nregions && addr >= reg->guest_phys_addr && + reg->guest_phys_addr + reg->memory_size > addr) + return reg; return NULL; } From 908a5544cd29ed60114ed60bded6dbe8cdd56326 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 30 Jun 2015 10:59:04 +1000 Subject: [PATCH 03/10] virtio scsi: fix unused variable warning drivers/scsi/virtio_scsi.c: In function 'virtscsi_probe': drivers/scsi/virtio_scsi.c:952:11: warning: unused variable 'host_prot' [-Wunused-variable] int err, host_prot; ^ Signed-off-by: Stephen Rothwell Reviewed-by: Michael S. 
Tsirkin --- drivers/scsi/virtio_scsi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c index f164f24a4a55..55441c7f3e83 100644 --- a/drivers/scsi/virtio_scsi.c +++ b/drivers/scsi/virtio_scsi.c @@ -944,7 +944,7 @@ static int virtscsi_probe(struct virtio_device *vdev) { struct Scsi_Host *shost; struct virtio_scsi *vscsi; - int err, host_prot; + int err; u32 sg_elems, num_targets; u32 cmd_per_lun; u32 num_queues; @@ -1003,6 +1003,8 @@ static int virtscsi_probe(struct virtio_device *vdev) shost->nr_hw_queues = num_queues; if (virtio_has_feature(vdev, VIRTIO_SCSI_F_T10_PI)) { + int host_prot; + host_prot = SHOST_DIF_TYPE1_PROTECTION | SHOST_DIF_TYPE2_PROTECTION | SHOST_DIF_TYPE3_PROTECTION | SHOST_DIX_TYPE1_PROTECTION | SHOST_DIX_TYPE2_PROTECTION | SHOST_DIX_TYPE3_PROTECTION; From d768f32aec8c0ebb8499ffca89cfed8f5f1a4432 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 2 Jul 2015 09:21:22 +0200 Subject: [PATCH 04/10] virtio: Fix typecast of pointer in vring_init() The virtio_ring.h header is used in userspace programs (ie. QEMU), too. Here we can not assume that sizeof(pointer) is the same as sizeof(long), e.g. when compiling for Windows, so the typecast in vring_init() should be done with (uintptr_t) instead of (unsigned long). Signed-off-by: Thomas Huth Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_ring.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h index 915980ac68df..c07295969b7e 100644 --- a/include/uapi/linux/virtio_ring.h +++ b/include/uapi/linux/virtio_ring.h @@ -31,6 +31,9 @@ * SUCH DAMAGE. * * Copyright Rusty Russell IBM Corporation 2007. 
*/ +#ifndef __KERNEL__ +#include +#endif #include #include @@ -143,7 +146,7 @@ static inline void vring_init(struct vring *vr, unsigned int num, void *p, vr->num = num; vr->desc = p; vr->avail = p + num*sizeof(struct vring_desc); - vr->used = (void *)(((unsigned long)&vr->avail->ring[num] + sizeof(__virtio16) + vr->used = (void *)(((uintptr_t)&vr->avail->ring[num] + sizeof(__virtio16) + align-1) & ~(align - 1)); } From 3121bb023e2db4f00ed6678898c09e35ed4b5301 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 2 Jul 2015 10:56:49 +0200 Subject: [PATCH 05/10] virtio: define virtio_pci_cfg_cap in header. We already have VIRTIO_PCI_CAP_PCI_CFG, let's define the structure that goes with it. Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_pci.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h index 75301468359f..90007a1abcab 100644 --- a/include/uapi/linux/virtio_pci.h +++ b/include/uapi/linux/virtio_pci.h @@ -157,6 +157,12 @@ struct virtio_pci_common_cfg { __le32 queue_used_hi; /* read-write */ }; +/* Fields in VIRTIO_PCI_CAP_PCI_CFG: */ +struct virtio_pci_cfg_cap { + struct virtio_pci_cap cap; + __u8 pci_cfg_data[4]; /* Data for BAR access. */ +}; + /* Macro versions of offsets for the Old Timers! */ #define VIRTIO_PCI_CAP_VNDR 0 #define VIRTIO_PCI_CAP_NEXT 1 From f2dbda3b4fc2833f0a6240ac28b0e3a17f09893b Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Wed, 1 Jul 2015 17:15:37 +0200 Subject: [PATCH 06/10] MAINTAINERS: separate section for s390 virtio drivers The s390-specific virtio drivers have probably more to do with virtio than with kvm today; let's move them out into a separate section to reflect this and to be able to add relevant mailing lists. CC: Christian Borntraeger Signed-off-by: Cornelia Huck Signed-off-by: Michael S. 
Tsirkin --- MAINTAINERS | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index af802b357b6a..280a5681d5b2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5690,7 +5690,6 @@ S: Supported F: Documentation/s390/kvm.txt F: arch/s390/include/asm/kvm* F: arch/s390/kvm/ -F: drivers/s390/kvm/ KERNEL VIRTUAL MACHINE (KVM) FOR ARM M: Christoffer Dall @@ -10571,6 +10570,15 @@ F: drivers/block/virtio_blk.c F: include/linux/virtio_*.h F: include/uapi/linux/virtio_*.h +VIRTIO DRIVERS FOR S390 +M: Christian Borntraeger +M: Cornelia Huck +L: linux-s390@vger.kernel.org +L: virtualization@lists.linux-foundation.org +L: kvm@vger.kernel.org +S: Supported +F: drivers/s390/kvm/ + VIRTIO HOST (VHOST) M: "Michael S. Tsirkin" L: kvm@vger.kernel.org From 1b568d934eec1c5c99565c41f6c8ca66e9743e96 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Tue, 7 Jul 2015 11:41:01 +0200 Subject: [PATCH 07/10] virtio/s390: rename drivers/s390/kvm -> drivers/s390/virtio This more accurately reflects what these drivers actually do. Suggested-by: Paolo Bonzini Acked-by: Christian Borntraeger Signed-off-by: Cornelia Huck Signed-off-by: Michael S. Tsirkin --- MAINTAINERS | 2 +- drivers/s390/Makefile | 2 +- drivers/s390/{kvm => virtio}/Makefile | 0 drivers/s390/{kvm => virtio}/kvm_virtio.c | 0 drivers/s390/{kvm => virtio}/virtio_ccw.c | 0 5 files changed, 2 insertions(+), 2 deletions(-) rename drivers/s390/{kvm => virtio}/Makefile (100%) rename drivers/s390/{kvm => virtio}/kvm_virtio.c (100%) rename drivers/s390/{kvm => virtio}/virtio_ccw.c (100%) diff --git a/MAINTAINERS b/MAINTAINERS index 280a5681d5b2..fbef7d0a4dd5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10577,7 +10577,7 @@ L: linux-s390@vger.kernel.org L: virtualization@lists.linux-foundation.org L: kvm@vger.kernel.org S: Supported -F: drivers/s390/kvm/ +F: drivers/s390/virtio/ VIRTIO HOST (VHOST) M: "Michael S. 
Tsirkin" diff --git a/drivers/s390/Makefile b/drivers/s390/Makefile index 95bccfd3f169..e5225ad9c5b1 100644 --- a/drivers/s390/Makefile +++ b/drivers/s390/Makefile @@ -2,7 +2,7 @@ # Makefile for the S/390 specific device drivers # -obj-y += cio/ block/ char/ crypto/ net/ scsi/ kvm/ +obj-y += cio/ block/ char/ crypto/ net/ scsi/ virtio/ drivers-y += drivers/s390/built-in.o diff --git a/drivers/s390/kvm/Makefile b/drivers/s390/virtio/Makefile similarity index 100% rename from drivers/s390/kvm/Makefile rename to drivers/s390/virtio/Makefile diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/virtio/kvm_virtio.c similarity index 100% rename from drivers/s390/kvm/kvm_virtio.c rename to drivers/s390/virtio/kvm_virtio.c diff --git a/drivers/s390/kvm/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c similarity index 100% rename from drivers/s390/kvm/virtio_ccw.c rename to drivers/s390/virtio/virtio_ccw.c From ea52bf8eda9832ad30e9f059c5ead8d44f882a53 Mon Sep 17 00:00:00 2001 From: Pierre Morel Date: Thu, 9 Jul 2015 14:58:26 +0200 Subject: [PATCH 08/10] 9p/trans_virtio: reset virtio device on remove On device shutdown/removal, virtio drivers need to trigger a reset on the device; if this is neglected, the virtio core will complain about non-zero device status. This patch resets the status when the 9p virtio driver is removed from the system by calling vdev->config->reset on the virtio_device to send a reset to the host virtio device. Signed-off-by: Pierre Morel Reviewed-by: Cornelia Huck Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- net/9p/trans_virtio.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 9dd49ca67dbc..6e70ddb158b4 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -704,6 +704,7 @@ static void p9_virtio_remove(struct virtio_device *vdev) mutex_unlock(&virtio_9p_lock); + vdev->config->reset(vdev); vdev->config->del_vqs(vdev); sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); From 4de7255f7d2be5e51664c6ac6011ffd6e5463571 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 1 Jul 2015 11:07:09 +0200 Subject: [PATCH 09/10] vhost: extend memory regions allocation to vmalloc with large number of memory regions we could end up with high order allocations and kmalloc could fail if host is under memory pressure. Considering that memory regions array is used on hot path try harder to allocate using kmalloc and if it fails resort to vmalloc. It's still better than just failing vhost_set_memory() and causing guest crash due to it when a new memory hotplugged to guest. I'll still look at QEMU side solution to reduce amount of memory regions it feeds to vhost to make things even better, but it doesn't hurt for kernel to behave smarter and don't crash older QEMU's which could use large amount of memory regions. Signed-off-by: Igor Mammedov Signed-off-by: Michael S. 
Tsirkin --- drivers/vhost/vhost.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 71bb46813031..a4ac369f6adb 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -544,7 +545,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked) fput(dev->log_file); dev->log_file = NULL; /* No one will access memory at this point */ - kfree(dev->memory); + kvfree(dev->memory); dev->memory = NULL; WARN_ON(!list_empty(&dev->work_list)); if (dev->worker) { @@ -674,6 +675,18 @@ static int vhost_memory_reg_sort_cmp(const void *p1, const void *p2) return 0; } +static void *vhost_kvzalloc(unsigned long size) +{ + void *n = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); + + if (!n) { + n = vzalloc(size); + if (!n) + return NULL; /* caller tests for NULL, not IS_ERR() */ + } + return n; +} + static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) { struct vhost_memory mem, *newmem, *oldmem; @@ -686,7 +699,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) return -EOPNOTSUPP; if (mem.nregions > VHOST_MEMORY_MAX_NREGIONS) return -E2BIG; - newmem = kmalloc(size + mem.nregions * sizeof *m->regions, GFP_KERNEL); + newmem = vhost_kvzalloc(size + mem.nregions * sizeof(*m->regions)); if (!newmem) return -ENOMEM; @@ -700,7 +713,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) vhost_memory_reg_sort_cmp, NULL); if (!memory_access_ok(d, newmem, 0)) { - kfree(newmem); + kvfree(newmem); return -EFAULT; } oldmem = d->memory; @@ -712,7 +725,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) d->vqs[i]->memory = newmem; mutex_unlock(&d->vqs[i]->mutex); } - kfree(oldmem); + kvfree(oldmem); return 0; } From c9ce42f72fd0ba180fd35539829e4139dca31494 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Thu, 2 
Jul 2015 15:08:11 +0200 Subject: [PATCH 10/10] vhost: add max_mem_regions module parameter It became possible to use a bigger amount of memory slots, which is used by memory hotplug for registering hotplugged memory. However QEMU crashes if it's used with more than ~60 pc-dimm devices and vhost-net enabled since host kernel in module vhost-net refuses to accept more than 64 memory regions. Allow tweaking the limit via the max_mem_regions module parameter, with the default value set to 64 slots. Signed-off-by: Igor Mammedov Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index a4ac369f6adb..a9fe859f43c8 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -30,8 +30,12 @@ #include "vhost.h" +static ushort max_mem_regions = 64; +module_param(max_mem_regions, ushort, 0444); +MODULE_PARM_DESC(max_mem_regions, + "Maximum number of memory regions in memory map. (default: 64)"); + enum { - VHOST_MEMORY_MAX_NREGIONS = 64, VHOST_MEMORY_F_LOG = 0x1, }; @@ -697,7 +701,7 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) return -EFAULT; if (mem.padding) return -EOPNOTSUPP; - if (mem.nregions > VHOST_MEMORY_MAX_NREGIONS) + if (mem.nregions > max_mem_regions) return -E2BIG; newmem = vhost_kvzalloc(size + mem.nregions * sizeof(*m->regions)); if (!newmem)