kvm/irqchip: Speed up KVM_SET_GSI_ROUTING
When starting lots of dataplane devices the bootup takes very long on Christian's s390 with irqfd patches. With larger setups he is even able to trigger some timeouts in some components. Turns out that the KVM_SET_GSI_ROUTING ioctl takes very long (strace claims up to 0.1 sec) when having multiple CPUs. This is caused by the synchronize_rcu and the HZ=100 of s390. By changing the code to use a private srcu we can speed things up. This patch reduces the boot time till mounting root from 8 to 2 seconds on my s390 guest with 100 disks. Uses of hlist_for_each_entry_rcu, hlist_add_head_rcu, hlist_del_init_rcu are fine because they do not have lockdep checks (hlist_for_each_entry_rcu uses rcu_dereference_raw rather than rcu_dereference, and write-sides do not do rcu lockdep at all). Note that we're hardly relying on the "sleepable" part of srcu. We just want SRCU's faster detection of grace periods. Testing was done by Andrew Theurer using netperf tests STREAM, MAERTS and RR. The difference between results "before" and "after" the patch has mean -0.2% and standard deviation 0.6%. Using a paired t-test on the data points says that there is a 2.5% probability that the patch is the cause of the performance difference (rather than a random fluctuation). (Restricting the t-test to RR, which is the most likely to be affected, changes the numbers to respectively -0.3% mean, 0.7% stdev, and 8% probability that the numbers actually say something about the patch. The probability increases mostly because there are fewer data points). Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Michael S. Tsirkin <mst@redhat.com> Tested-by: Christian Borntraeger <borntraeger@de.ibm.com> # s390 Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
parent
57b5981cd3
commit
719d93cd5f
@ -370,6 +370,7 @@ struct kvm {
|
||||
struct mm_struct *mm; /* userspace tied to this vm */
|
||||
struct kvm_memslots *memslots;
|
||||
struct srcu_struct srcu;
|
||||
struct srcu_struct irq_srcu;
|
||||
#ifdef CONFIG_KVM_APIC_ARCHITECTURE
|
||||
u32 bsp_vcpu_id;
|
||||
#endif
|
||||
|
@ -31,6 +31,7 @@
|
||||
#include <linux/list.h>
|
||||
#include <linux/eventfd.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/srcu.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include "iodev.h"
|
||||
@ -118,19 +119,22 @@ static void
|
||||
irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
|
||||
{
|
||||
struct _irqfd_resampler *resampler;
|
||||
struct kvm *kvm;
|
||||
struct _irqfd *irqfd;
|
||||
int idx;
|
||||
|
||||
resampler = container_of(kian, struct _irqfd_resampler, notifier);
|
||||
kvm = resampler->kvm;
|
||||
|
||||
kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
|
||||
kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
|
||||
resampler->notifier.gsi, 0, false);
|
||||
|
||||
rcu_read_lock();
|
||||
idx = srcu_read_lock(&kvm->irq_srcu);
|
||||
|
||||
list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
|
||||
eventfd_signal(irqfd->resamplefd, 1);
|
||||
|
||||
rcu_read_unlock();
|
||||
srcu_read_unlock(&kvm->irq_srcu, idx);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -142,7 +146,7 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd)
|
||||
mutex_lock(&kvm->irqfds.resampler_lock);
|
||||
|
||||
list_del_rcu(&irqfd->resampler_link);
|
||||
synchronize_rcu();
|
||||
synchronize_srcu(&kvm->irq_srcu);
|
||||
|
||||
if (list_empty(&resampler->list)) {
|
||||
list_del(&resampler->link);
|
||||
@ -221,17 +225,18 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
|
||||
unsigned long flags = (unsigned long)key;
|
||||
struct kvm_kernel_irq_routing_entry *irq;
|
||||
struct kvm *kvm = irqfd->kvm;
|
||||
int idx;
|
||||
|
||||
if (flags & POLLIN) {
|
||||
rcu_read_lock();
|
||||
irq = rcu_dereference(irqfd->irq_entry);
|
||||
idx = srcu_read_lock(&kvm->irq_srcu);
|
||||
irq = srcu_dereference(irqfd->irq_entry, &kvm->irq_srcu);
|
||||
/* An event has been signaled, inject an interrupt */
|
||||
if (irq)
|
||||
kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
|
||||
false);
|
||||
else
|
||||
schedule_work(&irqfd->inject);
|
||||
rcu_read_unlock();
|
||||
srcu_read_unlock(&kvm->irq_srcu, idx);
|
||||
}
|
||||
|
||||
if (flags & POLLHUP) {
|
||||
@ -363,7 +368,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
|
||||
}
|
||||
|
||||
list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
|
||||
synchronize_rcu();
|
||||
synchronize_srcu(&kvm->irq_srcu);
|
||||
|
||||
mutex_unlock(&kvm->irqfds.resampler_lock);
|
||||
}
|
||||
@ -465,7 +470,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
|
||||
* another thread calls kvm_irq_routing_update before
|
||||
* we flush workqueue below (we synchronize with
|
||||
* kvm_irq_routing_update using irqfds.lock).
|
||||
* It is paired with synchronize_rcu done by caller
|
||||
* It is paired with synchronize_srcu done by caller
|
||||
* of that function.
|
||||
*/
|
||||
rcu_assign_pointer(irqfd->irq_entry, NULL);
|
||||
@ -524,7 +529,7 @@ kvm_irqfd_release(struct kvm *kvm)
|
||||
|
||||
/*
|
||||
* Change irq_routing and irqfd.
|
||||
* Caller must invoke synchronize_rcu afterwards.
|
||||
* Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
|
||||
*/
|
||||
void kvm_irq_routing_update(struct kvm *kvm,
|
||||
struct kvm_irq_routing_table *irq_rt)
|
||||
|
@ -163,6 +163,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
|
||||
struct kvm_kernel_irq_routing_entry *e;
|
||||
int ret = -EINVAL;
|
||||
struct kvm_irq_routing_table *irq_rt;
|
||||
int idx;
|
||||
|
||||
trace_kvm_set_irq(irq, level, irq_source_id);
|
||||
|
||||
@ -174,8 +175,8 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
|
||||
* Since there's no easy way to do this, we only support injecting MSI
|
||||
* which is limited to 1:1 GSI mapping.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
irq_rt = rcu_dereference(kvm->irq_routing);
|
||||
idx = srcu_read_lock(&kvm->irq_srcu);
|
||||
irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
|
||||
if (irq < irq_rt->nr_rt_entries)
|
||||
hlist_for_each_entry(e, &irq_rt->map[irq], link) {
|
||||
if (likely(e->type == KVM_IRQ_ROUTING_MSI))
|
||||
@ -184,7 +185,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
|
||||
ret = -EWOULDBLOCK;
|
||||
break;
|
||||
}
|
||||
rcu_read_unlock();
|
||||
srcu_read_unlock(&kvm->irq_srcu, idx);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -253,22 +254,22 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
|
||||
mutex_lock(&kvm->irq_lock);
|
||||
hlist_del_rcu(&kimn->link);
|
||||
mutex_unlock(&kvm->irq_lock);
|
||||
synchronize_rcu();
|
||||
synchronize_srcu(&kvm->irq_srcu);
|
||||
}
|
||||
|
||||
void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
|
||||
bool mask)
|
||||
{
|
||||
struct kvm_irq_mask_notifier *kimn;
|
||||
int gsi;
|
||||
int idx, gsi;
|
||||
|
||||
rcu_read_lock();
|
||||
gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
|
||||
idx = srcu_read_lock(&kvm->irq_srcu);
|
||||
gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
|
||||
if (gsi != -1)
|
||||
hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link)
|
||||
if (kimn->irq == gsi)
|
||||
kimn->func(kimn, mask);
|
||||
rcu_read_unlock();
|
||||
srcu_read_unlock(&kvm->irq_srcu, idx);
|
||||
}
|
||||
|
||||
int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
|
||||
|
@ -26,6 +26,7 @@
|
||||
|
||||
#include <linux/kvm_host.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/srcu.h>
|
||||
#include <linux/export.h>
|
||||
#include <trace/events/kvm.h>
|
||||
#include "irq.h"
|
||||
@ -33,19 +34,19 @@
|
||||
bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
|
||||
{
|
||||
struct kvm_irq_ack_notifier *kian;
|
||||
int gsi;
|
||||
int gsi, idx;
|
||||
|
||||
rcu_read_lock();
|
||||
gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
|
||||
idx = srcu_read_lock(&kvm->irq_srcu);
|
||||
gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
|
||||
if (gsi != -1)
|
||||
hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
|
||||
link)
|
||||
if (kian->gsi == gsi) {
|
||||
rcu_read_unlock();
|
||||
srcu_read_unlock(&kvm->irq_srcu, idx);
|
||||
return true;
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
srcu_read_unlock(&kvm->irq_srcu, idx);
|
||||
|
||||
return false;
|
||||
}
|
||||
@ -54,18 +55,18 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
|
||||
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
|
||||
{
|
||||
struct kvm_irq_ack_notifier *kian;
|
||||
int gsi;
|
||||
int gsi, idx;
|
||||
|
||||
trace_kvm_ack_irq(irqchip, pin);
|
||||
|
||||
rcu_read_lock();
|
||||
gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
|
||||
idx = srcu_read_lock(&kvm->irq_srcu);
|
||||
gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
|
||||
if (gsi != -1)
|
||||
hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
|
||||
link)
|
||||
if (kian->gsi == gsi)
|
||||
kian->irq_acked(kian);
|
||||
rcu_read_unlock();
|
||||
srcu_read_unlock(&kvm->irq_srcu, idx);
|
||||
}
|
||||
|
||||
void kvm_register_irq_ack_notifier(struct kvm *kvm,
|
||||
@ -85,7 +86,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
|
||||
mutex_lock(&kvm->irq_lock);
|
||||
hlist_del_init_rcu(&kian->link);
|
||||
mutex_unlock(&kvm->irq_lock);
|
||||
synchronize_rcu();
|
||||
synchronize_srcu(&kvm->irq_srcu);
|
||||
#ifdef __KVM_HAVE_IOAPIC
|
||||
kvm_vcpu_request_scan_ioapic(kvm);
|
||||
#endif
|
||||
@ -115,7 +116,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
|
||||
bool line_status)
|
||||
{
|
||||
struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
|
||||
int ret = -1, i = 0;
|
||||
int ret = -1, i = 0, idx;
|
||||
struct kvm_irq_routing_table *irq_rt;
|
||||
|
||||
trace_kvm_set_irq(irq, level, irq_source_id);
|
||||
@ -124,12 +125,12 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
|
||||
* IOAPIC. So set the bit in both. The guest will ignore
|
||||
* writes to the unused one.
|
||||
*/
|
||||
rcu_read_lock();
|
||||
irq_rt = rcu_dereference(kvm->irq_routing);
|
||||
idx = srcu_read_lock(&kvm->irq_srcu);
|
||||
irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
|
||||
if (irq < irq_rt->nr_rt_entries)
|
||||
hlist_for_each_entry(e, &irq_rt->map[irq], link)
|
||||
irq_set[i++] = *e;
|
||||
rcu_read_unlock();
|
||||
srcu_read_unlock(&kvm->irq_srcu, idx);
|
||||
|
||||
while(i--) {
|
||||
int r;
|
||||
@ -226,7 +227,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
|
||||
kvm_irq_routing_update(kvm, new);
|
||||
mutex_unlock(&kvm->irq_lock);
|
||||
|
||||
synchronize_rcu();
|
||||
synchronize_srcu_expedited(&kvm->irq_srcu);
|
||||
|
||||
new = old;
|
||||
r = 0;
|
||||
|
@ -457,11 +457,11 @@ static struct kvm *kvm_create_vm(unsigned long type)
|
||||
|
||||
r = kvm_arch_init_vm(kvm, type);
|
||||
if (r)
|
||||
goto out_err_nodisable;
|
||||
goto out_err_no_disable;
|
||||
|
||||
r = hardware_enable_all();
|
||||
if (r)
|
||||
goto out_err_nodisable;
|
||||
goto out_err_no_disable;
|
||||
|
||||
#ifdef CONFIG_HAVE_KVM_IRQCHIP
|
||||
INIT_HLIST_HEAD(&kvm->mask_notifier_list);
|
||||
@ -473,10 +473,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
|
||||
r = -ENOMEM;
|
||||
kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
|
||||
if (!kvm->memslots)
|
||||
goto out_err_nosrcu;
|
||||
goto out_err_no_srcu;
|
||||
kvm_init_memslots_id(kvm);
|
||||
if (init_srcu_struct(&kvm->srcu))
|
||||
goto out_err_nosrcu;
|
||||
goto out_err_no_srcu;
|
||||
if (init_srcu_struct(&kvm->irq_srcu))
|
||||
goto out_err_no_irq_srcu;
|
||||
for (i = 0; i < KVM_NR_BUSES; i++) {
|
||||
kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
|
||||
GFP_KERNEL);
|
||||
@ -505,10 +507,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
|
||||
return kvm;
|
||||
|
||||
out_err:
|
||||
cleanup_srcu_struct(&kvm->irq_srcu);
|
||||
out_err_no_irq_srcu:
|
||||
cleanup_srcu_struct(&kvm->srcu);
|
||||
out_err_nosrcu:
|
||||
out_err_no_srcu:
|
||||
hardware_disable_all();
|
||||
out_err_nodisable:
|
||||
out_err_no_disable:
|
||||
for (i = 0; i < KVM_NR_BUSES; i++)
|
||||
kfree(kvm->buses[i]);
|
||||
kfree(kvm->memslots);
|
||||
|
Loading…
Reference in New Issue
Block a user