2008-02-22 12:21:36 -05:00
/*
* KVM paravirt_ops implementation
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation ; either version 2 of the License , or
* ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write to the Free Software
* Foundation , 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 , USA .
*
* Copyright ( C ) 2007 , Red Hat , Inc . , Ingo Molnar < mingo @ redhat . com >
* Copyright IBM Corporation , 2007
* Authors : Anthony Liguori < aliguori @ us . ibm . com >
*/
# include <linux/module.h>
# include <linux/kernel.h>
# include <linux/kvm_para.h>
# include <linux/cpu.h>
# include <linux/mm.h>
2008-02-22 12:21:37 -05:00
# include <linux/highmem.h>
2008-02-22 12:21:38 -05:00
# include <linux/hardirq.h>
2010-10-14 11:22:51 +02:00
# include <linux/notifier.h>
# include <linux/reboot.h>
2010-10-14 11:22:52 +02:00
# include <linux/hash.h>
# include <linux/sched.h>
# include <linux/slab.h>
# include <linux/kprobes.h>
2009-02-11 22:45:42 -02:00
# include <asm/timer.h>
2010-10-14 11:22:51 +02:00
# include <asm/cpu.h>
2010-10-14 11:22:52 +02:00
# include <asm/traps.h>
# include <asm/desc.h>
2010-10-14 11:22:54 +02:00
# include <asm/tlbflush.h>
2012-04-04 15:30:33 +03:00
# include <asm/idle.h>
KVM guest: guest side for eoi avoidance
The idea is simple: there's a bit, per APIC, in guest memory,
that tells the guest that it does not need EOI.
Guest tests it using a single test and clear operation - this is
necessary so that host can detect interrupt nesting - and if set, it can
skip the EOI MSR.
I run a simple microbenchmark to show exit reduction
(note: for testing, need to apply follow-up patch
'kvm: host side for eoi optimization' + a qemu patch
I posted separately, on host):
Before:
Performance counter stats for 'sleep 1s':
47,357 kvm:kvm_entry [99.98%]
0 kvm:kvm_hypercall [99.98%]
0 kvm:kvm_hv_hypercall [99.98%]
5,001 kvm:kvm_pio [99.98%]
0 kvm:kvm_cpuid [99.98%]
22,124 kvm:kvm_apic [99.98%]
49,849 kvm:kvm_exit [99.98%]
21,115 kvm:kvm_inj_virq [99.98%]
0 kvm:kvm_inj_exception [99.98%]
0 kvm:kvm_page_fault [99.98%]
22,937 kvm:kvm_msr [99.98%]
0 kvm:kvm_cr [99.98%]
0 kvm:kvm_pic_set_irq [99.98%]
0 kvm:kvm_apic_ipi [99.98%]
22,207 kvm:kvm_apic_accept_irq [99.98%]
22,421 kvm:kvm_eoi [99.98%]
0 kvm:kvm_pv_eoi [99.99%]
0 kvm:kvm_nested_vmrun [99.99%]
0 kvm:kvm_nested_intercepts [99.99%]
0 kvm:kvm_nested_vmexit [99.99%]
0 kvm:kvm_nested_vmexit_inject [99.99%]
0 kvm:kvm_nested_intr_vmexit [99.99%]
0 kvm:kvm_invlpga [99.99%]
0 kvm:kvm_skinit [99.99%]
57 kvm:kvm_emulate_insn [99.99%]
0 kvm:vcpu_match_mmio [99.99%]
0 kvm:kvm_userspace_exit [99.99%]
2 kvm:kvm_set_irq [99.99%]
2 kvm:kvm_ioapic_set_irq [99.99%]
23,609 kvm:kvm_msi_set_irq [99.99%]
1 kvm:kvm_ack_irq [99.99%]
131 kvm:kvm_mmio [99.99%]
226 kvm:kvm_fpu [100.00%]
0 kvm:kvm_age_page [100.00%]
0 kvm:kvm_try_async_get_page [100.00%]
0 kvm:kvm_async_pf_doublefault [100.00%]
0 kvm:kvm_async_pf_not_present [100.00%]
0 kvm:kvm_async_pf_ready [100.00%]
0 kvm:kvm_async_pf_completed
1.002100578 seconds time elapsed
After:
Performance counter stats for 'sleep 1s':
28,354 kvm:kvm_entry [99.98%]
0 kvm:kvm_hypercall [99.98%]
0 kvm:kvm_hv_hypercall [99.98%]
1,347 kvm:kvm_pio [99.98%]
0 kvm:kvm_cpuid [99.98%]
1,931 kvm:kvm_apic [99.98%]
29,595 kvm:kvm_exit [99.98%]
24,884 kvm:kvm_inj_virq [99.98%]
0 kvm:kvm_inj_exception [99.98%]
0 kvm:kvm_page_fault [99.98%]
1,986 kvm:kvm_msr [99.98%]
0 kvm:kvm_cr [99.98%]
0 kvm:kvm_pic_set_irq [99.98%]
0 kvm:kvm_apic_ipi [99.99%]
25,953 kvm:kvm_apic_accept_irq [99.99%]
26,132 kvm:kvm_eoi [99.99%]
26,593 kvm:kvm_pv_eoi [99.99%]
0 kvm:kvm_nested_vmrun [99.99%]
0 kvm:kvm_nested_intercepts [99.99%]
0 kvm:kvm_nested_vmexit [99.99%]
0 kvm:kvm_nested_vmexit_inject [99.99%]
0 kvm:kvm_nested_intr_vmexit [99.99%]
0 kvm:kvm_invlpga [99.99%]
0 kvm:kvm_skinit [99.99%]
284 kvm:kvm_emulate_insn [99.99%]
68 kvm:vcpu_match_mmio [99.99%]
68 kvm:kvm_userspace_exit [99.99%]
2 kvm:kvm_set_irq [99.99%]
2 kvm:kvm_ioapic_set_irq [99.99%]
28,288 kvm:kvm_msi_set_irq [99.99%]
1 kvm:kvm_ack_irq [99.99%]
131 kvm:kvm_mmio [100.00%]
588 kvm:kvm_fpu [100.00%]
0 kvm:kvm_age_page [100.00%]
0 kvm:kvm_try_async_get_page [100.00%]
0 kvm:kvm_async_pf_doublefault [100.00%]
0 kvm:kvm_async_pf_not_present [100.00%]
0 kvm:kvm_async_pf_ready [100.00%]
0 kvm:kvm_async_pf_completed
1.002039622 seconds time elapsed
We see that # of exits is almost halved.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
2012-06-24 19:24:34 +03:00
# include <asm/apic.h>
# include <asm/apicdef.h>
2012-07-06 13:47:39 -04:00
# include <asm/hypervisor.h>
2008-02-22 12:21:38 -05:00
2010-10-14 11:22:51 +02:00
/* Non-zero while async page fault support may be enabled (the default). */
static int kvmapf = 1;

/*
 * Command-line handler that switches async page fault support off.
 * @arg is ignored.  Always returns 0.
 */
static int parse_no_kvmapf(char *arg)
{
	kvmapf = 0;

	return 0;
}
early_param ( " no-kvmapf " , parse_no_kvmapf ) ;
2011-07-11 15:28:19 -04:00
/* Non-zero by default; cleared by parse_no_stealacc(). */
static int steal_acc = 1;

/*
 * Command-line handler that clears steal_acc.
 * @arg is ignored.  Always returns 0.
 */
static int parse_no_stealacc(char *arg)
{
	steal_acc = 0;

	return 0;
}
early_param ( " no-steal-acc " , parse_no_stealacc ) ;
2010-10-14 11:22:51 +02:00
/*
 * Per-CPU async page fault area.  Its physical address is written to
 * MSR_KVM_ASYNC_PF_EN in kvm_guest_cpu_init().
 */
static DEFINE_PER_CPU ( struct kvm_vcpu_pv_apf_data , apf_reason ) __aligned ( 64 ) ;
2011-07-11 15:28:19 -04:00
/*
 * Per-CPU steal time area.  Its physical address is written to
 * MSR_KVM_STEAL_TIME in kvm_register_steal_time().
 */
static DEFINE_PER_CPU ( struct kvm_steal_time , steal_time ) __aligned ( 64 ) ;
/* Gates steal time registration, teardown and the steal-time MSR writes. */
static int has_steal_clock = 0 ;
2008-02-22 12:21:38 -05:00
2008-02-22 12:21:36 -05:00
/*
 * I/O delay hook for KVM guests: no "IO delay" is needed, so this is
 * deliberately a no-op.
 */
static void kvm_io_delay(void)
{
	/* nothing to do */
}
2010-10-14 11:22:52 +02:00
/* The async PF sleeper table has 2^KVM_TASK_SLEEP_HASHBITS buckets. */
# define KVM_TASK_SLEEP_HASHBITS 8
# define KVM_TASK_SLEEP_HASHSIZE (1<<KVM_TASK_SLEEP_HASHBITS)
/*
 * One entry per outstanding async page fault token.  Entries are either
 * on-stack nodes owned by a waiter in kvm_async_pf_task_wait() or
 * kzalloc'ed dummy nodes created by kvm_async_pf_task_wake() when the
 * wake-up arrives before the wait.
 */
struct kvm_task_sleep_node {
/* Chains the node into its hash bucket. */
struct hlist_node link ;
/* Waitqueue a scheduling waiter sleeps on. */
wait_queue_head_t wq ;
/* Async page fault token; also the input to the bucket hash. */
u32 token ;
/* CPU recorded by whoever inserted the node. */
int cpu ;
2010-10-14 11:22:54 +02:00
/* True when the waiter halts the CPU instead of calling schedule(). */
bool halted ;
2010-10-14 11:22:52 +02:00
} ;
/* Hash table of sleepers; each bucket has its own spinlock. */
static struct kvm_task_sleep_head {
spinlock_t lock ;
struct hlist_head list ;
} async_pf_sleepers [ KVM_TASK_SLEEP_HASHSIZE ] ;
/*
 * Search hash bucket @b for the sleep node carrying @token.
 *
 * Returns the matching node, or NULL when the bucket holds none.
 * Both callers in this file invoke it with b->lock held.
 */
static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
						  u32 token)
{
	struct hlist_node *pos;
	struct kvm_task_sleep_node *node;

	hlist_for_each(pos, &b->list) {
		node = hlist_entry(pos, struct kvm_task_sleep_node, link);
		if (node->token == token)
			return node;
	}

	return NULL;
}
/*
 * Wait until the async page fault identified by @token has been reported
 * ready through kvm_async_pf_task_wake().
 *
 * If a dummy node for @token is already hashed, the wake-up came first:
 * the node is removed and freed and the function returns immediately.
 * Otherwise an on-stack node is hashed and the caller either sleeps on the
 * node's waitqueue or, when it must not schedule, halts the CPU, until the
 * waker has unhashed the node.
 */
void kvm_async_pf_task_wait ( u32 token )
{
u32 key = hash_32 ( token , KVM_TASK_SLEEP_HASHBITS ) ;
struct kvm_task_sleep_head * b = & async_pf_sleepers [ key ] ;
struct kvm_task_sleep_node n , * e ;
DEFINE_WAIT ( wait ) ;
2010-10-14 11:22:54 +02:00
int cpu , idle ;
/* Record what idle_cpu() reports for the CPU we are running on. */
cpu = get_cpu ( ) ;
idle = idle_cpu ( cpu ) ;
put_cpu ( ) ;
2010-10-14 11:22:52 +02:00
spin_lock ( & b - > lock ) ;
e = _find_apf_task ( b , token ) ;
if ( e ) {
/* dummy entry exist -> wake up was delivered ahead of PF */
hlist_del ( & e - > link ) ;
kfree ( e ) ;
spin_unlock ( & b - > lock ) ;
return ;
}
/* No wake-up yet: publish an on-stack node for the waker to find. */
n . token = token ;
n . cpu = smp_processor_id ( ) ;
2010-10-14 11:22:54 +02:00
/* Halt instead of scheduling if the CPU was idle or preempt_count() > 1. */
n . halted = idle | | preempt_count ( ) > 1 ;
2010-10-14 11:22:52 +02:00
init_waitqueue_head ( & n . wq ) ;
hlist_add_head ( & n . link , & b - > list ) ;
spin_unlock ( & b - > lock ) ;
for ( ; ; ) {
2010-10-14 11:22:54 +02:00
if ( ! n . halted )
prepare_to_wait ( & n . wq , & wait , TASK_UNINTERRUPTIBLE ) ;
2010-10-14 11:22:52 +02:00
/* The waker unhashes the node (hlist_del_init); that ends the wait. */
if ( hlist_unhashed ( & n . link ) )
break ;
2010-10-14 11:22:54 +02:00
if ( ! n . halted ) {
/* Interrupts are enabled only for the duration of schedule(). */
local_irq_enable ( ) ;
schedule ( ) ;
local_irq_disable ( ) ;
} else {
/*
* We cannot reschedule . So halt .
*/
native_safe_halt ( ) ;
local_irq_disable ( ) ;
}
2010-10-14 11:22:52 +02:00
}
2010-10-14 11:22:54 +02:00
if ( ! n . halted )
finish_wait ( & n . wq , & wait ) ;
2010-10-14 11:22:52 +02:00
return ;
}
EXPORT_SYMBOL_GPL ( kvm_async_pf_task_wait ) ;
/*
 * Unhash @n and notify its waiter.  A halted waiter is kicked with
 * smp_send_reschedule() on the CPU recorded in the node; otherwise the
 * node's waitqueue is woken if anything is waiting on it.
 * Both callers in this file hold the bucket lock.
 */
static void apf_task_wake_one ( struct kvm_task_sleep_node * n )
{
/* hlist_del_init leaves the node unhashed, which the waiter polls for. */
hlist_del_init ( & n - > link ) ;
2010-10-14 11:22:54 +02:00
if ( n - > halted )
smp_send_reschedule ( n - > cpu ) ;
else if ( waitqueue_active ( & n - > wq ) )
2010-10-14 11:22:52 +02:00
wake_up ( & n - > wq ) ;
}
static void apf_task_wake_all ( void )
{
int i ;
for ( i = 0 ; i < KVM_TASK_SLEEP_HASHSIZE ; i + + ) {
struct hlist_node * p , * next ;
struct kvm_task_sleep_head * b = & async_pf_sleepers [ i ] ;
spin_lock ( & b - > lock ) ;
hlist_for_each_safe ( p , next , & b - > list ) {
struct kvm_task_sleep_node * n =
hlist_entry ( p , typeof ( * n ) , link ) ;
if ( n - > cpu = = smp_processor_id ( ) )
apf_task_wake_one ( n ) ;
}
spin_unlock ( & b - > lock ) ;
}
}
/*
 * Report that the async page fault identified by @token is ready.
 *
 * A token of ~0 wakes every sleeper recorded for the current CPU.
 * Otherwise the matching sleeper is woken; if none is hashed yet, a dummy
 * node is inserted so that a later kvm_async_pf_task_wait() for the same
 * token returns immediately.
 */
void kvm_async_pf_task_wake ( u32 token )
{
u32 key = hash_32 ( token , KVM_TASK_SLEEP_HASHBITS ) ;
struct kvm_task_sleep_head * b = & async_pf_sleepers [ key ] ;
struct kvm_task_sleep_node * n ;
if ( token = = ~ 0 ) {
apf_task_wake_all ( ) ;
return ;
}
again :
spin_lock ( & b - > lock ) ;
n = _find_apf_task ( b , token ) ;
if ( ! n ) {
/*
* async PF was not yet handled .
* Add dummy entry for the token .
*/
2012-05-02 15:04:02 +03:00
/* Zeroed allocation, so the dummy node's halted flag starts out false. */
n = kzalloc ( sizeof ( * n ) , GFP_ATOMIC ) ;
2010-10-14 11:22:52 +02:00
if ( ! n ) {
/*
* Allocation failed ! Busy wait while other cpu
* handles async PF .
*/
spin_unlock ( & b - > lock ) ;
cpu_relax ( ) ;
goto again ;
}
n - > token = token ;
n - > cpu = smp_processor_id ( ) ;
init_waitqueue_head ( & n - > wq ) ;
hlist_add_head ( & n - > link , & b - > list ) ;
} else
apf_task_wake_one ( n ) ;
spin_unlock ( & b - > lock ) ;
return ;
}
EXPORT_SYMBOL_GPL ( kvm_async_pf_task_wake ) ;
u32 kvm_read_and_reset_pf_reason ( void )
{
u32 reason = 0 ;
if ( __get_cpu_var ( apf_reason ) . enabled ) {
reason = __get_cpu_var ( apf_reason ) . reason ;
__get_cpu_var ( apf_reason ) . reason = 0 ;
}
return reason ;
}
EXPORT_SYMBOL_GPL ( kvm_read_and_reset_pf_reason ) ;
/*
 * Page fault entry point that dispatches on the per-CPU async page fault
 * reason.  With no async reason pending it defers to do_page_fault();
 * otherwise the value read from CR2, truncated to 32 bits, is used as the
 * token for waiting or waking.
 */
dotraplinkage void __kprobes
do_async_page_fault ( struct pt_regs * regs , unsigned long error_code )
{
switch ( kvm_read_and_reset_pf_reason ( ) ) {
default :
do_page_fault ( regs , error_code ) ;
break ;
case KVM_PV_REASON_PAGE_NOT_PRESENT :
/* page is swapped out by the host. */
kvm_async_pf_task_wait ( ( u32 ) read_cr2 ( ) ) ;
break ;
case KVM_PV_REASON_PAGE_READY :
2012-04-04 15:30:33 +03:00
/* The wake-up is bracketed by rcu_irq_enter()/rcu_irq_exit(). */
rcu_irq_enter ( ) ;
exit_idle ( ) ;
2010-10-14 11:22:52 +02:00
kvm_async_pf_task_wake ( ( u32 ) read_cr2 ( ) ) ;
2012-04-04 15:30:33 +03:00
rcu_irq_exit ( ) ;
2010-10-14 11:22:52 +02:00
break ;
}
}
2009-07-02 11:40:36 +06:00
/*
 * Fill in the paravirt info for a KVM guest and, when the host advertises
 * KVM_FEATURE_NOP_IO_DELAY, install the no-op I/O delay hook.
 */
static void __init paravirt_ops_setup ( void )
2008-02-22 12:21:36 -05:00
{
pv_info . name = " KVM " ;
pv_info . paravirt_enabled = 1 ;
if ( kvm_para_has_feature ( KVM_FEATURE_NOP_IO_DELAY ) )
pv_cpu_ops . io_delay = kvm_io_delay ;
2009-02-11 22:45:42 -02:00
# ifdef CONFIG_X86_IO_APIC
/* Set unconditionally when the kernel is built with IO-APIC support. */
no_timer_check = 1 ;
# endif
2008-02-22 12:21:36 -05:00
}
2011-07-11 15:28:19 -04:00
/*
 * Register the current CPU's steal_time area with the host.
 *
 * Does nothing unless has_steal_clock is set.  Otherwise the area is
 * zeroed, its physical address (with KVM_MSR_ENABLED or'ed in) is written
 * to MSR_KVM_STEAL_TIME, and the registration is logged.
 */
static void kvm_register_steal_time(void)
{
	int cpu = smp_processor_id();
	struct kvm_steal_time *area = &per_cpu(steal_time, cpu);

	if (has_steal_clock) {
		memset(area, 0, sizeof(*area));
		wrmsrl(MSR_KVM_STEAL_TIME, (__pa(area) | KVM_MSR_ENABLED));
		printk(KERN_INFO " kvm-stealtime: cpu %d, msr %lx \n ",
		       cpu, __pa(area));
	}
}
KVM guest: guest side for eoi avoidance
The idea is simple: there's a bit, per APIC, in guest memory,
that tells the guest that it does not need EOI.
Guest tests it using a single test and clear operation - this is
necessary so that host can detect interrupt nesting - and if set, it can
skip the EOI MSR.
I run a simple microbenchmark to show exit reduction
(note: for testing, need to apply follow-up patch
'kvm: host side for eoi optimization' + a qemu patch
I posted separately, on host):
Before:
Performance counter stats for 'sleep 1s':
47,357 kvm:kvm_entry [99.98%]
0 kvm:kvm_hypercall [99.98%]
0 kvm:kvm_hv_hypercall [99.98%]
5,001 kvm:kvm_pio [99.98%]
0 kvm:kvm_cpuid [99.98%]
22,124 kvm:kvm_apic [99.98%]
49,849 kvm:kvm_exit [99.98%]
21,115 kvm:kvm_inj_virq [99.98%]
0 kvm:kvm_inj_exception [99.98%]
0 kvm:kvm_page_fault [99.98%]
22,937 kvm:kvm_msr [99.98%]
0 kvm:kvm_cr [99.98%]
0 kvm:kvm_pic_set_irq [99.98%]
0 kvm:kvm_apic_ipi [99.98%]
22,207 kvm:kvm_apic_accept_irq [99.98%]
22,421 kvm:kvm_eoi [99.98%]
0 kvm:kvm_pv_eoi [99.99%]
0 kvm:kvm_nested_vmrun [99.99%]
0 kvm:kvm_nested_intercepts [99.99%]
0 kvm:kvm_nested_vmexit [99.99%]
0 kvm:kvm_nested_vmexit_inject [99.99%]
0 kvm:kvm_nested_intr_vmexit [99.99%]
0 kvm:kvm_invlpga [99.99%]
0 kvm:kvm_skinit [99.99%]
57 kvm:kvm_emulate_insn [99.99%]
0 kvm:vcpu_match_mmio [99.99%]
0 kvm:kvm_userspace_exit [99.99%]
2 kvm:kvm_set_irq [99.99%]
2 kvm:kvm_ioapic_set_irq [99.99%]
23,609 kvm:kvm_msi_set_irq [99.99%]
1 kvm:kvm_ack_irq [99.99%]
131 kvm:kvm_mmio [99.99%]
226 kvm:kvm_fpu [100.00%]
0 kvm:kvm_age_page [100.00%]
0 kvm:kvm_try_async_get_page [100.00%]
0 kvm:kvm_async_pf_doublefault [100.00%]
0 kvm:kvm_async_pf_not_present [100.00%]
0 kvm:kvm_async_pf_ready [100.00%]
0 kvm:kvm_async_pf_completed
1.002100578 seconds time elapsed
After:
Performance counter stats for 'sleep 1s':
28,354 kvm:kvm_entry [99.98%]
0 kvm:kvm_hypercall [99.98%]
0 kvm:kvm_hv_hypercall [99.98%]
1,347 kvm:kvm_pio [99.98%]
0 kvm:kvm_cpuid [99.98%]
1,931 kvm:kvm_apic [99.98%]
29,595 kvm:kvm_exit [99.98%]
24,884 kvm:kvm_inj_virq [99.98%]
0 kvm:kvm_inj_exception [99.98%]
0 kvm:kvm_page_fault [99.98%]
1,986 kvm:kvm_msr [99.98%]
0 kvm:kvm_cr [99.98%]
0 kvm:kvm_pic_set_irq [99.98%]
0 kvm:kvm_apic_ipi [99.99%]
25,953 kvm:kvm_apic_accept_irq [99.99%]
26,132 kvm:kvm_eoi [99.99%]
26,593 kvm:kvm_pv_eoi [99.99%]
0 kvm:kvm_nested_vmrun [99.99%]
0 kvm:kvm_nested_intercepts [99.99%]
0 kvm:kvm_nested_vmexit [99.99%]
0 kvm:kvm_nested_vmexit_inject [99.99%]
0 kvm:kvm_nested_intr_vmexit [99.99%]
0 kvm:kvm_invlpga [99.99%]
0 kvm:kvm_skinit [99.99%]
284 kvm:kvm_emulate_insn [99.99%]
68 kvm:vcpu_match_mmio [99.99%]
68 kvm:kvm_userspace_exit [99.99%]
2 kvm:kvm_set_irq [99.99%]
2 kvm:kvm_ioapic_set_irq [99.99%]
28,288 kvm:kvm_msi_set_irq [99.99%]
1 kvm:kvm_ack_irq [99.99%]
131 kvm:kvm_mmio [100.00%]
588 kvm:kvm_fpu [100.00%]
0 kvm:kvm_age_page [100.00%]
0 kvm:kvm_try_async_get_page [100.00%]
0 kvm:kvm_async_pf_doublefault [100.00%]
0 kvm:kvm_async_pf_not_present [100.00%]
0 kvm:kvm_async_pf_ready [100.00%]
0 kvm:kvm_async_pf_completed
1.002039622 seconds time elapsed
We see that # of exits is almost halved.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
2012-06-24 19:24:34 +03:00
/* Per-CPU PV EOI word; starts out as KVM_PV_EOI_DISABLED. */
static DEFINE_PER_CPU(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;

/*
 * EOI write hook for KVM guests.  @reg and @val are not used.
 *
 * If KVM_PV_EOI_BIT is set in this CPU's kvm_apic_eoi word, the bit is
 * cleared and the APIC write is skipped.  Otherwise the EOI is issued
 * through apic->write().
 */
static void kvm_guest_apic_eoi_write(u32 reg, u32 val)
{
	/*
	 * This relies on __test_and_clear_bit to modify the memory
	 * in a way that is atomic with respect to the local CPU.
	 * The hypervisor only accesses this memory from the local CPU so
	 * there's no need for lock or memory barriers.
	 * An optimization barrier is implied in apic write.
	 */
	if (!__test_and_clear_bit(KVM_PV_EOI_BIT, &__get_cpu_var(kvm_apic_eoi)))
		apic->write(APIC_EOI, APIC_EOI_ACK);
}
2010-10-14 11:22:51 +02:00
void __cpuinit kvm_guest_cpu_init ( void )
{
if ( ! kvm_para_available ( ) )
return ;
if ( kvm_para_has_feature ( KVM_FEATURE_ASYNC_PF ) & & kvmapf ) {
u64 pa = __pa ( & __get_cpu_var ( apf_reason ) ) ;
2010-10-14 11:22:55 +02:00
# ifdef CONFIG_PREEMPT
pa | = KVM_ASYNC_PF_SEND_ALWAYS ;
# endif
2010-10-14 11:22:51 +02:00
wrmsrl ( MSR_KVM_ASYNC_PF_EN , pa | KVM_ASYNC_PF_ENABLED ) ;
__get_cpu_var ( apf_reason ) . enabled = 1 ;
printk ( KERN_INFO " KVM setup async PF for cpu %d \n " ,
smp_processor_id ( ) ) ;
}
2011-07-11 15:28:19 -04:00
KVM guest: guest side for eoi avoidance
The idea is simple: there's a bit, per APIC, in guest memory,
that tells the guest that it does not need EOI.
Guest tests it using a single test and clear operation - this is
necessary so that host can detect interrupt nesting - and if set, it can
skip the EOI MSR.
I run a simple microbenchmark to show exit reduction
(note: for testing, need to apply follow-up patch
'kvm: host side for eoi optimization' + a qemu patch
I posted separately, on host):
Before:
Performance counter stats for 'sleep 1s':
47,357 kvm:kvm_entry [99.98%]
0 kvm:kvm_hypercall [99.98%]
0 kvm:kvm_hv_hypercall [99.98%]
5,001 kvm:kvm_pio [99.98%]
0 kvm:kvm_cpuid [99.98%]
22,124 kvm:kvm_apic [99.98%]
49,849 kvm:kvm_exit [99.98%]
21,115 kvm:kvm_inj_virq [99.98%]
0 kvm:kvm_inj_exception [99.98%]
0 kvm:kvm_page_fault [99.98%]
22,937 kvm:kvm_msr [99.98%]
0 kvm:kvm_cr [99.98%]
0 kvm:kvm_pic_set_irq [99.98%]
0 kvm:kvm_apic_ipi [99.98%]
22,207 kvm:kvm_apic_accept_irq [99.98%]
22,421 kvm:kvm_eoi [99.98%]
0 kvm:kvm_pv_eoi [99.99%]
0 kvm:kvm_nested_vmrun [99.99%]
0 kvm:kvm_nested_intercepts [99.99%]
0 kvm:kvm_nested_vmexit [99.99%]
0 kvm:kvm_nested_vmexit_inject [99.99%]
0 kvm:kvm_nested_intr_vmexit [99.99%]
0 kvm:kvm_invlpga [99.99%]
0 kvm:kvm_skinit [99.99%]
57 kvm:kvm_emulate_insn [99.99%]
0 kvm:vcpu_match_mmio [99.99%]
0 kvm:kvm_userspace_exit [99.99%]
2 kvm:kvm_set_irq [99.99%]
2 kvm:kvm_ioapic_set_irq [99.99%]
23,609 kvm:kvm_msi_set_irq [99.99%]
1 kvm:kvm_ack_irq [99.99%]
131 kvm:kvm_mmio [99.99%]
226 kvm:kvm_fpu [100.00%]
0 kvm:kvm_age_page [100.00%]
0 kvm:kvm_try_async_get_page [100.00%]
0 kvm:kvm_async_pf_doublefault [100.00%]
0 kvm:kvm_async_pf_not_present [100.00%]
0 kvm:kvm_async_pf_ready [100.00%]
0 kvm:kvm_async_pf_completed
1.002100578 seconds time elapsed
After:
Performance counter stats for 'sleep 1s':
28,354 kvm:kvm_entry [99.98%]
0 kvm:kvm_hypercall [99.98%]
0 kvm:kvm_hv_hypercall [99.98%]
1,347 kvm:kvm_pio [99.98%]
0 kvm:kvm_cpuid [99.98%]
1,931 kvm:kvm_apic [99.98%]
29,595 kvm:kvm_exit [99.98%]
24,884 kvm:kvm_inj_virq [99.98%]
0 kvm:kvm_inj_exception [99.98%]
0 kvm:kvm_page_fault [99.98%]
1,986 kvm:kvm_msr [99.98%]
0 kvm:kvm_cr [99.98%]
0 kvm:kvm_pic_set_irq [99.98%]
0 kvm:kvm_apic_ipi [99.99%]
25,953 kvm:kvm_apic_accept_irq [99.99%]
26,132 kvm:kvm_eoi [99.99%]
26,593 kvm:kvm_pv_eoi [99.99%]
0 kvm:kvm_nested_vmrun [99.99%]
0 kvm:kvm_nested_intercepts [99.99%]
0 kvm:kvm_nested_vmexit [99.99%]
0 kvm:kvm_nested_vmexit_inject [99.99%]
0 kvm:kvm_nested_intr_vmexit [99.99%]
0 kvm:kvm_invlpga [99.99%]
0 kvm:kvm_skinit [99.99%]
284 kvm:kvm_emulate_insn [99.99%]
68 kvm:vcpu_match_mmio [99.99%]
68 kvm:kvm_userspace_exit [99.99%]
2 kvm:kvm_set_irq [99.99%]
2 kvm:kvm_ioapic_set_irq [99.99%]
28,288 kvm:kvm_msi_set_irq [99.99%]
1 kvm:kvm_ack_irq [99.99%]
131 kvm:kvm_mmio [100.00%]
588 kvm:kvm_fpu [100.00%]
0 kvm:kvm_age_page [100.00%]
0 kvm:kvm_try_async_get_page [100.00%]
0 kvm:kvm_async_pf_doublefault [100.00%]
0 kvm:kvm_async_pf_not_present [100.00%]
0 kvm:kvm_async_pf_ready [100.00%]
0 kvm:kvm_async_pf_completed
1.002039622 seconds time elapsed
We see that # of exits is almost halved.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
2012-06-24 19:24:34 +03:00
if ( kvm_para_has_feature ( KVM_FEATURE_PV_EOI ) ) {
unsigned long pa ;
/* Size alignment is implied but just to make it explicit. */
BUILD_BUG_ON ( __alignof__ ( kvm_apic_eoi ) < 4 ) ;
__get_cpu_var ( kvm_apic_eoi ) = 0 ;
pa = __pa ( & __get_cpu_var ( kvm_apic_eoi ) ) | KVM_MSR_ENABLED ;
wrmsrl ( MSR_KVM_PV_EOI_EN , pa ) ;
}
2011-07-11 15:28:19 -04:00
if ( has_steal_clock )
kvm_register_steal_time ( ) ;
2010-10-14 11:22:51 +02:00
}
KVM guest: guest side for eoi avoidance
The idea is simple: there's a bit, per APIC, in guest memory,
that tells the guest that it does not need EOI.
Guest tests it using a single test and clear operation - this is
necessary so that host can detect interrupt nesting - and if set, it can
skip the EOI MSR.
I run a simple microbenchmark to show exit reduction
(note: for testing, need to apply follow-up patch
'kvm: host side for eoi optimization' + a qemu patch
I posted separately, on host):
Before:
Performance counter stats for 'sleep 1s':
47,357 kvm:kvm_entry [99.98%]
0 kvm:kvm_hypercall [99.98%]
0 kvm:kvm_hv_hypercall [99.98%]
5,001 kvm:kvm_pio [99.98%]
0 kvm:kvm_cpuid [99.98%]
22,124 kvm:kvm_apic [99.98%]
49,849 kvm:kvm_exit [99.98%]
21,115 kvm:kvm_inj_virq [99.98%]
0 kvm:kvm_inj_exception [99.98%]
0 kvm:kvm_page_fault [99.98%]
22,937 kvm:kvm_msr [99.98%]
0 kvm:kvm_cr [99.98%]
0 kvm:kvm_pic_set_irq [99.98%]
0 kvm:kvm_apic_ipi [99.98%]
22,207 kvm:kvm_apic_accept_irq [99.98%]
22,421 kvm:kvm_eoi [99.98%]
0 kvm:kvm_pv_eoi [99.99%]
0 kvm:kvm_nested_vmrun [99.99%]
0 kvm:kvm_nested_intercepts [99.99%]
0 kvm:kvm_nested_vmexit [99.99%]
0 kvm:kvm_nested_vmexit_inject [99.99%]
0 kvm:kvm_nested_intr_vmexit [99.99%]
0 kvm:kvm_invlpga [99.99%]
0 kvm:kvm_skinit [99.99%]
57 kvm:kvm_emulate_insn [99.99%]
0 kvm:vcpu_match_mmio [99.99%]
0 kvm:kvm_userspace_exit [99.99%]
2 kvm:kvm_set_irq [99.99%]
2 kvm:kvm_ioapic_set_irq [99.99%]
23,609 kvm:kvm_msi_set_irq [99.99%]
1 kvm:kvm_ack_irq [99.99%]
131 kvm:kvm_mmio [99.99%]
226 kvm:kvm_fpu [100.00%]
0 kvm:kvm_age_page [100.00%]
0 kvm:kvm_try_async_get_page [100.00%]
0 kvm:kvm_async_pf_doublefault [100.00%]
0 kvm:kvm_async_pf_not_present [100.00%]
0 kvm:kvm_async_pf_ready [100.00%]
0 kvm:kvm_async_pf_completed
1.002100578 seconds time elapsed
After:
Performance counter stats for 'sleep 1s':
28,354 kvm:kvm_entry [99.98%]
0 kvm:kvm_hypercall [99.98%]
0 kvm:kvm_hv_hypercall [99.98%]
1,347 kvm:kvm_pio [99.98%]
0 kvm:kvm_cpuid [99.98%]
1,931 kvm:kvm_apic [99.98%]
29,595 kvm:kvm_exit [99.98%]
24,884 kvm:kvm_inj_virq [99.98%]
0 kvm:kvm_inj_exception [99.98%]
0 kvm:kvm_page_fault [99.98%]
1,986 kvm:kvm_msr [99.98%]
0 kvm:kvm_cr [99.98%]
0 kvm:kvm_pic_set_irq [99.98%]
0 kvm:kvm_apic_ipi [99.99%]
25,953 kvm:kvm_apic_accept_irq [99.99%]
26,132 kvm:kvm_eoi [99.99%]
26,593 kvm:kvm_pv_eoi [99.99%]
0 kvm:kvm_nested_vmrun [99.99%]
0 kvm:kvm_nested_intercepts [99.99%]
0 kvm:kvm_nested_vmexit [99.99%]
0 kvm:kvm_nested_vmexit_inject [99.99%]
0 kvm:kvm_nested_intr_vmexit [99.99%]
0 kvm:kvm_invlpga [99.99%]
0 kvm:kvm_skinit [99.99%]
284 kvm:kvm_emulate_insn [99.99%]
68 kvm:vcpu_match_mmio [99.99%]
68 kvm:kvm_userspace_exit [99.99%]
2 kvm:kvm_set_irq [99.99%]
2 kvm:kvm_ioapic_set_irq [99.99%]
28,288 kvm:kvm_msi_set_irq [99.99%]
1 kvm:kvm_ack_irq [99.99%]
131 kvm:kvm_mmio [100.00%]
588 kvm:kvm_fpu [100.00%]
0 kvm:kvm_age_page [100.00%]
0 kvm:kvm_try_async_get_page [100.00%]
0 kvm:kvm_async_pf_doublefault [100.00%]
0 kvm:kvm_async_pf_not_present [100.00%]
0 kvm:kvm_async_pf_ready [100.00%]
0 kvm:kvm_async_pf_completed
1.002039622 seconds time elapsed
We see that # of exits is almost halved.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
2012-06-24 19:24:34 +03:00
/*
 * Turn async page faults off on the current CPU: write 0 to
 * MSR_KVM_ASYNC_PF_EN, clear the per-CPU enabled flag and log it.
 * Does nothing if async page faults are not enabled on this CPU.
 */
static void kvm_pv_disable_apf ( void )
2010-10-14 11:22:51 +02:00
{
if ( ! __get_cpu_var ( apf_reason ) . enabled )
return ;
wrmsrl ( MSR_KVM_ASYNC_PF_EN , 0 ) ;
__get_cpu_var ( apf_reason ) . enabled = 0 ;
printk ( KERN_INFO " Unregister pv shared memory for cpu %d \n " ,
smp_processor_id ( ) ) ;
}
KVM guest: guest side for eoi avoidance
The idea is simple: there's a bit, per APIC, in guest memory,
that tells the guest that it does not need EOI.
Guest tests it using a single test and clear operation - this is
necessary so that host can detect interrupt nesting - and if set, it can
skip the EOI MSR.
I run a simple microbenchmark to show exit reduction
(note: for testing, need to apply follow-up patch
'kvm: host side for eoi optimization' + a qemu patch
I posted separately, on host):
Before:
Performance counter stats for 'sleep 1s':
47,357 kvm:kvm_entry [99.98%]
0 kvm:kvm_hypercall [99.98%]
0 kvm:kvm_hv_hypercall [99.98%]
5,001 kvm:kvm_pio [99.98%]
0 kvm:kvm_cpuid [99.98%]
22,124 kvm:kvm_apic [99.98%]
49,849 kvm:kvm_exit [99.98%]
21,115 kvm:kvm_inj_virq [99.98%]
0 kvm:kvm_inj_exception [99.98%]
0 kvm:kvm_page_fault [99.98%]
22,937 kvm:kvm_msr [99.98%]
0 kvm:kvm_cr [99.98%]
0 kvm:kvm_pic_set_irq [99.98%]
0 kvm:kvm_apic_ipi [99.98%]
22,207 kvm:kvm_apic_accept_irq [99.98%]
22,421 kvm:kvm_eoi [99.98%]
0 kvm:kvm_pv_eoi [99.99%]
0 kvm:kvm_nested_vmrun [99.99%]
0 kvm:kvm_nested_intercepts [99.99%]
0 kvm:kvm_nested_vmexit [99.99%]
0 kvm:kvm_nested_vmexit_inject [99.99%]
0 kvm:kvm_nested_intr_vmexit [99.99%]
0 kvm:kvm_invlpga [99.99%]
0 kvm:kvm_skinit [99.99%]
57 kvm:kvm_emulate_insn [99.99%]
0 kvm:vcpu_match_mmio [99.99%]
0 kvm:kvm_userspace_exit [99.99%]
2 kvm:kvm_set_irq [99.99%]
2 kvm:kvm_ioapic_set_irq [99.99%]
23,609 kvm:kvm_msi_set_irq [99.99%]
1 kvm:kvm_ack_irq [99.99%]
131 kvm:kvm_mmio [99.99%]
226 kvm:kvm_fpu [100.00%]
0 kvm:kvm_age_page [100.00%]
0 kvm:kvm_try_async_get_page [100.00%]
0 kvm:kvm_async_pf_doublefault [100.00%]
0 kvm:kvm_async_pf_not_present [100.00%]
0 kvm:kvm_async_pf_ready [100.00%]
0 kvm:kvm_async_pf_completed
1.002100578 seconds time elapsed
After:
Performance counter stats for 'sleep 1s':
28,354 kvm:kvm_entry [99.98%]
0 kvm:kvm_hypercall [99.98%]
0 kvm:kvm_hv_hypercall [99.98%]
1,347 kvm:kvm_pio [99.98%]
0 kvm:kvm_cpuid [99.98%]
1,931 kvm:kvm_apic [99.98%]
29,595 kvm:kvm_exit [99.98%]
24,884 kvm:kvm_inj_virq [99.98%]
0 kvm:kvm_inj_exception [99.98%]
0 kvm:kvm_page_fault [99.98%]
1,986 kvm:kvm_msr [99.98%]
0 kvm:kvm_cr [99.98%]
0 kvm:kvm_pic_set_irq [99.98%]
0 kvm:kvm_apic_ipi [99.99%]
25,953 kvm:kvm_apic_accept_irq [99.99%]
26,132 kvm:kvm_eoi [99.99%]
26,593 kvm:kvm_pv_eoi [99.99%]
0 kvm:kvm_nested_vmrun [99.99%]
0 kvm:kvm_nested_intercepts [99.99%]
0 kvm:kvm_nested_vmexit [99.99%]
0 kvm:kvm_nested_vmexit_inject [99.99%]
0 kvm:kvm_nested_intr_vmexit [99.99%]
0 kvm:kvm_invlpga [99.99%]
0 kvm:kvm_skinit [99.99%]
284 kvm:kvm_emulate_insn [99.99%]
68 kvm:vcpu_match_mmio [99.99%]
68 kvm:kvm_userspace_exit [99.99%]
2 kvm:kvm_set_irq [99.99%]
2 kvm:kvm_ioapic_set_irq [99.99%]
28,288 kvm:kvm_msi_set_irq [99.99%]
1 kvm:kvm_ack_irq [99.99%]
131 kvm:kvm_mmio [100.00%]
588 kvm:kvm_fpu [100.00%]
0 kvm:kvm_age_page [100.00%]
0 kvm:kvm_try_async_get_page [100.00%]
0 kvm:kvm_async_pf_doublefault [100.00%]
0 kvm:kvm_async_pf_not_present [100.00%]
0 kvm:kvm_async_pf_ready [100.00%]
0 kvm:kvm_async_pf_completed
1.002039622 seconds time elapsed
We see that # of exits is almost halved.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
2012-06-24 19:24:34 +03:00
/*
 * Per-CPU teardown of the paravirt MSRs that point into this kernel's
 * memory.  kvm_pv_reboot_notify() runs it on every CPU on SYS_RESTART.
 * @unused is ignored.
 */
static void kvm_pv_guest_cpu_reboot(void *unused)
{
	/*
	 * We disable PV EOI before we load a new kernel by kexec,
	 * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory.
	 * New kernel can re-enable when it boots.
	 */
	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
	kvm_pv_disable_apf();
	/*
	 * MSR_KVM_STEAL_TIME holds the physical address of this kernel's
	 * per-cpu steal_time area (see kvm_register_steal_time()).  Clear it
	 * as well so the host is not left with a stale pointer into the old
	 * kernel's memory.  kvm_disable_steal_time() is a no-op when
	 * has_steal_clock is not set.
	 * NOTE(review): it is defined later in this file and is non-static;
	 * presumably its prototype comes from a kvm_para header - confirm.
	 */
	kvm_disable_steal_time();
}
2010-10-14 11:22:51 +02:00
static int kvm_pv_reboot_notify ( struct notifier_block * nb ,
unsigned long code , void * unused )
{
if ( code = = SYS_RESTART )
KVM guest: guest side for eoi avoidance
The idea is simple: there's a bit, per APIC, in guest memory,
that tells the guest that it does not need EOI.
Guest tests it using a single test and clear operation - this is
necessary so that host can detect interrupt nesting - and if set, it can
skip the EOI MSR.
I run a simple microbenchmark to show exit reduction
(note: for testing, need to apply follow-up patch
'kvm: host side for eoi optimization' + a qemu patch
I posted separately, on host):
Before:
Performance counter stats for 'sleep 1s':
47,357 kvm:kvm_entry [99.98%]
0 kvm:kvm_hypercall [99.98%]
0 kvm:kvm_hv_hypercall [99.98%]
5,001 kvm:kvm_pio [99.98%]
0 kvm:kvm_cpuid [99.98%]
22,124 kvm:kvm_apic [99.98%]
49,849 kvm:kvm_exit [99.98%]
21,115 kvm:kvm_inj_virq [99.98%]
0 kvm:kvm_inj_exception [99.98%]
0 kvm:kvm_page_fault [99.98%]
22,937 kvm:kvm_msr [99.98%]
0 kvm:kvm_cr [99.98%]
0 kvm:kvm_pic_set_irq [99.98%]
0 kvm:kvm_apic_ipi [99.98%]
22,207 kvm:kvm_apic_accept_irq [99.98%]
22,421 kvm:kvm_eoi [99.98%]
0 kvm:kvm_pv_eoi [99.99%]
0 kvm:kvm_nested_vmrun [99.99%]
0 kvm:kvm_nested_intercepts [99.99%]
0 kvm:kvm_nested_vmexit [99.99%]
0 kvm:kvm_nested_vmexit_inject [99.99%]
0 kvm:kvm_nested_intr_vmexit [99.99%]
0 kvm:kvm_invlpga [99.99%]
0 kvm:kvm_skinit [99.99%]
57 kvm:kvm_emulate_insn [99.99%]
0 kvm:vcpu_match_mmio [99.99%]
0 kvm:kvm_userspace_exit [99.99%]
2 kvm:kvm_set_irq [99.99%]
2 kvm:kvm_ioapic_set_irq [99.99%]
23,609 kvm:kvm_msi_set_irq [99.99%]
1 kvm:kvm_ack_irq [99.99%]
131 kvm:kvm_mmio [99.99%]
226 kvm:kvm_fpu [100.00%]
0 kvm:kvm_age_page [100.00%]
0 kvm:kvm_try_async_get_page [100.00%]
0 kvm:kvm_async_pf_doublefault [100.00%]
0 kvm:kvm_async_pf_not_present [100.00%]
0 kvm:kvm_async_pf_ready [100.00%]
0 kvm:kvm_async_pf_completed
1.002100578 seconds time elapsed
After:
Performance counter stats for 'sleep 1s':
28,354 kvm:kvm_entry [99.98%]
0 kvm:kvm_hypercall [99.98%]
0 kvm:kvm_hv_hypercall [99.98%]
1,347 kvm:kvm_pio [99.98%]
0 kvm:kvm_cpuid [99.98%]
1,931 kvm:kvm_apic [99.98%]
29,595 kvm:kvm_exit [99.98%]
24,884 kvm:kvm_inj_virq [99.98%]
0 kvm:kvm_inj_exception [99.98%]
0 kvm:kvm_page_fault [99.98%]
1,986 kvm:kvm_msr [99.98%]
0 kvm:kvm_cr [99.98%]
0 kvm:kvm_pic_set_irq [99.98%]
0 kvm:kvm_apic_ipi [99.99%]
25,953 kvm:kvm_apic_accept_irq [99.99%]
26,132 kvm:kvm_eoi [99.99%]
26,593 kvm:kvm_pv_eoi [99.99%]
0 kvm:kvm_nested_vmrun [99.99%]
0 kvm:kvm_nested_intercepts [99.99%]
0 kvm:kvm_nested_vmexit [99.99%]
0 kvm:kvm_nested_vmexit_inject [99.99%]
0 kvm:kvm_nested_intr_vmexit [99.99%]
0 kvm:kvm_invlpga [99.99%]
0 kvm:kvm_skinit [99.99%]
284 kvm:kvm_emulate_insn [99.99%]
68 kvm:vcpu_match_mmio [99.99%]
68 kvm:kvm_userspace_exit [99.99%]
2 kvm:kvm_set_irq [99.99%]
2 kvm:kvm_ioapic_set_irq [99.99%]
28,288 kvm:kvm_msi_set_irq [99.99%]
1 kvm:kvm_ack_irq [99.99%]
131 kvm:kvm_mmio [100.00%]
588 kvm:kvm_fpu [100.00%]
0 kvm:kvm_age_page [100.00%]
0 kvm:kvm_try_async_get_page [100.00%]
0 kvm:kvm_async_pf_doublefault [100.00%]
0 kvm:kvm_async_pf_not_present [100.00%]
0 kvm:kvm_async_pf_ready [100.00%]
0 kvm:kvm_async_pf_completed
1.002039622 seconds time elapsed
We see that # of exits is almost halved.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
2012-06-24 19:24:34 +03:00
on_each_cpu ( kvm_pv_guest_cpu_reboot , NULL , 1 ) ;
2010-10-14 11:22:51 +02:00
return NOTIFY_DONE ;
}
/* Notifier block whose callback is kvm_pv_reboot_notify(). */
static struct notifier_block kvm_pv_reboot_nb = {
. notifier_call = kvm_pv_reboot_notify ,
} ;
2011-07-11 15:28:19 -04:00
static u64 kvm_steal_clock ( int cpu )
{
u64 steal ;
struct kvm_steal_time * src ;
int version ;
src = & per_cpu ( steal_time , cpu ) ;
do {
version = src - > version ;
rmb ( ) ;
steal = src - > steal ;
rmb ( ) ;
} while ( ( version & 1 ) | | ( version ! = src - > version ) ) ;
return steal ;
}
/*
 * Write 0 to MSR_KVM_STEAL_TIME on the current CPU.
 * Does nothing unless has_steal_clock is set.
 */
void kvm_disable_steal_time(void)
{
	if (has_steal_clock)
		wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
}
2010-10-14 11:22:49 +02:00
# ifdef CONFIG_SMP
/*
 * Boot-CPU preparation for KVM guests: register the clock (when built
 * with CONFIG_KVM_CLOCK), run the per-CPU guest init, then hand over to
 * native_smp_prepare_boot_cpu().
 */
static void __init kvm_smp_prepare_boot_cpu ( void )
{
2010-12-16 11:27:23 +02:00
# ifdef CONFIG_KVM_CLOCK
2010-10-14 11:22:49 +02:00
/* A non-zero return from kvm_register_clock() only triggers a warning. */
WARN_ON ( kvm_register_clock ( " primary cpu clock " ) ) ;
2010-12-16 11:27:23 +02:00
# endif
2010-10-14 11:22:51 +02:00
kvm_guest_cpu_init ( ) ;
2010-10-14 11:22:49 +02:00
native_smp_prepare_boot_cpu ( ) ;
}
2010-10-14 11:22:51 +02:00
2011-01-03 00:01:29 +01:00
/* Callback wrapper around kvm_guest_cpu_init(); @dummy is ignored. */
static void __cpuinit kvm_guest_cpu_online ( void * dummy )
2010-10-14 11:22:51 +02:00
{
kvm_guest_cpu_init ( ) ;
}
/*
 * Callback handed to smp_call_function_single() by kvm_cpu_notify() for the
 * CPU_DOWN_PREPARE and CPU_DOWN_PREPARE_FROZEN actions.  @dummy is not used.
 *
 * In order, it calls kvm_disable_steal_time(), writes 0 to MSR_KVM_PV_EOI_EN
 * when KVM_FEATURE_PV_EOI is reported, calls kvm_pv_disable_apf() and finally
 * apf_task_wake_all().
 * NOTE(review): presumably this undoes what kvm_guest_cpu_init() enabled --
 * confirm against that function.
 */
static void kvm_guest_cpu_offline ( void * dummy )
{
2011-07-11 15:28:19 -04:00
kvm_disable_steal_time ( ) ;
KVM guest: guest side for eoi avoidance
The idea is simple: there's a bit, per APIC, in guest memory,
that tells the guest that it does not need EOI.
Guest tests it using a single est and clear operation - this is
necessary so that host can detect interrupt nesting - and if set, it can
skip the EOI MSR.
I run a simple microbenchmark to show exit reduction
(note: for testing, need to apply follow-up patch
'kvm: host side for eoi optimization' + a qemu patch
I posted separately, on host):
Before:
Performance counter stats for 'sleep 1s':
47,357 kvm:kvm_entry [99.98%]
0 kvm:kvm_hypercall [99.98%]
0 kvm:kvm_hv_hypercall [99.98%]
5,001 kvm:kvm_pio [99.98%]
0 kvm:kvm_cpuid [99.98%]
22,124 kvm:kvm_apic [99.98%]
49,849 kvm:kvm_exit [99.98%]
21,115 kvm:kvm_inj_virq [99.98%]
0 kvm:kvm_inj_exception [99.98%]
0 kvm:kvm_page_fault [99.98%]
22,937 kvm:kvm_msr [99.98%]
0 kvm:kvm_cr [99.98%]
0 kvm:kvm_pic_set_irq [99.98%]
0 kvm:kvm_apic_ipi [99.98%]
22,207 kvm:kvm_apic_accept_irq [99.98%]
22,421 kvm:kvm_eoi [99.98%]
0 kvm:kvm_pv_eoi [99.99%]
0 kvm:kvm_nested_vmrun [99.99%]
0 kvm:kvm_nested_intercepts [99.99%]
0 kvm:kvm_nested_vmexit [99.99%]
0 kvm:kvm_nested_vmexit_inject [99.99%]
0 kvm:kvm_nested_intr_vmexit [99.99%]
0 kvm:kvm_invlpga [99.99%]
0 kvm:kvm_skinit [99.99%]
57 kvm:kvm_emulate_insn [99.99%]
0 kvm:vcpu_match_mmio [99.99%]
0 kvm:kvm_userspace_exit [99.99%]
2 kvm:kvm_set_irq [99.99%]
2 kvm:kvm_ioapic_set_irq [99.99%]
23,609 kvm:kvm_msi_set_irq [99.99%]
1 kvm:kvm_ack_irq [99.99%]
131 kvm:kvm_mmio [99.99%]
226 kvm:kvm_fpu [100.00%]
0 kvm:kvm_age_page [100.00%]
0 kvm:kvm_try_async_get_page [100.00%]
0 kvm:kvm_async_pf_doublefault [100.00%]
0 kvm:kvm_async_pf_not_present [100.00%]
0 kvm:kvm_async_pf_ready [100.00%]
0 kvm:kvm_async_pf_completed
1.002100578 seconds time elapsed
After:
Performance counter stats for 'sleep 1s':
28,354 kvm:kvm_entry [99.98%]
0 kvm:kvm_hypercall [99.98%]
0 kvm:kvm_hv_hypercall [99.98%]
1,347 kvm:kvm_pio [99.98%]
0 kvm:kvm_cpuid [99.98%]
1,931 kvm:kvm_apic [99.98%]
29,595 kvm:kvm_exit [99.98%]
24,884 kvm:kvm_inj_virq [99.98%]
0 kvm:kvm_inj_exception [99.98%]
0 kvm:kvm_page_fault [99.98%]
1,986 kvm:kvm_msr [99.98%]
0 kvm:kvm_cr [99.98%]
0 kvm:kvm_pic_set_irq [99.98%]
0 kvm:kvm_apic_ipi [99.99%]
25,953 kvm:kvm_apic_accept_irq [99.99%]
26,132 kvm:kvm_eoi [99.99%]
26,593 kvm:kvm_pv_eoi [99.99%]
0 kvm:kvm_nested_vmrun [99.99%]
0 kvm:kvm_nested_intercepts [99.99%]
0 kvm:kvm_nested_vmexit [99.99%]
0 kvm:kvm_nested_vmexit_inject [99.99%]
0 kvm:kvm_nested_intr_vmexit [99.99%]
0 kvm:kvm_invlpga [99.99%]
0 kvm:kvm_skinit [99.99%]
284 kvm:kvm_emulate_insn [99.99%]
68 kvm:vcpu_match_mmio [99.99%]
68 kvm:kvm_userspace_exit [99.99%]
2 kvm:kvm_set_irq [99.99%]
2 kvm:kvm_ioapic_set_irq [99.99%]
28,288 kvm:kvm_msi_set_irq [99.99%]
1 kvm:kvm_ack_irq [99.99%]
131 kvm:kvm_mmio [100.00%]
588 kvm:kvm_fpu [100.00%]
0 kvm:kvm_age_page [100.00%]
0 kvm:kvm_try_async_get_page [100.00%]
0 kvm:kvm_async_pf_doublefault [100.00%]
0 kvm:kvm_async_pf_not_present [100.00%]
0 kvm:kvm_async_pf_ready [100.00%]
0 kvm:kvm_async_pf_completed
1.002039622 seconds time elapsed
We see that # of exits is almost halved.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
2012-06-24 19:24:34 +03:00
/* The PV EOI enable MSR is only written when the feature bit is reported. */
if ( kvm_para_has_feature ( KVM_FEATURE_PV_EOI ) )
wrmsrl ( MSR_KVM_PV_EOI_EN , 0 ) ;
kvm_pv_disable_apf ( ) ;
2010-10-14 11:22:52 +02:00
apf_task_wake_all ( ) ;
2010-10-14 11:22:51 +02:00
}
/*
 * CPU hotplug notifier callback.
 *
 * @self:   the notifier block (not used here)
 * @action: hotplug event code
 * @hcpu:   CPU number, carried in a pointer-sized value
 *
 * For CPU_ONLINE, CPU_DOWN_FAILED and CPU_ONLINE_FROZEN it runs
 * kvm_guest_cpu_online() on the affected CPU; for CPU_DOWN_PREPARE and
 * CPU_DOWN_PREPARE_FROZEN it runs kvm_guest_cpu_offline() there.  Every
 * other action is ignored.  Always returns NOTIFY_OK.
 */
static int __cpuinit kvm_cpu_notify(struct notifier_block *self,
				    unsigned long action, void *hcpu)
{
	int cpu = (unsigned long)hcpu;

	if (action == CPU_ONLINE ||
	    action == CPU_DOWN_FAILED ||
	    action == CPU_ONLINE_FROZEN) {
		/* Final argument 0: do not wait for the remote call. */
		smp_call_function_single(cpu, kvm_guest_cpu_online, NULL, 0);
	} else if (action == CPU_DOWN_PREPARE ||
		   action == CPU_DOWN_PREPARE_FROZEN) {
		/* Final argument 1: wait until the remote call has finished. */
		smp_call_function_single(cpu, kvm_guest_cpu_offline, NULL, 1);
	}

	return NOTIFY_OK;
}
/*
 * Notifier block whose callback is kvm_cpu_notify.  kvm_guest_init() passes
 * it to register_cpu_notifier() in the CONFIG_SMP branch.
 */
static struct notifier_block __cpuinitdata kvm_cpu_notifier = {
. notifier_call = kvm_cpu_notify ,
} ;
2010-10-14 11:22:49 +02:00
# endif
2010-10-14 11:22:52 +02:00
/*
 * Trap-init hook that kvm_guest_init() installs in x86_init.irqs.trap_init
 * when KVM_FEATURE_ASYNC_PF is reported.  It points interrupt gate 14 at
 * async_page_fault.
 */
static void __init kvm_apf_trap_init(void)
{
	/* 14 is the x86 page-fault (#PF) exception vector. */
	const unsigned int pf_vector = 14;

	set_intr_gate(pf_vector, &async_page_fault);
}
2008-02-22 12:21:36 -05:00
/*
 * Guest-side KVM paravirt initialisation.
 *
 * Returns at once when kvm_para_available() is false.  Otherwise it:
 *   - calls paravirt_ops_setup() and registers the reboot notifier;
 *   - initialises the lock of each async_pf_sleepers entry;
 *   - installs kvm_apf_trap_init as the trap_init hook when
 *     KVM_FEATURE_ASYNC_PF is reported;
 *   - sets has_steal_clock and installs kvm_steal_clock when
 *     KVM_FEATURE_STEAL_TIME is reported;
 *   - replaces eoi_write of every APIC driver when KVM_FEATURE_PV_EOI is
 *     reported;
 *   - on SMP builds installs the boot-CPU hook and the CPU notifier, on UP
 *     builds calls kvm_guest_cpu_init() directly.
 */
void __init kvm_guest_init ( void )
{
2010-10-14 11:22:52 +02:00
int i ;
2008-02-22 12:21:36 -05:00
/* Nothing to set up when KVM paravirt support is not available. */
if ( ! kvm_para_available ( ) )
return ;
paravirt_ops_setup ( ) ;
2010-10-14 11:22:51 +02:00
register_reboot_notifier ( & kvm_pv_reboot_nb ) ;
2010-10-14 11:22:52 +02:00
/* Initialise the spinlock of every async_pf_sleepers entry. */
for ( i = 0 ; i < KVM_TASK_SLEEP_HASHSIZE ; i + + )
spin_lock_init ( & async_pf_sleepers [ i ] . lock ) ;
/* With async page faults reported, trap init goes through kvm_apf_trap_init. */
if ( kvm_para_has_feature ( KVM_FEATURE_ASYNC_PF ) )
x86_init . irqs . trap_init = kvm_apf_trap_init ;
2011-07-11 15:28:19 -04:00
/* Record that steal time is available and install the steal_clock hook. */
if ( kvm_para_has_feature ( KVM_FEATURE_STEAL_TIME ) ) {
has_steal_clock = 1 ;
pv_time_ops . steal_clock = kvm_steal_clock ;
}
KVM guest: guest side for eoi avoidance
The idea is simple: there's a bit, per APIC, in guest memory,
that tells the guest that it does not need EOI.
Guest tests it using a single est and clear operation - this is
necessary so that host can detect interrupt nesting - and if set, it can
skip the EOI MSR.
I run a simple microbenchmark to show exit reduction
(note: for testing, need to apply follow-up patch
'kvm: host side for eoi optimization' + a qemu patch
I posted separately, on host):
Before:
Performance counter stats for 'sleep 1s':
47,357 kvm:kvm_entry [99.98%]
0 kvm:kvm_hypercall [99.98%]
0 kvm:kvm_hv_hypercall [99.98%]
5,001 kvm:kvm_pio [99.98%]
0 kvm:kvm_cpuid [99.98%]
22,124 kvm:kvm_apic [99.98%]
49,849 kvm:kvm_exit [99.98%]
21,115 kvm:kvm_inj_virq [99.98%]
0 kvm:kvm_inj_exception [99.98%]
0 kvm:kvm_page_fault [99.98%]
22,937 kvm:kvm_msr [99.98%]
0 kvm:kvm_cr [99.98%]
0 kvm:kvm_pic_set_irq [99.98%]
0 kvm:kvm_apic_ipi [99.98%]
22,207 kvm:kvm_apic_accept_irq [99.98%]
22,421 kvm:kvm_eoi [99.98%]
0 kvm:kvm_pv_eoi [99.99%]
0 kvm:kvm_nested_vmrun [99.99%]
0 kvm:kvm_nested_intercepts [99.99%]
0 kvm:kvm_nested_vmexit [99.99%]
0 kvm:kvm_nested_vmexit_inject [99.99%]
0 kvm:kvm_nested_intr_vmexit [99.99%]
0 kvm:kvm_invlpga [99.99%]
0 kvm:kvm_skinit [99.99%]
57 kvm:kvm_emulate_insn [99.99%]
0 kvm:vcpu_match_mmio [99.99%]
0 kvm:kvm_userspace_exit [99.99%]
2 kvm:kvm_set_irq [99.99%]
2 kvm:kvm_ioapic_set_irq [99.99%]
23,609 kvm:kvm_msi_set_irq [99.99%]
1 kvm:kvm_ack_irq [99.99%]
131 kvm:kvm_mmio [99.99%]
226 kvm:kvm_fpu [100.00%]
0 kvm:kvm_age_page [100.00%]
0 kvm:kvm_try_async_get_page [100.00%]
0 kvm:kvm_async_pf_doublefault [100.00%]
0 kvm:kvm_async_pf_not_present [100.00%]
0 kvm:kvm_async_pf_ready [100.00%]
0 kvm:kvm_async_pf_completed
1.002100578 seconds time elapsed
After:
Performance counter stats for 'sleep 1s':
28,354 kvm:kvm_entry [99.98%]
0 kvm:kvm_hypercall [99.98%]
0 kvm:kvm_hv_hypercall [99.98%]
1,347 kvm:kvm_pio [99.98%]
0 kvm:kvm_cpuid [99.98%]
1,931 kvm:kvm_apic [99.98%]
29,595 kvm:kvm_exit [99.98%]
24,884 kvm:kvm_inj_virq [99.98%]
0 kvm:kvm_inj_exception [99.98%]
0 kvm:kvm_page_fault [99.98%]
1,986 kvm:kvm_msr [99.98%]
0 kvm:kvm_cr [99.98%]
0 kvm:kvm_pic_set_irq [99.98%]
0 kvm:kvm_apic_ipi [99.99%]
25,953 kvm:kvm_apic_accept_irq [99.99%]
26,132 kvm:kvm_eoi [99.99%]
26,593 kvm:kvm_pv_eoi [99.99%]
0 kvm:kvm_nested_vmrun [99.99%]
0 kvm:kvm_nested_intercepts [99.99%]
0 kvm:kvm_nested_vmexit [99.99%]
0 kvm:kvm_nested_vmexit_inject [99.99%]
0 kvm:kvm_nested_intr_vmexit [99.99%]
0 kvm:kvm_invlpga [99.99%]
0 kvm:kvm_skinit [99.99%]
284 kvm:kvm_emulate_insn [99.99%]
68 kvm:vcpu_match_mmio [99.99%]
68 kvm:kvm_userspace_exit [99.99%]
2 kvm:kvm_set_irq [99.99%]
2 kvm:kvm_ioapic_set_irq [99.99%]
28,288 kvm:kvm_msi_set_irq [99.99%]
1 kvm:kvm_ack_irq [99.99%]
131 kvm:kvm_mmio [100.00%]
588 kvm:kvm_fpu [100.00%]
0 kvm:kvm_age_page [100.00%]
0 kvm:kvm_try_async_get_page [100.00%]
0 kvm:kvm_async_pf_doublefault [100.00%]
0 kvm:kvm_async_pf_not_present [100.00%]
0 kvm:kvm_async_pf_ready [100.00%]
0 kvm:kvm_async_pf_completed
1.002039622 seconds time elapsed
We see that # of exits is almost halved.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
2012-06-24 19:24:34 +03:00
/*
 * With PV EOI reported, walk the APIC driver table from __apicdrivers to
 * __apicdrivers_end and point eoi_write of each entry at
 * kvm_guest_apic_eoi_write.
 */
if ( kvm_para_has_feature ( KVM_FEATURE_PV_EOI ) ) {
struct apic * * drv ;
for ( drv = __apicdrivers ; drv < __apicdrivers_end ; drv + + ) {
/* Should happen once for each apic */
WARN_ON ( ( * drv ) - > eoi_write = = kvm_guest_apic_eoi_write ) ;
( * drv ) - > eoi_write = kvm_guest_apic_eoi_write ;
}
}
2010-10-14 11:22:49 +02:00
/*
 * SMP: per-CPU init is left to the boot-CPU hook and the CPU notifier.
 * UP: there is only this CPU, so kvm_guest_cpu_init() is called right here.
 */
# ifdef CONFIG_SMP
smp_ops . smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu ;
2010-10-14 11:22:51 +02:00
register_cpu_notifier ( & kvm_cpu_notifier ) ;
# else
kvm_guest_cpu_init ( ) ;
2010-10-14 11:22:49 +02:00
# endif
2008-02-22 12:21:36 -05:00
}
2011-07-11 15:28:19 -04:00
2012-07-06 13:47:39 -04:00
/*
 * Detection callback for the KVM hypervisor descriptor.
 * Returns true exactly when kvm_para_available() reports a non-zero value.
 */
static bool __init kvm_detect(void)
{
	return kvm_para_available() ? true : false;
}
/*
 * Hypervisor descriptor for KVM: a name string plus kvm_detect() as the
 * detection callback.  The symbol is exported with EXPORT_SYMBOL_GPL.
 */
const struct hypervisor_x86 x86_hyper_kvm __refconst = {
. name = " KVM " ,
. detect = kvm_detect ,
} ;
EXPORT_SYMBOL_GPL ( x86_hyper_kvm ) ;
2011-07-11 15:28:19 -04:00
/*
 * Initcall (registered with arch_initcall) that turns on the steal-time
 * static keys.  paravirt_steal_enabled is incremented when has_steal_clock
 * is set; paravirt_steal_rq_enabled is incremented only when steal_acc is
 * set as well.  Always returns 0.
 */
static __init int activate_jump_labels ( void )
{
if ( has_steal_clock ) {
2012-02-24 08:31:31 +01:00
static_key_slow_inc ( & paravirt_steal_enabled ) ;
2011-07-11 15:28:19 -04:00
if ( steal_acc )
2012-02-24 08:31:31 +01:00
static_key_slow_inc ( & paravirt_steal_rq_enabled ) ;
2011-07-11 15:28:19 -04:00
}
return 0 ;
}
arch_initcall ( activate_jump_labels ) ;