From 5f4cb662a0a2533b45656607471571460310a5ca Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 14 Jul 2008 20:36:36 +0200 Subject: [PATCH 01/15] KVM: SVM: allow enabling/disabling NPT by reloading only the architecture module If NPT is enabled after loading both KVM modules on AMD and it should be disabled, both KVM modules must be reloaded. If only the architecture module is reloaded the behavior is undefined. With this patch it is possible to disable NPT only by reloading the kvm_amd module. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 6 ++++++ arch/x86/kvm/svm.c | 3 ++- include/asm-x86/kvm_host.h | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b0e4ddca6c18..d087d9c4f2d9 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1870,6 +1870,12 @@ void kvm_enable_tdp(void) } EXPORT_SYMBOL_GPL(kvm_enable_tdp); +void kvm_disable_tdp(void) +{ + tdp_enabled = false; +} +EXPORT_SYMBOL_GPL(kvm_disable_tdp); + static void free_mmu_pages(struct kvm_vcpu *vcpu) { struct kvm_mmu_page *sp; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b756e876dce3..951b789cc913 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -453,7 +453,8 @@ static __init int svm_hardware_setup(void) if (npt_enabled) { printk(KERN_INFO "kvm: Nested Paging enabled\n"); kvm_enable_tdp(); - } + } else + kvm_disable_tdp(); return 0; diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index fdde0bedaa90..bc34dc21f178 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -556,6 +556,7 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu); int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t gva, u32 error_code); void kvm_enable_tdp(void); +void kvm_disable_tdp(void); int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); int complete_pio(struct kvm_vcpu *vcpu); From 98899aa0e0bf5de05850082be0eb837058c09ea5 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 16 Jul 2008 19:07:10 -0300 Subject: [PATCH 02/15] KVM: task switch: segment base is linear address The segment base is always a linear address, so translate before accessing guest memory. 
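The whole fix turns on one rule: descriptor-table bases held in GDTR/LDTR are linear (guest-virtual) addresses, while kvm_read_guest()/kvm_write_guest() take guest-physical addresses, so each access must go through gva_to_gpa() first. A minimal user-space model of that rule, with made-up addresses and helper names (everything prefixed toy_ is illustrative, not KVM's API):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Stand-in for vcpu->arch.mmu.gva_to_gpa(): pretend the guest maps
 * linear page 0x1000 to physical page 0x9000. */
static uint64_t toy_gva_to_gpa(uint64_t gva)
{
	return (gva & ~0xfffULL) == 0x1000 ? (0x9000 | (gva & 0xfff)) : gva;
}

/* Stand-in for guest physical memory and kvm_read_guest(). */
static uint8_t guest_phys[0x10000];

static void toy_read_guest_phys(uint64_t gpa, void *buf, size_t len)
{
	memcpy(buf, &guest_phys[gpa], len);
}

int main(void)
{
	uint64_t gdt_base = 0x1008;	/* linear, as stored in GDTR */
	uint64_t desc;

	/* Place a descriptor at the *physical* location of gdt_base. */
	memcpy(&guest_phys[0x9008], "\x01\x02\x03\x04\x05\x06\x07\x08", 8);

	/* Buggy: treating the linear base as physical reads the wrong page. */
	toy_read_guest_phys(gdt_base, &desc, 8);
	printf("without translation: %016llx\n", (unsigned long long)desc);

	/* Fixed, as in the patch: translate first, then read. */
	toy_read_guest_phys(toy_gva_to_gpa(gdt_base), &desc, 8);
	printf("with translation:    %016llx\n", (unsigned long long)desc);
	return 0;
}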
Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9f1cdb011cff..cd687395e4e7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3223,6 +3223,7 @@ static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu, static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, struct desc_struct *seg_desc) { + gpa_t gpa; struct descriptor_table dtable; u16 index = selector >> 3; @@ -3232,13 +3233,16 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); return 1; } - return kvm_read_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8); + gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base); + gpa += index * 8; + return kvm_read_guest(vcpu->kvm, gpa, seg_desc, 8); } /* allowed just for 8 bytes segments */ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, struct desc_struct *seg_desc) { + gpa_t gpa; struct descriptor_table dtable; u16 index = selector >> 3; @@ -3246,7 +3250,9 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, if (dtable.limit < index * 8 + 7) return 1; - return kvm_write_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8); + gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base); + gpa += index * 8; + return kvm_write_guest(vcpu->kvm, gpa, seg_desc, 8); } static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, @@ -3258,7 +3264,7 @@ static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, base_addr |= (seg_desc->base1 << 16); base_addr |= (seg_desc->base2 << 24); - return base_addr; + return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr); } static int load_tss_segment32(struct kvm_vcpu *vcpu, From 34198bf8426276a2ce1e97056a0f02d43637e5ae Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 16 Jul 2008 19:07:11 -0300 Subject: [PATCH 03/15] KVM: task switch: use seg regs provided by subarch instead of reading from GDT There is no guarantee that the old TSS descriptor in the GDT contains the proper base address. This is the case for Windows installation's reboot-via-triplefault. Use guest registers instead. Also translate the address properly. 
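The reason to trust the subarch here: the hidden part of TR cached by the CPU (and exposed through VMX/SVM state) stays valid even if the guest memory holding the GDT has been rewritten, for example by a reboot-via-triple-fault during a Windows install. A toy sketch of that distinction, with invented values (not KVM's internal API):

#include <stdint.h>
#include <stdio.h>

struct toy_cached_seg {
	uint32_t base;		/* what get_segment_base(vcpu, VCPU_SREG_TR) yields */
	uint16_t selector;	/* what get_segment_selector(...) yields */
};

int main(void)
{
	uint32_t base_from_gdt = 0xdeadb000;	/* stale descriptor contents */
	struct toy_cached_seg tr = { .base = 0x00082000, .selector = 0x28 };

	/* Old code re-derived the outgoing TSS base from the GDT entry: */
	printf("GDT copy (unreliable): %#x\n", (unsigned)base_from_gdt);

	/* Patched code uses the cached base, and still translates it
	 * gva->gpa before touching guest memory: */
	printf("cached TR base (used): %#x\n", (unsigned)tr.base);
	return 0;
}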
Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/x86.c | 93 +++++++++++++++------------------------------- 1 file changed, 30 insertions(+), 63 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cd687395e4e7..27c6ece91da6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3267,54 +3267,6 @@ static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr); } -static int load_tss_segment32(struct kvm_vcpu *vcpu, - struct desc_struct *seg_desc, - struct tss_segment_32 *tss) -{ - u32 base_addr; - - base_addr = get_tss_base_addr(vcpu, seg_desc); - - return kvm_read_guest(vcpu->kvm, base_addr, tss, - sizeof(struct tss_segment_32)); -} - -static int save_tss_segment32(struct kvm_vcpu *vcpu, - struct desc_struct *seg_desc, - struct tss_segment_32 *tss) -{ - u32 base_addr; - - base_addr = get_tss_base_addr(vcpu, seg_desc); - - return kvm_write_guest(vcpu->kvm, base_addr, tss, - sizeof(struct tss_segment_32)); -} - -static int load_tss_segment16(struct kvm_vcpu *vcpu, - struct desc_struct *seg_desc, - struct tss_segment_16 *tss) -{ - u32 base_addr; - - base_addr = get_tss_base_addr(vcpu, seg_desc); - - return kvm_read_guest(vcpu->kvm, base_addr, tss, - sizeof(struct tss_segment_16)); -} - -static int save_tss_segment16(struct kvm_vcpu *vcpu, - struct desc_struct *seg_desc, - struct tss_segment_16 *tss) -{ - u32 base_addr; - - base_addr = get_tss_base_addr(vcpu, seg_desc); - - return kvm_write_guest(vcpu->kvm, base_addr, tss, - sizeof(struct tss_segment_16)); -} - static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) { struct kvm_segment kvm_seg; @@ -3472,20 +3424,26 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu, } static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, - struct desc_struct *cseg_desc, + u32 old_tss_base, struct desc_struct *nseg_desc) { struct tss_segment_16 tss_segment_16; int ret = 0; - if (load_tss_segment16(vcpu, cseg_desc, &tss_segment_16)) + if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16, + sizeof tss_segment_16)) goto out; save_state_to_tss16(vcpu, &tss_segment_16); - save_tss_segment16(vcpu, cseg_desc, &tss_segment_16); - if (load_tss_segment16(vcpu, nseg_desc, &tss_segment_16)) + if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16, + sizeof tss_segment_16)) goto out; + + if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), + &tss_segment_16, sizeof tss_segment_16)) + goto out; + if (load_state_from_tss16(vcpu, &tss_segment_16)) goto out; @@ -3495,20 +3453,26 @@ out: } static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, - struct desc_struct *cseg_desc, + u32 old_tss_base, struct desc_struct *nseg_desc) { struct tss_segment_32 tss_segment_32; int ret = 0; - if (load_tss_segment32(vcpu, cseg_desc, &tss_segment_32)) + if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32, + sizeof tss_segment_32)) goto out; save_state_to_tss32(vcpu, &tss_segment_32); - save_tss_segment32(vcpu, cseg_desc, &tss_segment_32); - if (load_tss_segment32(vcpu, nseg_desc, &tss_segment_32)) + if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32, + sizeof tss_segment_32)) goto out; + + if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), + &tss_segment_32, sizeof tss_segment_32)) + goto out; + if (load_state_from_tss32(vcpu, &tss_segment_32)) goto out; @@ -3523,16 +3487,20 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) struct desc_struct cseg_desc; struct desc_struct 
nseg_desc; int ret = 0; + u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); + u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); - kvm_get_segment(vcpu, &tr_seg, VCPU_SREG_TR); + old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base); + /* FIXME: Handle errors. Failure to read either TSS or their + * descriptors should generate a pagefault. + */ if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) goto out; - if (load_guest_segment_descriptor(vcpu, tr_seg.selector, &cseg_desc)) + if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc)) goto out; - if (reason != TASK_SWITCH_IRET) { int cpl; @@ -3550,8 +3518,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { cseg_desc.type &= ~(1 << 1); //clear the B flag - save_guest_segment_descriptor(vcpu, tr_seg.selector, - &cseg_desc); + save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc); } if (reason == TASK_SWITCH_IRET) { @@ -3563,10 +3530,10 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) kvm_x86_ops->cache_regs(vcpu); if (nseg_desc.type & 8) - ret = kvm_task_switch_32(vcpu, tss_selector, &cseg_desc, + ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base, &nseg_desc); else - ret = kvm_task_switch_16(vcpu, tss_selector, &cseg_desc, + ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base, &nseg_desc); if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { From 577bdc496614ced56d999bbb425e85adf2386490 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sat, 19 Jul 2008 08:57:05 +0300 Subject: [PATCH 04/15] KVM: Avoid instruction emulation when event delivery is pending When an event (such as an interrupt) is injected, and the stack is shadowed (and therefore write protected), the guest will exit. The current code will see that the stack is shadowed and emulate a few instructions, each time postponing the injection. Eventually the injection may succeed, but at that time the guest may be unwilling to accept the interrupt (for example, the TPR may have changed). This occurs every once in a while during a Windows 2008 boot. Fix by unshadowing the fault address if the fault was due to an event injection. 
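Put differently, the old handler could spin: fault on the shadowed stack, emulate one instruction, retry the injection, fault again. A compact user-space model of the new decision (all names invented for illustration):

#include <stdbool.h>
#include <stdio.h>

static bool page_shadowed = true;

static void toy_unprotect_page(void)	/* stands in for
					 * kvm_mmu_unprotect_page_virt() */
{
	page_shadowed = false;
}

static int toy_handle_page_fault(bool during_event_injection)
{
	if (during_event_injection) {
		/* The patch: drop the shadow for the faulting page so the
		 * injection completes immediately on the next entry. */
		toy_unprotect_page();
		return 0;
	}
	return 1;	/* otherwise: emulate the faulting access as before */
}

int main(void)
{
	toy_handle_page_fault(true);
	printf("stack still shadowed after injection fault: %s\n",
	       page_shadowed ? "yes (livelock risk)" : "no (injection can succeed)");
	return 0;
}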
Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 1 + arch/x86/kvm/svm.c | 7 ++++++- arch/x86/kvm/vmx.c | 2 ++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d087d9c4f2d9..2fa231923cf7 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1814,6 +1814,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) spin_unlock(&vcpu->kvm->mmu_lock); return r; } +EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) { diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 951b789cc913..e2ee264740c7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1008,10 +1008,13 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) struct kvm *kvm = svm->vcpu.kvm; u64 fault_address; u32 error_code; + bool event_injection = false; if (!irqchip_in_kernel(kvm) && - is_external_interrupt(exit_int_info)) + is_external_interrupt(exit_int_info)) { + event_injection = true; push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); + } fault_address = svm->vmcb->control.exit_info_2; error_code = svm->vmcb->control.exit_info_1; @@ -1025,6 +1028,8 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) (u32)fault_address, (u32)(fault_address >> 32), handler); + if (event_injection) + kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); } diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0cac63701719..b918fc83435c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2298,6 +2298,8 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) cr2 = vmcs_readl(EXIT_QUALIFICATION); KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, (u32)((u64)cr2 >> 32), handler); + if (vect_info & VECTORING_INFO_VALID_MASK) + kvm_mmu_unprotect_page_virt(vcpu, cr2); return kvm_mmu_page_fault(vcpu, cr2, error_code); } From c93cd3a58845012df2d658fecd0ac99f7008d753 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Sat, 19 Jul 2008 19:08:07 -0300 Subject: [PATCH 05/15] KVM: task switch: translate guest segment limit to virt-extension byte granular field If 'g' is one then limit is 4kb granular. 
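For reference, the granularity rule being applied: the 20-bit descriptor limit counts 4 KiB units when G=1, so the byte-granular limit is (limit << 12) | 0xfff, exactly as the patch computes. A standalone check of that arithmetic:

#include <stdint.h>
#include <stdio.h>

static uint32_t effective_limit(uint32_t raw_limit, int g)
{
	return g ? (raw_limit << 12) | 0xfff : raw_limit;
}

int main(void)
{
	/* limit = 0xfffff, G=1 -> 0xffffffff: a flat 4 GiB segment. */
	printf("%#x\n", (unsigned)effective_limit(0xfffff, 1));
	/* limit = 0x0ffff, G=0 -> 64 KiB - 1: a classic 16-bit segment. */
	printf("%#x\n", (unsigned)effective_limit(0x0ffff, 0));
	return 0;
}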
Signed-off-by: Marcelo Tosatti
Signed-off-by: Avi Kivity
---
 arch/x86/kvm/x86.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 27c6ece91da6..5916191420c7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3184,6 +3184,10 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector,
 	kvm_desct->base |= seg_desc->base2 << 24;
 	kvm_desct->limit = seg_desc->limit0;
 	kvm_desct->limit |= seg_desc->limit << 16;
+	if (seg_desc->g) {
+		kvm_desct->limit <<= 12;
+		kvm_desct->limit |= 0xfff;
+	}
 	kvm_desct->selector = selector;
 	kvm_desct->type = seg_desc->type;
 	kvm_desct->present = seg_desc->p;

From 5ec5726a16245138f5d5305b00a752acb5730076 Mon Sep 17 00:00:00 2001
From: Sheng Yang
Date: Wed, 16 Jul 2008 09:21:22 +0800
Subject: [PATCH 06/15] KVM: VMX: Fix bypass_guest_pf enabling when EPT is
 disabled via module parameter

Signed-off-by: Sheng Yang
Signed-off-by: Avi Kivity
---
 arch/x86/kvm/vmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b918fc83435c..f71151d999e4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3305,7 +3305,7 @@ static int __init vmx_init(void)
 	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP);
 	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP);
 
-	if (cpu_has_vmx_ept())
+	if (vm_need_ept())
 		bypass_guest_pf = 0;
 
 	if (bypass_guest_pf)

From 5fdbcb9dd16f1e89ead127d3ee1a38e3a00cf1ea Mon Sep 17 00:00:00 2001
From: Sheng Yang
Date: Wed, 16 Jul 2008 09:25:40 +0800
Subject: [PATCH 07/15] KVM: VMX: Fix undefined behaviour of EPT after
 reloading kvm-intel.ko

Also move setting the base/mask ptes to vmx_init().

Signed-off-by: Sheng Yang
Signed-off-by: Avi Kivity
---
 arch/x86/kvm/vmx.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f71151d999e4..2a69773e3b26 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3118,15 +3118,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 		return ERR_PTR(-ENOMEM);
 
 	allocate_vpid(vmx);
-	if (id == 0 && vm_need_ept()) {
-		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
-			VMX_EPT_WRITABLE_MASK |
-			VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
-		kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK,
-			VMX_EPT_FAKE_DIRTY_MASK, 0ull,
-			VMX_EPT_EXECUTABLE_MASK);
-		kvm_enable_tdp();
-	}
 
 	err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
 	if (err)
@@ -3305,8 +3296,17 @@ static int __init vmx_init(void)
 	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP);
 	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP);
 
-	if (vm_need_ept())
+	if (vm_need_ept()) {
 		bypass_guest_pf = 0;
+		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
+			VMX_EPT_WRITABLE_MASK |
+			VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
+		kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK,
+			VMX_EPT_FAKE_DIRTY_MASK, 0ull,
+			VMX_EPT_EXECUTABLE_MASK);
+		kvm_enable_tdp();
+	} else
+		kvm_disable_tdp();
 
 	if (bypass_guest_pf)
 		kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);

From cab7a1eeeb007be309cd99cf14407261a72d2418 Mon Sep 17 00:00:00 2001
From: Julia Lawall
Date: Tue, 22 Jul 2008 21:38:18 +0200
Subject: [PATCH 08/15] KVM: ia64: Fix irq disabling leak in error handling
 code

There is a call to local_irq_restore in the normal exit case, so it
would seem that there should be one on an error return as well.
The semantic patch that finds this problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @@ expression l; expression E,E1,E2; @@ local_irq_save(l); ... when != local_irq_restore(l) when != spin_unlock_irqrestore(E,l) when any when strict ( if (...) { ... when != local_irq_restore(l) when != spin_unlock_irqrestore(E1,l) + local_irq_restore(l); return ...; } | if (...) + {local_irq_restore(l); return ...; + } | spin_unlock_irqrestore(E2,l); | local_irq_restore(l); ) // Signed-off-by: Julia Lawall Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 2672f4d278ac..7a37d06376be 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -125,9 +125,9 @@ void kvm_arch_hardware_enable(void *garbage) PAGE_KERNEL)); local_irq_save(saved_psr); slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); + local_irq_restore(saved_psr); if (slot < 0) return; - local_irq_restore(saved_psr); spin_lock(&vp_lock); status = ia64_pal_vp_init_env(kvm_vsa_base ? @@ -160,9 +160,9 @@ void kvm_arch_hardware_disable(void *garbage) local_irq_save(saved_psr); slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT); + local_irq_restore(saved_psr); if (slot < 0) return; - local_irq_restore(saved_psr); status = ia64_pal_vp_exit_env(host_iva); if (status) @@ -1253,6 +1253,7 @@ static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id) uninit: kvm_vcpu_uninit(vcpu); fail: + local_irq_restore(psr); return r; } From 2bd0ac4eb469ef58c3b1746fccd15da871fc55c4 Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Fri, 25 Jul 2008 15:49:13 +0200 Subject: [PATCH 09/15] KVM: s390: Advertise KVM_CAP_USER_MEMORY KVM_CAP_USER_MEMORY is used by s390, therefore, we should advertise it. Signed-off-by: Carsten Otte Signed-off-by: Christian Borntraeger Signed-off-by: Avi Kivity --- arch/s390/kvm/kvm-s390.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 1782cbcd2829..fcd41795b550 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -112,7 +112,12 @@ long kvm_arch_dev_ioctl(struct file *filp, int kvm_dev_ioctl_check_extension(long ext) { - return 0; + switch (ext) { + case KVM_CAP_USER_MEMORY: + return 1; + default: + return 0; + } } /* Section: vm related */ From 99e65c92f2bbf84f43766a8bf701e36817d62822 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 25 Jul 2008 15:50:04 +0200 Subject: [PATCH 10/15] KVM: s390: Fix guest kconfig Cornelia Huck noticed that a modular virtio without kvm guest support leads to a build error in the s390 virtio transport: CONFIG_VIRTIO=m leads to ERROR: "vmem_add_mapping" [drivers/s390/kvm/kvm_virtio.ko] undefined! ERROR: "max_pfn" [drivers/s390/kvm/kvm_virtio.ko] undefined! ERROR: "vmem_remove_mapping" [drivers/s390/kvm/kvm_virtio.ko] undefined! The virtio transport only works with kvm guest support and only as a builtin. Lets change the build process of drivers/s390/kvm/kvm_virtio.c to depend on kvm guest support, which is also a bool. CONFIG_S390_GUEST already selects CONFIG_VIRTIO, that should prevent CONFIG_S390_GUEST=y CONFIG_VIRTIO=n situations. 
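As background for patch 09 above: userspace discovers a capability through the KVM_CHECK_EXTENSION ioctl on /dev/kvm, which is what it consults before relying on KVM_SET_USER_MEMORY_REGION; before that patch, an s390 probe for KVM_CAP_USER_MEMORY always returned 0. A minimal probe, with error handling trimmed:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm = open("/dev/kvm", O_RDWR);
	if (kvm < 0) {
		perror("open /dev/kvm");
		return 1;
	}

	/* Returns 0 when the capability is absent, nonzero when present. */
	int r = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_USER_MEMORY);
	printf("KVM_CAP_USER_MEMORY: %s\n", r ? "supported" : "not advertised");
	return 0;
}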
CC: Cornelia Huck Signed-off-by: Christian Borntraeger Signed-off-by: Avi Kivity --- drivers/s390/kvm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/s390/kvm/Makefile b/drivers/s390/kvm/Makefile index 4a5ec39f9ca6..0815690ac1e0 100644 --- a/drivers/s390/kvm/Makefile +++ b/drivers/s390/kvm/Makefile @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License (version 2 only) # as published by the Free Software Foundation. -obj-$(CONFIG_VIRTIO) += kvm_virtio.o +obj-$(CONFIG_S390_GUEST) += kvm_virtio.o From 0096369daa9eaaef1a309e5d8167b023af3f998d Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 25 Jul 2008 15:51:00 +0200 Subject: [PATCH 11/15] KVM: s390: Change guestaddr type in gaccess All registers are unsigned long types. This patch changes all occurences of guestaddr in gaccess from u64 to unsigned long. Signed-off-by: Martin Schwidefsky Signed-off-by: Christian Borntraeger Signed-off-by: Avi Kivity --- arch/s390/kvm/gaccess.h | 62 ++++++++++++++++++++----------------- arch/s390/kvm/sigp.c | 5 +-- include/asm-s390/kvm_host.h | 2 +- 3 files changed, 37 insertions(+), 32 deletions(-) diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 4e0633c413f3..ed60f3a74a85 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -18,11 +18,11 @@ #include static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu, - u64 guestaddr) + unsigned long guestaddr) { - u64 prefix = vcpu->arch.sie_block->prefix; - u64 origin = vcpu->kvm->arch.guest_origin; - u64 memsize = vcpu->kvm->arch.guest_memsize; + unsigned long prefix = vcpu->arch.sie_block->prefix; + unsigned long origin = vcpu->kvm->arch.guest_origin; + unsigned long memsize = vcpu->kvm->arch.guest_memsize; if (guestaddr < 2 * PAGE_SIZE) guestaddr += prefix; @@ -37,7 +37,7 @@ static inline void __user *__guestaddr_to_user(struct kvm_vcpu *vcpu, return (void __user *) guestaddr; } -static inline int get_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr, +static inline int get_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr, u64 *result) { void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); @@ -47,10 +47,10 @@ static inline int get_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr, if (IS_ERR((void __force *) uptr)) return PTR_ERR((void __force *) uptr); - return get_user(*result, (u64 __user *) uptr); + return get_user(*result, (unsigned long __user *) uptr); } -static inline int get_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr, +static inline int get_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr, u32 *result) { void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); @@ -63,7 +63,7 @@ static inline int get_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr, return get_user(*result, (u32 __user *) uptr); } -static inline int get_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr, +static inline int get_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr, u16 *result) { void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); @@ -76,7 +76,7 @@ static inline int get_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr, return get_user(*result, (u16 __user *) uptr); } -static inline int get_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr, +static inline int get_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr, u8 *result) { void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); @@ -87,7 +87,7 @@ static inline int get_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr, return get_user(*result, (u8 __user *) uptr); } -static inline int 
put_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr, +static inline int put_guest_u64(struct kvm_vcpu *vcpu, unsigned long guestaddr, u64 value) { void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); @@ -100,7 +100,7 @@ static inline int put_guest_u64(struct kvm_vcpu *vcpu, u64 guestaddr, return put_user(value, (u64 __user *) uptr); } -static inline int put_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr, +static inline int put_guest_u32(struct kvm_vcpu *vcpu, unsigned long guestaddr, u32 value) { void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); @@ -113,7 +113,7 @@ static inline int put_guest_u32(struct kvm_vcpu *vcpu, u64 guestaddr, return put_user(value, (u32 __user *) uptr); } -static inline int put_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr, +static inline int put_guest_u16(struct kvm_vcpu *vcpu, unsigned long guestaddr, u16 value) { void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); @@ -126,7 +126,7 @@ static inline int put_guest_u16(struct kvm_vcpu *vcpu, u64 guestaddr, return put_user(value, (u16 __user *) uptr); } -static inline int put_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr, +static inline int put_guest_u8(struct kvm_vcpu *vcpu, unsigned long guestaddr, u8 value) { void __user *uptr = __guestaddr_to_user(vcpu, guestaddr); @@ -138,7 +138,8 @@ static inline int put_guest_u8(struct kvm_vcpu *vcpu, u64 guestaddr, } -static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, u64 guestdest, +static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, + unsigned long guestdest, const void *from, unsigned long n) { int rc; @@ -153,12 +154,12 @@ static inline int __copy_to_guest_slow(struct kvm_vcpu *vcpu, u64 guestdest, return 0; } -static inline int copy_to_guest(struct kvm_vcpu *vcpu, u64 guestdest, +static inline int copy_to_guest(struct kvm_vcpu *vcpu, unsigned long guestdest, const void *from, unsigned long n) { - u64 prefix = vcpu->arch.sie_block->prefix; - u64 origin = vcpu->kvm->arch.guest_origin; - u64 memsize = vcpu->kvm->arch.guest_memsize; + unsigned long prefix = vcpu->arch.sie_block->prefix; + unsigned long origin = vcpu->kvm->arch.guest_origin; + unsigned long memsize = vcpu->kvm->arch.guest_memsize; if ((guestdest < 2 * PAGE_SIZE) && (guestdest + n > 2 * PAGE_SIZE)) goto slowpath; @@ -189,7 +190,8 @@ slowpath: } static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to, - u64 guestsrc, unsigned long n) + unsigned long guestsrc, + unsigned long n) { int rc; unsigned long i; @@ -204,11 +206,11 @@ static inline int __copy_from_guest_slow(struct kvm_vcpu *vcpu, void *to, } static inline int copy_from_guest(struct kvm_vcpu *vcpu, void *to, - u64 guestsrc, unsigned long n) + unsigned long guestsrc, unsigned long n) { - u64 prefix = vcpu->arch.sie_block->prefix; - u64 origin = vcpu->kvm->arch.guest_origin; - u64 memsize = vcpu->kvm->arch.guest_memsize; + unsigned long prefix = vcpu->arch.sie_block->prefix; + unsigned long origin = vcpu->kvm->arch.guest_origin; + unsigned long memsize = vcpu->kvm->arch.guest_memsize; if ((guestsrc < 2 * PAGE_SIZE) && (guestsrc + n > 2 * PAGE_SIZE)) goto slowpath; @@ -238,11 +240,12 @@ slowpath: return __copy_from_guest_slow(vcpu, to, guestsrc, n); } -static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, u64 guestdest, +static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, + unsigned long guestdest, const void *from, unsigned long n) { - u64 origin = vcpu->kvm->arch.guest_origin; - u64 memsize = vcpu->kvm->arch.guest_memsize; + unsigned long origin = vcpu->kvm->arch.guest_origin; 
+ unsigned long memsize = vcpu->kvm->arch.guest_memsize; if (guestdest + n > memsize) return -EFAULT; @@ -256,10 +259,11 @@ static inline int copy_to_guest_absolute(struct kvm_vcpu *vcpu, u64 guestdest, } static inline int copy_from_guest_absolute(struct kvm_vcpu *vcpu, void *to, - u64 guestsrc, unsigned long n) + unsigned long guestsrc, + unsigned long n) { - u64 origin = vcpu->kvm->arch.guest_origin; - u64 memsize = vcpu->kvm->arch.guest_memsize; + unsigned long origin = vcpu->kvm->arch.guest_origin; + unsigned long memsize = vcpu->kvm->arch.guest_memsize; if (guestsrc + n > memsize) return -EFAULT; diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 5a556114eaa5..170392687ce0 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -43,7 +43,8 @@ #define SIGP_STAT_RECEIVER_CHECK 0x00000001UL -static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg) +static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr, + unsigned long *reg) { struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; int rc; @@ -167,7 +168,7 @@ static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) } static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address, - u64 *reg) + unsigned long *reg) { struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; struct kvm_s390_local_interrupt *li; diff --git a/include/asm-s390/kvm_host.h b/include/asm-s390/kvm_host.h index 3234dd5b3511..6583c0d67757 100644 --- a/include/asm-s390/kvm_host.h +++ b/include/asm-s390/kvm_host.h @@ -231,5 +231,5 @@ struct kvm_arch{ struct kvm_s390_float_interrupt float_int; }; -extern int sie64a(struct kvm_s390_sie_block *, __u64 *); +extern int sie64a(struct kvm_s390_sie_block *, unsigned long *); #endif From 3cd612998f17d5b3588be7f4937720411d247ff6 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 25 Jul 2008 15:51:54 +0200 Subject: [PATCH 12/15] KVM: s390: Fix program check on interrupt delivery handling The current interrupt handling on s390 misbehaves on an error case. On s390 each cpu has the prefix area (lowcore) for interrupt delivery. This memory must always be available. If we fail to access the prefix area for a guest on interrupt delivery the configuration is completely unusable. There is no point in sending another program interrupt to an inaccessible lowcore. Furthermore, we should not bug the host kernel, because this can be triggered by userspace. I think the guest kernel itself can not trigger the problem, as SET PREFIX and SIGNAL PROCESSOR SET PREFIX both check that the memory is available and sane. As this is a userspace bug (e.g. setting the wrong guest offset, unmapping guest memory) we should kill the userspace process instead of BUGing the host kernel. In the long term we probably should notify the userspace process about this problem. 
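The design choice is the kernel's usual one: a condition that only buggy userspace can create should kill that process, not BUG() the host. A user-space stand-in for the new behavior (illustrative only, not kernel code):

#include <signal.h>
#include <stdio.h>

static int toy_deliver_interrupt(int lowcore_mapped)
{
	if (!lowcore_mapped) {
		fprintf(stderr,
			"guest lowcore not mapped, killing process\n");
		raise(SIGKILL);	/* stands in for do_exit(SIGKILL) */
	}
	return 0;	/* normal delivery path */
}

int main(void)
{
	toy_deliver_interrupt(0);	/* never returns */
	return 0;
}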
Signed-off-by: Christian Borntraeger Signed-off-by: Avi Kivity --- arch/s390/kvm/interrupt.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 11230b0db957..2960702b4824 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "kvm-s390.h" #include "gaccess.h" @@ -246,15 +247,10 @@ static void __do_deliver_interrupt(struct kvm_vcpu *vcpu, default: BUG(); } - if (exception) { - VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering" - " interrupt"); - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - if (inti->type == KVM_S390_PROGRAM_INT) { - printk(KERN_WARNING "kvm: recursive program check\n"); - BUG(); - } + printk("kvm: The guest lowcore is not mapped during interrupt " + "delivery, killing userspace\n"); + do_exit(SIGKILL); } } @@ -277,14 +273,11 @@ static int __try_deliver_ckc_interrupt(struct kvm_vcpu *vcpu) __LC_EXT_NEW_PSW, sizeof(psw_t)); if (rc == -EFAULT) exception = 1; - if (exception) { - VCPU_EVENT(vcpu, 1, "%s", "program exception while delivering" \ - " ckc interrupt"); - kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - return 0; + printk("kvm: The guest lowcore is not mapped during interrupt " + "delivery, killing userspace\n"); + do_exit(SIGKILL); } - return 1; } From f5e10b09a5f8fc40666c95fe0cd6bcc2b8f11437 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 25 Jul 2008 15:52:44 +0200 Subject: [PATCH 13/15] KVM: s390: Fix instruction naming for lctlg Lets fix the name for the lctlg instruction... Signed-off-by: Christian Borntraeger Signed-off-by: Avi Kivity --- arch/s390/kvm/intercept.c | 8 ++++---- arch/s390/kvm/kvm-s390.c | 2 +- include/asm-s390/kvm_host.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 47a0b642174c..f94da68a5c22 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -20,7 +20,7 @@ #include "kvm-s390.h" #include "gaccess.h" -static int handle_lctg(struct kvm_vcpu *vcpu) +static int handle_lctlg(struct kvm_vcpu *vcpu) { int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4; int reg3 = vcpu->arch.sie_block->ipa & 0x000f; @@ -30,7 +30,7 @@ static int handle_lctg(struct kvm_vcpu *vcpu) u64 useraddr; int reg, rc; - vcpu->stat.instruction_lctg++; + vcpu->stat.instruction_lctlg++; if ((vcpu->arch.sie_block->ipb & 0xff) != 0x2f) return -ENOTSUPP; @@ -40,7 +40,7 @@ static int handle_lctg(struct kvm_vcpu *vcpu) reg = reg1; - VCPU_EVENT(vcpu, 5, "lctg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2, + VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2, disp2); do { @@ -99,7 +99,7 @@ static intercept_handler_t instruction_handlers[256] = { [0xae] = kvm_s390_handle_sigp, [0xb2] = kvm_s390_handle_priv, [0xb7] = handle_lctl, - [0xeb] = handle_lctg, + [0xeb] = handle_lctlg, }; static int handle_noop(struct kvm_vcpu *vcpu) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index fcd41795b550..8b00eb2ddf57 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -39,7 +39,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "exit_instruction", VCPU_STAT(exit_instruction) }, { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, - { "instruction_lctg", VCPU_STAT(instruction_lctg) }, + { "instruction_lctlg", VCPU_STAT(instruction_lctlg) 
},
 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
diff --git a/include/asm-s390/kvm_host.h b/include/asm-s390/kvm_host.h
index 6583c0d67757..3c55e4107dcc 100644
--- a/include/asm-s390/kvm_host.h
+++ b/include/asm-s390/kvm_host.h
@@ -111,7 +111,7 @@ struct kvm_vcpu_stat {
 	u32 exit_validity;
 	u32 exit_instruction;
 	u32 instruction_lctl;
-	u32 instruction_lctg;
+	u32 instruction_lctlg;
 	u32 exit_program_interruption;
 	u32 exit_instr_and_program;
 	u32 deliver_emergency_signal;

From 5a00a5e7a3e013b2323f87c1b69ff9557eae5ec9 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger
Date: Fri, 25 Jul 2008 15:53:12 +0200
Subject: [PATCH 14/15] KVM: s390: Fix possible host kernel bug on lctl(g)
 handling

The lctl(g) instructions require a specific alignment for their
parameters; the architecture mandates a specification program check if
these alignments are not met. Enforcing the alignment also removes a
possible host BUG, since the get_guest functions check for proper
alignment and emit a BUG otherwise.

Signed-off-by: Christian Borntraeger
Signed-off-by: Avi Kivity
---
 arch/s390/kvm/intercept.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index f94da68a5c22..61236102203e 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -38,6 +38,9 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
 	if (base2)
 		useraddr += vcpu->arch.guest_gprs[base2];
 
+	if (useraddr & 7)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
 	reg = reg1;
 
 	VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
@@ -74,6 +77,9 @@ static int handle_lctl(struct kvm_vcpu *vcpu)
 	if (base2)
 		useraddr += vcpu->arch.guest_gprs[base2];
 
+	if (useraddr & 3)
+		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
+
 	VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x,b2:%x,d2:%x", reg1, reg3, base2,
 		   disp2);

From cc04454fa81e93b5f1b5133950331639d2f59f85 Mon Sep 17 00:00:00 2001
From: Hollis Blanchard
Date: Fri, 25 Jul 2008 13:54:50 -0500
Subject: [PATCH 15/15] KVM: ppc: fix invalidation of large guest pages

When the guest invalidates a large TLB mapping, more than one
corresponding shadow TLB mapping may need to be invalidated. Use eaddr
and eend to find these shadow TLB mappings.
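The corrected loop treats invalidation as an interval test: a shadow entry survives only if [eaddr, eend] is disjoint from the entry's own range, i.e. it is skipped when eend < get_tlb_eaddr(stlbe) or eaddr > get_tlb_end(stlbe). A standalone check of that interval logic, with made-up addresses:

#include <stdint.h>
#include <stdio.h>

/* Mirrors the two "continue" conditions in kvmppc_mmu_invalidate(). */
static int overlaps(uint64_t eaddr, uint64_t eend,
		    uint64_t shadow_start, uint64_t shadow_end)
{
	return !(eend < shadow_start || eaddr > shadow_end);
}

int main(void)
{
	/* A 256 KiB guest mapping at 0x40000 backed by 4 KiB shadow
	 * entries: invalidating [0x40000, 0x7ffff] must hit all of them,
	 * which a single-address comparison would miss. */
	uint64_t eaddr = 0x40000, eend = 0x7ffff;

	printf("shadow @0x41000: %d\n", overlaps(eaddr, eend, 0x41000, 0x41fff));
	printf("shadow @0x7f000: %d\n", overlaps(eaddr, eend, 0x7f000, 0x7ffff));
	printf("shadow @0x80000: %d\n", overlaps(eaddr, eend, 0x80000, 0x80fff));
	return 0;
}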
Signed-off-by: Liu Yu Signed-off-by: Hollis Blanchard Signed-off-by: Avi Kivity --- arch/powerpc/kvm/44x_tlb.c | 5 +++-- arch/powerpc/kvm/emulate.c | 2 +- include/asm-powerpc/kvm_ppc.h | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c index 75dff7cfa814..5a5602da5091 100644 --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -177,7 +177,8 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, vcpu->arch.msr & MSR_PR); } -void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, u64 eaddr, u64 asid) +void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, + gva_t eend, u32 asid) { unsigned int pid = asid & 0xff; int i; @@ -191,7 +192,7 @@ void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, u64 eaddr, u64 asid) if (!get_tlb_v(stlbe)) continue; - if (eaddr < get_tlb_eaddr(stlbe)) + if (eend < get_tlb_eaddr(stlbe)) continue; if (eaddr > get_tlb_end(stlbe)) diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 000097461283..8c605d0a5488 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -137,7 +137,7 @@ static int kvmppc_emul_tlbwe(struct kvm_vcpu *vcpu, u32 inst) if (tlbe->word0 & PPC44x_TLB_VALID) { eaddr = get_tlb_eaddr(tlbe); asid = (tlbe->word0 & PPC44x_TLB_TS) | tlbe->tid; - kvmppc_mmu_invalidate(vcpu, eaddr, asid); + kvmppc_mmu_invalidate(vcpu, eaddr, get_tlb_end(tlbe), asid); } switch (ws) { diff --git a/include/asm-powerpc/kvm_ppc.h b/include/asm-powerpc/kvm_ppc.h index 5a21115228af..a8b068792260 100644 --- a/include/asm-powerpc/kvm_ppc.h +++ b/include/asm-powerpc/kvm_ppc.h @@ -61,7 +61,8 @@ extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, u32 flags); -extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, u64 eaddr, u64 asid); +extern void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t eaddr, + gva_t eend, u32 asid); extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); extern void kvmppc_check_and_deliver_interrupts(struct kvm_vcpu *vcpu);