From c4e0e4ab4cf3ec2b3f0b628ead108d677644ebd9 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Mon, 11 May 2020 15:56:16 -0700
Subject: [PATCH 1/6] KVM: x86: Fix off-by-one error in kvm_vcpu_ioctl_x86_setup_mce

Bank_num is a one-based count of banks, not a zero-based index. It
overflows the allocated space only when strictly greater than
KVM_MAX_MCE_BANKS.

Fixes: a9e38c3e01ad ("KVM: x86: Catch potential overrun in MCE setup")
Signed-off-by: Jue Wang <juew@google.com>
Signed-off-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Peter Shier <pshier@google.com>
Message-Id: <20200511225616.19557-1-jmattson@google.com>
Reviewed-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d11eba8b85c6..c17e6eb9ad43 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3759,7 +3759,7 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
 	unsigned bank_num = mcg_cap & 0xff, bank;
 
 	r = -EINVAL;
-	if (!bank_num || bank_num >= KVM_MAX_MCE_BANKS)
+	if (!bank_num || bank_num > KVM_MAX_MCE_BANKS)
 		goto out;
 	if (mcg_cap & ~(kvm_mce_cap_supported | 0xff | 0xff0000))
 		goto out;

From d43e2675e96fc6ae1a633b6a69d296394448cc32 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 19 May 2020 05:34:41 -0400
Subject: [PATCH 2/6] KVM: x86: only do L1TF workaround on affected processors

KVM stores the gfn in MMIO SPTEs as a caching optimization. These are
split in two parts, as in "[high 11111 low]", to thwart any attempt to
use these bits in an L1TF attack. This works as long as there are 5
free bits between MAXPHYADDR and bit 50 (inclusive), leaving bit 51
free so that the MMIO access triggers a reserved-bit-set page fault.

The bit positions however were computed wrongly for AMD processors
that have encryption support. In this case, x86_phys_bits is reduced
(for example from 48 to 43, to account for the C bit at position 47
and four bits used internally to store the SEV ASID and other stuff)
while x86_cache_bits would remain set to 48, and _all_ bits between
the reduced MAXPHYADDR and bit 51 are set. Then low_phys_bits would
also cover some of the bits that are set in the shadow_mmio_value,
terribly confusing the gfn caching mechanism.

To fix this, avoid splitting gfns as long as the processor does not
have the L1TF bug (which includes all AMD processors). When there is
no splitting, low_phys_bits can be set to the reduced MAXPHYADDR,
removing the overlap. This fixes "npt=0" operation on EPYC processors.

Thanks to Maxim Levitsky for bisecting this bug.
Cc: stable@vger.kernel.org
Fixes: 52918ed5fcf0 ("KVM: SVM: Override default MMIO mask if memory encryption is enabled")
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 8071952e9cf2..86619631ff6a 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -335,6 +335,8 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask)
 {
 	BUG_ON((u64)(unsigned)access_mask != access_mask);
 	BUG_ON((mmio_mask & mmio_value) != mmio_value);
+	WARN_ON(mmio_value & (shadow_nonpresent_or_rsvd_mask << shadow_nonpresent_or_rsvd_mask_len));
+	WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask);
 	shadow_mmio_value = mmio_value | SPTE_MMIO_MASK;
 	shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK;
 	shadow_mmio_access_mask = access_mask;
@@ -583,16 +585,15 @@ static void kvm_mmu_reset_all_pte_masks(void)
 	 * the most significant bits of legal physical address space.
 	 */
 	shadow_nonpresent_or_rsvd_mask = 0;
-	low_phys_bits = boot_cpu_data.x86_cache_bits;
-	if (boot_cpu_data.x86_cache_bits <
-	    52 - shadow_nonpresent_or_rsvd_mask_len) {
+	low_phys_bits = boot_cpu_data.x86_phys_bits;
+	if (boot_cpu_has_bug(X86_BUG_L1TF) &&
+	    !WARN_ON_ONCE(boot_cpu_data.x86_cache_bits >=
+			  52 - shadow_nonpresent_or_rsvd_mask_len)) {
+		low_phys_bits = boot_cpu_data.x86_cache_bits
+			- shadow_nonpresent_or_rsvd_mask_len;
 		shadow_nonpresent_or_rsvd_mask =
-			rsvd_bits(boot_cpu_data.x86_cache_bits -
-				  shadow_nonpresent_or_rsvd_mask_len,
-				  boot_cpu_data.x86_cache_bits - 1);
-		low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len;
-	} else
-		WARN_ON_ONCE(boot_cpu_has_bug(X86_BUG_L1TF));
+			rsvd_bits(low_phys_bits, boot_cpu_data.x86_cache_bits - 1);
+	}
 
 	shadow_nonpresent_or_rsvd_lower_gfn_mask =
 		GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT);

From 6129ed877d409037b79866327102c9dc59a302fe Mon Sep 17 00:00:00 2001
From: Sean Christopherson <sean.j.christopherson@intel.com>
Date: Wed, 27 May 2020 01:49:09 -0700
Subject: [PATCH 3/6] KVM: x86/mmu: Set mmio_value to '0' if reserved #PF can't be generated

Set the mmio_value to '0' instead of simply clearing the present bit
to squash a benign warning in kvm_mmu_set_mmio_spte_mask() that
complains about the mmio_value overlapping the lower GFN mask on
systems with 52 bits of PA space.

Opportunistically clean up the code and comments.

Cc: stable@vger.kernel.org
Fixes: d43e2675e96fc ("KVM: x86: only do L1TF workaround on affected processors")
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Message-Id: <20200527084909.23492-1-sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 27 +++++++++------------------
 1 file changed, 9 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 86619631ff6a..92d056954194 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6143,25 +6143,16 @@ static void kvm_set_mmio_spte_mask(void)
 	u64 mask;
 
 	/*
-	 * Set the reserved bits and the present bit of an paging-structure
-	 * entry to generate page fault with PFER.RSV = 1.
+	 * Set a reserved PA bit in MMIO SPTEs to generate page faults with
+	 * PFEC.RSVD=1 on MMIO accesses. 64-bit PTEs (PAE, x86-64, and EPT
+	 * paging) support a maximum of 52 bits of PA, i.e. if the CPU supports
+	 * 52-bit physical addresses then there are no reserved PA bits in the
+	 * PTEs and so the reserved PA approach must be disabled.
 	 */
-
-	/*
-	 * Mask the uppermost physical address bit, which would be reserved as
-	 * long as the supported physical address width is less than 52.
-	 */
-	mask = 1ull << 51;
-
-	/* Set the present bit. */
-	mask |= 1ull;
-
-	/*
-	 * If reserved bit is not supported, clear the present bit to disable
-	 * mmio page fault.
-	 */
-	if (shadow_phys_bits == 52)
-		mask &= ~1ull;
+	if (shadow_phys_bits < 52)
+		mask = BIT_ULL(51) | PT_PRESENT_MASK;
+	else
+		mask = 0;
 
 	kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
 }

From 0abcc8f65cc23b65bc8d1614cc64b02b1641ed7c Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Sat, 23 May 2020 19:14:54 +0300
Subject: [PATCH 4/6] KVM: VMX: enable X86_FEATURE_WAITPKG in KVM capabilities

Even though we might not allow the guest to use WAITPKG's new
instructions, we should tell KVM that the feature is supported by the
host CPU.

Note that vmx_waitpkg_supported checks that WAITPKG _can_ be set in
secondary execution controls as specified by VMX capability MSR,
rather than that we actually enable it for a guest.

Cc: stable@vger.kernel.org
Fixes: e69e72faa3a0 ("KVM: x86: Add support for user wait instructions")
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20200523161455.3940-2-mlevitsk@redhat.com>
Reviewed-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmx.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 89c766fad889..9b63ac8d97ee 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7138,6 +7138,9 @@ static __init void vmx_set_cpu_caps(void)
 	/* CPUID 0x80000001 */
 	if (!cpu_has_vmx_rdtscp())
 		kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
+
+	if (vmx_waitpkg_supported())
+		kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
 }
 
 static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu)

From f4cfcd2d5aea4e96c5d483c476f3057b6b7baf6a Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Sat, 23 May 2020 19:14:55 +0300
Subject: [PATCH 5/6] KVM: x86: don't expose MSR_IA32_UMWAIT_CONTROL unconditionally

This MSR is only available when the host supports the WAITPKG feature.
Exposing it unconditionally breaks a nested guest if the L1 hypervisor
is set to ignore unknown MSRs, because the only other safety check the
kernel does is to attempt to read the MSR and reject it if that read
raises an exception.

Cc: stable@vger.kernel.org
Fixes: 6e3ba4abce ("KVM: vmx: Emulate MSR IA32_UMWAIT_CONTROL")
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20200523161455.3940-3-mlevitsk@redhat.com>
Reviewed-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c17e6eb9ad43..e0083a08da9e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5242,6 +5242,10 @@ static void kvm_init_msr_list(void)
 		if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
 			continue;
 		break;
+	case MSR_IA32_UMWAIT_CONTROL:
+		if (!kvm_cpu_cap_has(X86_FEATURE_WAITPKG))
+			continue;
+		break;
 	case MSR_IA32_RTIT_CTL:
 	case MSR_IA32_RTIT_STATUS:
 		if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT))

From e7581caca4c105d81a490a3e15cf46d6407e3fa7 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 19 May 2020 05:04:49 -0400
Subject: [PATCH 6/6] KVM: x86: simplify is_mmio_spte

We can simply look at bits 52-53 to identify MMIO entries in KVM's page
tables. Therefore, there is no need to pass a mask to
kvm_mmu_set_mmio_spte_mask.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu.h     |  2 +-
 arch/x86/kvm/mmu/mmu.c | 10 +++-------
 arch/x86/kvm/svm/svm.c |  2 +-
 arch/x86/kvm/vmx/vmx.c |  3 +--
 4 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 8a3b1bce722a..048e865ad485 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -51,7 +51,7 @@ static inline u64 rsvd_bits(int s, int e)
 	return ((1ULL << (e - s + 1)) - 1) << s;
 }
 
-void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask);
+void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask);
 
 void
 reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context);

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 92d056954194..bef05a437a89 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -244,7 +244,6 @@ static u64 __read_mostly shadow_x_mask;	/* mutual exclusive with nx_mask */
 static u64 __read_mostly shadow_user_mask;
 static u64 __read_mostly shadow_accessed_mask;
 static u64 __read_mostly shadow_dirty_mask;
-static u64 __read_mostly shadow_mmio_mask;
 static u64 __read_mostly shadow_mmio_value;
 static u64 __read_mostly shadow_mmio_access_mask;
 static u64 __read_mostly shadow_present_mask;
@@ -331,21 +330,19 @@ static void kvm_flush_remote_tlbs_with_address(struct kvm *kvm,
 	kvm_flush_remote_tlbs_with_range(kvm, &range);
 }
 
-void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask)
+void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask)
 {
 	BUG_ON((u64)(unsigned)access_mask != access_mask);
-	BUG_ON((mmio_mask & mmio_value) != mmio_value);
 	WARN_ON(mmio_value & (shadow_nonpresent_or_rsvd_mask << shadow_nonpresent_or_rsvd_mask_len));
 	WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask);
 	shadow_mmio_value = mmio_value | SPTE_MMIO_MASK;
-	shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK;
 	shadow_mmio_access_mask = access_mask;
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
 
 static bool is_mmio_spte(u64 spte)
 {
-	return (spte & shadow_mmio_mask) == shadow_mmio_value;
+	return (spte & SPTE_SPECIAL_MASK) == SPTE_MMIO_MASK;
 }
 
 static inline bool sp_ad_disabled(struct kvm_mmu_page *sp)
@@ -568,7 +565,6 @@ static void kvm_mmu_reset_all_pte_masks(void)
 	shadow_dirty_mask = 0;
 	shadow_nx_mask = 0;
 	shadow_x_mask = 0;
-	shadow_mmio_mask = 0;
 	shadow_present_mask = 0;
 	shadow_acc_track_mask = 0;
 
@@ -6154,7 +6150,7 @@ static void kvm_set_mmio_spte_mask(void)
 	else
 		mask = 0;
 
-	kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
+	kvm_mmu_set_mmio_spte_mask(mask, ACC_WRITE_MASK | ACC_USER_MASK);
 }
 
 static bool get_nx_auto_mode(void)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index a862c768fd54..3c9b321a420a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -780,7 +780,7 @@ static __init void svm_adjust_mmio_mask(void)
 	 */
 	mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0;
 
-	kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK);
+	kvm_mmu_set_mmio_spte_mask(mask, PT_WRITABLE_MASK | PT_USER_MASK);
 }
 
 static void svm_hardware_teardown(void)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 9b63ac8d97ee..65ff16f00c08 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4147,8 +4147,7 @@ static void ept_set_mmio_spte_mask(void)
 	 * EPT Misconfigurations can be generated if the value of bits 2:0
 	 * of an EPT paging-structure entry is 110b (write/execute).
 	 */
-	kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK,
-				   VMX_EPT_MISCONFIG_WX_VALUE, 0);
+	kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE, 0);
 }
 
 #define VMX_XSS_EXIT_BITMAP 0
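
To make patch 1's off-by-one concrete: KVM_MAX_MCE_BANKS is 32 in the kernel,
and bank_num counts banks rather than indexing them, so the value 32 itself
must be accepted. The standalone C sketch below mirrors the fixed check;
mce_banks_valid() is a hypothetical helper written only for illustration:

/* Illustrative userspace sketch, not kernel code. */
#include <assert.h>

#define KVM_MAX_MCE_BANKS 32

static int mce_banks_valid(unsigned int bank_num)
{
	/* bank_num is a count of banks (1..32), not an index (0..31). */
	return bank_num && bank_num <= KVM_MAX_MCE_BANKS;
}

int main(void)
{
	assert(!mce_banks_valid(0));	/* zero banks is invalid */
	assert(mce_banks_valid(32));	/* exactly 32 banks fits the array */
	assert(!mce_banks_valid(33));	/* 33 would overflow it */
	return 0;
}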
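The mask arithmetic behind patch 2 can be reproduced in userspace. This is an
illustrative sketch of the post-patch logic in kvm_mmu_reset_all_pte_masks(),
not kernel code: compute() and the two hard-coded CPU profiles are
hypothetical, and RSVD_MASK_LEN stands in for the kernel's
shadow_nonpresent_or_rsvd_mask_len (5):

#include <stdint.h>
#include <stdio.h>

#define RSVD_MASK_LEN	5	/* shadow_nonpresent_or_rsvd_mask_len */
#define PAGE_SHIFT	12

/* Same helper as the kernel's rsvd_bits(): set bits s..e inclusive. */
static uint64_t rsvd_bits(int s, int e)
{
	return ((1ULL << (e - s + 1)) - 1) << s;
}

static void compute(int phys_bits, int cache_bits, int has_l1tf_bug)
{
	uint64_t rsvd_mask = 0, lower_gfn_mask;
	int low_phys_bits = phys_bits;	/* default: the (reduced) MAXPHYADDR */

	/* Split the gfn only on L1TF-affected CPUs with 5 free bits below 51. */
	if (has_l1tf_bug && cache_bits < 52 - RSVD_MASK_LEN) {
		low_phys_bits = cache_bits - RSVD_MASK_LEN;
		rsvd_mask = rsvd_bits(low_phys_bits, cache_bits - 1);
	}

	/* GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT) in the kernel. */
	lower_gfn_mask = rsvd_bits(PAGE_SHIFT, low_phys_bits - 1);

	printf("phys=%2d cache=%2d l1tf=%d: rsvd=%#014llx lower_gfn=%#014llx\n",
	       phys_bits, cache_bits, has_l1tf_bug,
	       (unsigned long long)rsvd_mask,
	       (unsigned long long)lower_gfn_mask);
}

int main(void)
{
	compute(46, 46, 1);	/* L1TF-affected Intel: gfn split in two parts */
	/*
	 * SEV-capable AMD: x86_phys_bits reduced to 43, x86_cache_bits still
	 * 48. Pre-patch, low_phys_bits ended up at 48 and lower_gfn_mask
	 * overlapped the MMIO value bits set at 43-51; post-patch the mask
	 * stops below the reduced MAXPHYADDR, so there is no overlap.
	 */
	compute(43, 48, 0);
	return 0;
}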
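Patch 6 works because, in this kernel's SPTE encoding, bits 52-53 classify
special SPTEs (SPTE_SPECIAL_MASK covers both bits) and the MMIO encoding sets
both of them (SPTE_MMIO_MASK), so no configurable mask is needed. A standalone
sketch of the simplified check; the constants mirror the kernel's definitions
at the time of this series, and the sample SPTE values are made up:

#include <assert.h>
#include <stdint.h>

#define SPTE_SPECIAL_MASK	(3ULL << 52)	/* bits 52-53 classify an SPTE */
#define SPTE_MMIO_MASK		(3ULL << 52)	/* both bits set means MMIO */

static int is_mmio_spte(uint64_t spte)
{
	/* No shadow_mmio_mask lookup needed: just test bits 52-53. */
	return (spte & SPTE_SPECIAL_MASK) == SPTE_MMIO_MASK;
}

int main(void)
{
	assert(is_mmio_spte(SPTE_MMIO_MASK | 0xabcd000ULL));	/* MMIO SPTE */
	assert(!is_mmio_spte(0xabcd000ULL | 0x1ULL));	/* normal present PTE */
	assert(!is_mmio_spte(1ULL << 52));	/* only one tag bit set: not MMIO */
	return 0;
}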