diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2fc5f10a414e..d226012f849f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -873,6 +873,13 @@ void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned lon
 	if ((cr0 ^ old_cr0) & X86_CR0_PG) {
 		kvm_clear_async_pf_completion_queue(vcpu);
 		kvm_async_pf_hash_reset(vcpu);
+
+		/*
+		 * Clearing CR0.PG is defined to flush the TLB from the guest's
+		 * perspective.
+		 */
+		if (!(cr0 & X86_CR0_PG))
+			kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
 	}
 
 	if ((cr0 ^ old_cr0) & KVM_MMU_CR0_ROLE_BITS)
@@ -1059,28 +1066,43 @@ EXPORT_SYMBOL_GPL(kvm_is_valid_cr4);
 
 void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4)
 {
+	if ((cr4 ^ old_cr4) & KVM_MMU_CR4_ROLE_BITS)
+		kvm_mmu_reset_context(vcpu);
+
 	/*
-	 * If any role bit is changed, the MMU needs to be reset.
-	 *
-	 * If CR4.PCIDE is changed 1 -> 0, the guest TLB must be flushed.
 	 * If CR4.PCIDE is changed 0 -> 1, there is no need to flush the TLB
 	 * according to the SDM; however, stale prev_roots could be reused
 	 * incorrectly in the future after a MOV to CR3 with NOFLUSH=1, so we
-	 * free them all.  KVM_REQ_MMU_RELOAD is fit for the both cases; it
-	 * is slow, but changing CR4.PCIDE is a rare case.
-	 *
-	 * If CR4.PGE is changed, the guest TLB must be flushed.
-	 *
-	 * Note: resetting MMU is a superset of KVM_REQ_MMU_RELOAD and
-	 * KVM_REQ_MMU_RELOAD is a superset of KVM_REQ_TLB_FLUSH_GUEST, hence
-	 * the usage of "else if".
+	 * free them all.  This is *not* a superset of KVM_REQ_TLB_FLUSH_GUEST
+	 * or KVM_REQ_TLB_FLUSH_CURRENT, because the hardware TLB is not flushed,
+	 * so fall through.
 	 */
-	if ((cr4 ^ old_cr4) & KVM_MMU_CR4_ROLE_BITS)
-		kvm_mmu_reset_context(vcpu);
-	else if ((cr4 ^ old_cr4) & X86_CR4_PCIDE)
+	if (!tdp_enabled &&
+	    (cr4 & X86_CR4_PCIDE) && !(old_cr4 & X86_CR4_PCIDE))
 		kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
-	else if ((cr4 ^ old_cr4) & X86_CR4_PGE)
+
+	/*
+	 * The TLB has to be flushed for all PCIDs if any of the following
+	 * (architecturally required) changes happen:
+	 * - CR4.PCIDE is changed from 1 to 0
+	 * - CR4.PGE is toggled
+	 *
+	 * This is a superset of KVM_REQ_TLB_FLUSH_CURRENT.
+	 */
+	if (((cr4 ^ old_cr4) & X86_CR4_PGE) ||
+	    (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE)))
 		kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
+
+	/*
+	 * The TLB has to be flushed for the current PCID if any of the
+	 * following (architecturally required) changes happen:
+	 * - CR4.SMEP is changed from 0 to 1
+	 * - CR4.PAE is toggled
+	 */
+	else if (((cr4 ^ old_cr4) & X86_CR4_PAE) ||
+		 ((cr4 & X86_CR4_SMEP) && !(old_cr4 & X86_CR4_SMEP)))
+		kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+
 }
 EXPORT_SYMBOL_GPL(kvm_post_set_cr4);
 
@@ -11336,15 +11358,17 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
 	static_call(kvm_x86_update_exception_bitmap)(vcpu);
 
 	/*
-	 * Reset the MMU context if paging was enabled prior to INIT (which is
-	 * implied if CR0.PG=1 as CR0 will be '0' prior to RESET).  Unlike the
-	 * standard CR0/CR4/EFER modification paths, only CR0.PG needs to be
-	 * checked because it is unconditionally cleared on INIT and all other
-	 * paging related bits are ignored if paging is disabled, i.e. CR0.WP,
-	 * CR4, and EFER changes are all irrelevant if CR0.PG was '0'.
+	 * On the standard CR0/CR4/EFER modification paths, there are several
+	 * complex conditions determining whether the MMU has to be reset and/or
+	 * which PCIDs have to be flushed.  However, CR0.WP and the paging-related
+	 * bits in CR4 and EFER are irrelevant if CR0.PG was '0'; and a reset+flush
+	 * is needed anyway if CR0.PG was '1' (which can only happen for INIT, as
+	 * CR0 will be '0' prior to RESET).  So we only need to check CR0.PG here.
 	 */
-	if (old_cr0 & X86_CR0_PG)
+	if (old_cr0 & X86_CR0_PG) {
+		kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
 		kvm_mmu_reset_context(vcpu);
+	}
 
 	/*
 	 * Intel's SDM states that all TLB entries are flushed on INIT.  AMD's
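
A quick way to sanity-check the new kvm_post_set_cr4() ordering is to model it as a pure function over (old_cr4, cr4). The sketch below is not KVM code: the CR4 masks are redefined locally with their architectural bit positions, and the REQ_* flags are hypothetical stand-ins for KVM_REQ_MMU_RELOAD, KVM_REQ_TLB_FLUSH_GUEST and KVM_REQ_TLB_FLUSH_CURRENT. It only illustrates which transitions trigger which request, including the fall-through from the prev_roots cleanup and the "all PCIDs subsumes current PCID" else-if.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Architectural CR4 bit positions (same values as the kernel's X86_CR4_*). */
#define CR4_PAE			(1ull << 5)
#define CR4_PGE			(1ull << 7)
#define CR4_PCIDE		(1ull << 17)
#define CR4_SMEP		(1ull << 20)

/* Hypothetical stand-ins for the requests made in kvm_post_set_cr4(). */
#define REQ_MMU_RELOAD		(1u << 0)	/* drop prev_roots (shadow paging only) */
#define REQ_TLB_FLUSH_GUEST	(1u << 1)	/* flush all PCIDs */
#define REQ_TLB_FLUSH_CURRENT	(1u << 2)	/* flush the current PCID only */

static unsigned int cr4_requests(uint64_t old_cr4, uint64_t cr4, bool tdp_enabled)
{
	unsigned int req = 0;

	/* PCIDE 0 -> 1: stale prev_roots must go, but no hardware TLB flush. */
	if (!tdp_enabled && (cr4 & CR4_PCIDE) && !(old_cr4 & CR4_PCIDE))
		req |= REQ_MMU_RELOAD;

	/* PGE toggled or PCIDE 1 -> 0: flush all PCIDs ... */
	if (((cr4 ^ old_cr4) & CR4_PGE) ||
	    (!(cr4 & CR4_PCIDE) && (old_cr4 & CR4_PCIDE)))
		req |= REQ_TLB_FLUSH_GUEST;
	/* ... which subsumes the current-PCID flush for SMEP 0 -> 1 / PAE toggle. */
	else if (((cr4 ^ old_cr4) & CR4_PAE) ||
		 ((cr4 & CR4_SMEP) && !(old_cr4 & CR4_SMEP)))
		req |= REQ_TLB_FLUSH_CURRENT;

	return req;
}

int main(void)
{
	/* PCIDE 0 -> 1 under shadow paging: reload only, no TLB flush. */
	printf("%#x\n", cr4_requests(CR4_PGE, CR4_PGE | CR4_PCIDE, false));
	/* SMEP 0 -> 1: current PCID only. */
	printf("%#x\n", cr4_requests(0, CR4_SMEP, true));
	/* PCIDE 1 -> 0 with PAE toggled too: the all-PCID flush wins. */
	printf("%#x\n", cr4_requests(CR4_PCIDE | CR4_PAE, 0, true));
	return 0;
}

Expected output under these illustrative encodings: 0x1, 0x4, 0x2.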