KVM: nVMX: Allow L1 to use 5-level page walks for nested EPT

Add support for 5-level nested EPT, and advertise said support in the
EPT capabilities MSR.  KVM's MMU can already handle 5-level legacy page
tables, so there's no reason to force an L1 VMM to use shadow paging if it
wants to employ 5-level page tables.

Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Sean Christopherson 2020-03-02 18:02:36 -08:00 committed by Paolo Bonzini
parent 8053f924ca
commit bb1fcc70d9
5 changed files with 37 additions and 12 deletions

View File

@ -500,6 +500,18 @@ enum vmcs_field {
VMX_EPT_EXECUTABLE_MASK) VMX_EPT_EXECUTABLE_MASK)
#define VMX_EPT_MT_MASK (7ull << VMX_EPT_MT_EPTE_SHIFT) #define VMX_EPT_MT_MASK (7ull << VMX_EPT_MT_EPTE_SHIFT)
/*
 * Translate the page-walk-length field of an EPT pointer into a paging
 * level.
 *
 * The field is isolated with VMX_EPTP_PWL_MASK.  An encoding equal to
 * VMX_EPTP_PWL_5 yields 5; every other encoding yields 4.  Callers are
 * expected to have validated @eptp beforehand, so an encoding that is
 * neither VMX_EPTP_PWL_5 nor VMX_EPTP_PWL_4 triggers a one-time warning
 * before falling back to 4.
 */
static inline u8 vmx_eptp_page_walk_level(u64 eptp)
{
	u64 pwl = eptp & VMX_EPTP_PWL_MASK;

	if (pwl != VMX_EPTP_PWL_5) {
		/* Anything other than the 4-level encoding means the caller skipped validation. */
		WARN_ON_ONCE(pwl != VMX_EPTP_PWL_4);
		return 4;
	}

	return 5;
}
/* The mask to use to trigger an EPT Misconfiguration in order to track MMIO */ /* The mask to use to trigger an EPT Misconfiguration in order to track MMIO */
#define VMX_EPT_MISCONFIG_WX_VALUE (VMX_EPT_WRITABLE_MASK | \ #define VMX_EPT_MISCONFIG_WX_VALUE (VMX_EPT_WRITABLE_MASK | \
VMX_EPT_EXECUTABLE_MASK) VMX_EPT_EXECUTABLE_MASK)

View File

@ -5008,14 +5008,14 @@ EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
static union kvm_mmu_role static union kvm_mmu_role
kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty, kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty,
bool execonly) bool execonly, u8 level)
{ {
union kvm_mmu_role role = {0}; union kvm_mmu_role role = {0};
/* SMM flag is inherited from root_mmu */ /* SMM flag is inherited from root_mmu */
role.base.smm = vcpu->arch.root_mmu.mmu_role.base.smm; role.base.smm = vcpu->arch.root_mmu.mmu_role.base.smm;
role.base.level = PT64_ROOT_4LEVEL; role.base.level = level;
role.base.gpte_is_8_bytes = true; role.base.gpte_is_8_bytes = true;
role.base.direct = false; role.base.direct = false;
role.base.ad_disabled = !accessed_dirty; role.base.ad_disabled = !accessed_dirty;
@ -5039,16 +5039,17 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
bool accessed_dirty, gpa_t new_eptp) bool accessed_dirty, gpa_t new_eptp)
{ {
struct kvm_mmu *context = vcpu->arch.mmu; struct kvm_mmu *context = vcpu->arch.mmu;
u8 level = vmx_eptp_page_walk_level(new_eptp);
union kvm_mmu_role new_role = union kvm_mmu_role new_role =
kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty, kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty,
execonly); execonly, level);
__kvm_mmu_new_cr3(vcpu, new_eptp, new_role.base, false); __kvm_mmu_new_cr3(vcpu, new_eptp, new_role.base, false);
if (new_role.as_u64 == context->mmu_role.as_u64) if (new_role.as_u64 == context->mmu_role.as_u64)
return; return;
context->shadow_root_level = PT64_ROOT_4LEVEL; context->shadow_root_level = level;
context->nx = true; context->nx = true;
context->ept_ad = accessed_dirty; context->ept_ad = accessed_dirty;
@ -5057,7 +5058,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
context->sync_page = ept_sync_page; context->sync_page = ept_sync_page;
context->invlpg = ept_invlpg; context->invlpg = ept_invlpg;
context->update_pte = ept_update_pte; context->update_pte = ept_update_pte;
context->root_level = PT64_ROOT_4LEVEL; context->root_level = level;
context->direct_map = false; context->direct_map = false;
context->mmu_role.as_u64 = new_role.as_u64; context->mmu_role.as_u64 = new_role.as_u64;

View File

@ -66,7 +66,7 @@
#define PT_GUEST_ACCESSED_SHIFT 8 #define PT_GUEST_ACCESSED_SHIFT 8
#define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad) #define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad)
#define CMPXCHG cmpxchg64 #define CMPXCHG cmpxchg64
#define PT_MAX_FULL_LEVELS 4 #define PT_MAX_FULL_LEVELS PT64_ROOT_MAX_LEVEL
#else #else
#error Invalid PTTYPE value #error Invalid PTTYPE value
#endif #endif

View File

@ -2582,9 +2582,19 @@ static bool valid_ept_address(struct kvm_vcpu *vcpu, u64 address)
return false; return false;
} }
/* only 4 levels page-walk length are valid */ /* Page-walk levels validity. */
if (CC((address & VMX_EPTP_PWL_MASK) != VMX_EPTP_PWL_4)) switch (address & VMX_EPTP_PWL_MASK) {
case VMX_EPTP_PWL_5:
if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_5_BIT)))
return false;
break;
case VMX_EPTP_PWL_4:
if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_4_BIT)))
return false;
break;
default:
return false; return false;
}
/* Reserved bits should not be set */ /* Reserved bits should not be set */
if (CC(address >> maxphyaddr || ((address >> 7) & 0x1f))) if (CC(address >> maxphyaddr || ((address >> 7) & 0x1f)))
@ -6119,8 +6129,11 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
/* nested EPT: emulate EPT also to L1 */ /* nested EPT: emulate EPT also to L1 */
msrs->secondary_ctls_high |= msrs->secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_EPT; SECONDARY_EXEC_ENABLE_EPT;
msrs->ept_caps = VMX_EPT_PAGE_WALK_4_BIT | msrs->ept_caps =
VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT; VMX_EPT_PAGE_WALK_4_BIT |
VMX_EPT_PAGE_WALK_5_BIT |
VMX_EPTP_WB_BIT |
VMX_EPT_INVEPT_BIT;
if (cpu_has_vmx_ept_execute_only()) if (cpu_has_vmx_ept_execute_only())
msrs->ept_caps |= msrs->ept_caps |=
VMX_EPT_EXECUTE_ONLY_BIT; VMX_EPT_EXECUTE_ONLY_BIT;

View File

@ -2985,9 +2985,8 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
static int get_ept_level(struct kvm_vcpu *vcpu) static int get_ept_level(struct kvm_vcpu *vcpu)
{ {
/* Nested EPT currently only supports 4-level walks. */
if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu))) if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu)))
return 4; return vmx_eptp_page_walk_level(nested_ept_get_cr3(vcpu));
if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48))
return 5; return 5;
return 4; return 4;