KVM: MMU: speedup update_permission_bitmask

update_permission_bitmask currently does a 128-iteration loop to,
essentially, compute a constant array.  Computing the 8 bits in parallel
reduces it to 16 iterations, and is enough to speed it up substantially
because many boolean operations in the inner loop become constants or
simplify noticeably.

Because update_permission_bitmask is actually the top item in the profile
for nested vmexits, this speeds up an L2->L1 vmexit by about ten thousand
clock cycles, or up to 30%:

                                         before     after
   cpuid                                 35173      25954
   vmcall                                35122      27079
   inl_from_pmtimer                      52635      42675
   inl_from_qemu                         53604      44599
   inl_from_kernel                       38498      30798
   outl_to_kernel                        34508      28816
   wr_tsc_adjust_msr                     34185      26818
   rd_tsc_adjust_msr                     37409      27049
   mmio-no-eventfd:pci-mem               50563      45276
   mmio-wildcard-eventfd:pci-mem         34495      30823
   mmio-datamatch-eventfd:pci-mem        35612      31071
   portio-no-eventfd:pci-io              44925      40661
   portio-wildcard-eventfd:pci-io        29708      27269
   portio-datamatch-eventfd:pci-io       31135      27164

(I wrote a small C program to compare the tables for all values of CR0.WP,
CR4.SMAP and CR4.SMEP, and they match).

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
This commit is contained in:
Paolo Bonzini 2017-08-24 17:37:25 +02:00
parent fd8cb43373
commit 09f037aa48

View File

@ -4204,39 +4204,63 @@ reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
boot_cpu_data.x86_phys_bits, execonly);
}
#define BYTE_MASK(access) \
((1 & (access) ? 2 : 0) | \
(2 & (access) ? 4 : 0) | \
(3 & (access) ? 8 : 0) | \
(4 & (access) ? 16 : 0) | \
(5 & (access) ? 32 : 0) | \
(6 & (access) ? 64 : 0) | \
(7 & (access) ? 128 : 0))
static void update_permission_bitmask(struct kvm_vcpu *vcpu,
struct kvm_mmu *mmu, bool ept)
{
unsigned bit, byte, pfec;
u8 map;
bool fault, x, w, u, wf, uf, ff, smapf, cr4_smap, cr4_smep, smap = 0;
unsigned byte;
const u8 x = BYTE_MASK(ACC_EXEC_MASK);
const u8 w = BYTE_MASK(ACC_WRITE_MASK);
const u8 u = BYTE_MASK(ACC_USER_MASK);
bool cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP) != 0;
bool cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP) != 0;
bool cr0_wp = is_write_protection(vcpu);
cr4_smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
cr4_smap = kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
pfec = byte << 1;
map = 0;
wf = pfec & PFERR_WRITE_MASK;
uf = pfec & PFERR_USER_MASK;
ff = pfec & PFERR_FETCH_MASK;
unsigned pfec = byte << 1;
/*
* PFERR_RSVD_MASK bit is set in PFEC if the access is not
* subject to SMAP restrictions, and cleared otherwise. The
* bit is only meaningful if the SMAP bit is set in CR4.
* Each "*f" variable has a 1 bit for each UWX value
* that causes a fault with the given PFEC.
*/
smapf = !(pfec & PFERR_RSVD_MASK);
for (bit = 0; bit < 8; ++bit) {
x = bit & ACC_EXEC_MASK;
w = bit & ACC_WRITE_MASK;
u = bit & ACC_USER_MASK;
/* Faults from writes to non-writable pages */
u8 wf = (pfec & PFERR_WRITE_MASK) ? ~w : 0;
/* Faults from user mode accesses to supervisor pages */
u8 uf = (pfec & PFERR_USER_MASK) ? ~u : 0;
/* Faults from fetches of non-executable pages*/
u8 ff = (pfec & PFERR_FETCH_MASK) ? ~x : 0;
/* Faults from kernel mode fetches of user pages */
u8 smepf = 0;
/* Faults from kernel mode accesses of user pages */
u8 smapf = 0;
if (!ept) {
/* Faults from kernel mode accesses to user pages */
u8 kf = (pfec & PFERR_USER_MASK) ? 0 : u;
/* Not really needed: !nx will cause pte.nx to fault */
x |= !mmu->nx;
if (!mmu->nx)
ff = 0;
/* Allow supervisor writes if !cr0.wp */
w |= !is_write_protection(vcpu) && !uf;
if (!cr0_wp)
wf = (pfec & PFERR_USER_MASK) ? wf : 0;
/* Disallow supervisor fetches of user code if cr4.smep */
x &= !(cr4_smep && u && !uf);
if (cr4_smep)
smepf = (pfec & PFERR_FETCH_MASK) ? kf : 0;
/*
* SMAP:kernel-mode data accesses from user-mode
@ -4245,25 +4269,20 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
* conditions are ture:
* - X86_CR4_SMAP is set in CR4
* - A user page is accessed
* - The access is not a fetch
* - Page fault in kernel mode
* - if CPL = 3 or X86_EFLAGS_AC is clear
*
* Here, we cover the first three conditions.
* The fourth is computed dynamically in
* permission_fault() and is in smapf.
*
* Also, SMAP does not affect instruction
* fetches, add the !ff check here to make it
* clearer.
* The fourth is computed dynamically in permission_fault();
* PFERR_RSVD_MASK bit will be set in PFEC if the access is
* *not* subject to SMAP restrictions.
*/
smap = cr4_smap && u && !uf && !ff;
if (cr4_smap)
smapf = (pfec & (PFERR_RSVD_MASK|PFERR_FETCH_MASK)) ? 0 : kf;
}
fault = (ff && !x) || (uf && !u) || (wf && !w) ||
(smapf && smap);
map |= fault << bit;
}
mmu->permissions[byte] = map;
mmu->permissions[byte] = ff | uf | wf | smepf | smapf;
}
}