KVM/arm64 fixes for 6.2, take #1
- Fix the PMCR_EL0 reset value after the PMU rework - Correctly handle S2 fault triggered by a S1 page table walk by not always classifying it as a write, as this breaks on R/O memslots - Document why we cannot exit with KVM_EXIT_MMIO when taking a write fault from a S1 PTW on a R/O memslot - Put the Apple M2 on the naughty step for not being able to correctly implement the vgic SEIS feature, just liek the M1 before it - Reviewer updates: Alex is stepping down, replaced by Zenghui -----BEGIN PGP SIGNATURE----- iQJDBAABCgAtFiEEn9UcU+C1Yxj9lZw9I9DQutE9ekMFAmO27gQPHG1hekBrZXJu ZWwub3JnAAoJECPQ0LrRPXpDwioP/A0UE7ujSxv3dlBstBhmtzOoX64pRufX01Kr 1oF24M1VuTVLwl3pp1nWH10SVWv5kukYZJAJ/3tDJOaMt/Q9c0exPCPc95i2p/r7 OC9j8rZVZnjGN6sAP5zazIT67tSanyLDeCC+j4J1pw20r2tB67LKSOoozEb5How7 CX+Oa2OiEiI34jp33v3mFQ3VxY3714QUMBUK7n+L29IFXGmQp6dfbhn2iY3uNpoU YYrkPzBLUC1H//oCx0qoDDCXXeOKMGuWP1At5GIDz6ZSCBVpKdVbftCC59Dk7dDz 7BdQ5JoEc15RTZajdopOog4RV4YHP8VszaClhCA1ML0Pd2Mf4UVLlPnn7F+3yR3r pMgjlOAlLJwHiwggJZ0EQ0wFdx9LuGeu3OwckGE/JxeEwaMdzGAEfcFoAGZV0ExZ 7riiKS+NmtrkuE9wJfWOrpDiseymmUbuhHq+F/HDq/SP6UdezAylkcxZRuN/ZCRc 9XVhTcWu/UPxoaSSd/sB4l9X8Ey/cZe28+kV7eE/m2g79bZKxHd4UUOUymb/aJxj og10A6i0B1DOWMtKJ9hEsB6wI6Hllrqcbo8ewX1znKoKbfHZDeU/N5D4ZvTz85sf zyqbsSZPDxMOwBPYTqZqG65tEWWw68HIJ9cqQzKDehN1Xm1coNIWSPrUnBMpSsWJ qDQNmIzf =XBtQ -----END PGP SIGNATURE----- Merge tag 'kvmarm-fixes-6.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into kvm-master KVM/arm64 fixes for 6.2, take #1 - Fix the PMCR_EL0 reset value after the PMU rework - Correctly handle S2 fault triggered by a S1 page table walk by not always classifying it as a write, as this breaks on R/O memslots - Document why we cannot exit with KVM_EXIT_MMIO when taking a write fault from a S1 PTW on a R/O memslot - Put the Apple M2 on the naughty step for not being able to correctly implement the vgic SEIS feature, just liek the M1 before it - Reviewer updates: Alex is stepping down, replaced by Zenghui
This commit is contained in:
commit
71d0393576
@ -1354,6 +1354,14 @@ the memory region are automatically reflected into the guest. For example, an
|
||||
mmap() that affects the region will be made visible immediately. Another
|
||||
example is madvise(MADV_DROP).
|
||||
|
||||
Note: On arm64, a write generated by the page-table walker (to update
|
||||
the Access and Dirty flags, for example) never results in a
|
||||
KVM_EXIT_MMIO exit when the slot has the KVM_MEM_READONLY flag. This
|
||||
is because KVM cannot provide the data that would be written by the
|
||||
page-table walker, making it impossible to emulate the access.
|
||||
Instead, an abort (data abort if the cause of the page-table update
|
||||
was a load or a store, instruction abort if it was an instruction
|
||||
fetch) is injected in the guest.
|
||||
|
||||
4.36 KVM_SET_TSS_ADDR
|
||||
---------------------
|
||||
|
@ -11356,9 +11356,9 @@ F: virt/kvm/*
|
||||
KERNEL VIRTUAL MACHINE FOR ARM64 (KVM/arm64)
|
||||
M: Marc Zyngier <maz@kernel.org>
|
||||
R: James Morse <james.morse@arm.com>
|
||||
R: Alexandru Elisei <alexandru.elisei@arm.com>
|
||||
R: Suzuki K Poulose <suzuki.poulose@arm.com>
|
||||
R: Oliver Upton <oliver.upton@linux.dev>
|
||||
R: Zenghui Yu <yuzenghui@huawei.com>
|
||||
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
|
||||
L: kvmarm@lists.linux.dev
|
||||
L: kvmarm@lists.cs.columbia.edu (deprecated, moderated for non-subscribers)
|
||||
|
@ -124,6 +124,8 @@
|
||||
#define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025
|
||||
#define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028
|
||||
#define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029
|
||||
#define APPLE_CPU_PART_M2_BLIZZARD 0x032
|
||||
#define APPLE_CPU_PART_M2_AVALANCHE 0x033
|
||||
|
||||
#define AMPERE_CPU_PART_AMPERE1 0xAC3
|
||||
|
||||
@ -177,6 +179,8 @@
|
||||
#define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO)
|
||||
#define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX)
|
||||
#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX)
|
||||
#define MIDR_APPLE_M2_BLIZZARD MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD)
|
||||
#define MIDR_APPLE_M2_AVALANCHE MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE)
|
||||
#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
|
||||
|
||||
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
|
||||
|
@ -114,6 +114,15 @@
|
||||
#define ESR_ELx_FSC_ACCESS (0x08)
|
||||
#define ESR_ELx_FSC_FAULT (0x04)
|
||||
#define ESR_ELx_FSC_PERM (0x0C)
|
||||
#define ESR_ELx_FSC_SEA_TTW0 (0x14)
|
||||
#define ESR_ELx_FSC_SEA_TTW1 (0x15)
|
||||
#define ESR_ELx_FSC_SEA_TTW2 (0x16)
|
||||
#define ESR_ELx_FSC_SEA_TTW3 (0x17)
|
||||
#define ESR_ELx_FSC_SECC (0x18)
|
||||
#define ESR_ELx_FSC_SECC_TTW0 (0x1c)
|
||||
#define ESR_ELx_FSC_SECC_TTW1 (0x1d)
|
||||
#define ESR_ELx_FSC_SECC_TTW2 (0x1e)
|
||||
#define ESR_ELx_FSC_SECC_TTW3 (0x1f)
|
||||
|
||||
/* ISS field definitions for Data Aborts */
|
||||
#define ESR_ELx_ISV_SHIFT (24)
|
||||
|
@ -319,21 +319,6 @@
|
||||
BIT(18) | \
|
||||
GENMASK(16, 15))
|
||||
|
||||
/* For compatibility with fault code shared with 32-bit */
|
||||
#define FSC_FAULT ESR_ELx_FSC_FAULT
|
||||
#define FSC_ACCESS ESR_ELx_FSC_ACCESS
|
||||
#define FSC_PERM ESR_ELx_FSC_PERM
|
||||
#define FSC_SEA ESR_ELx_FSC_EXTABT
|
||||
#define FSC_SEA_TTW0 (0x14)
|
||||
#define FSC_SEA_TTW1 (0x15)
|
||||
#define FSC_SEA_TTW2 (0x16)
|
||||
#define FSC_SEA_TTW3 (0x17)
|
||||
#define FSC_SECC (0x18)
|
||||
#define FSC_SECC_TTW0 (0x1c)
|
||||
#define FSC_SECC_TTW1 (0x1d)
|
||||
#define FSC_SECC_TTW2 (0x1e)
|
||||
#define FSC_SECC_TTW3 (0x1f)
|
||||
|
||||
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
|
||||
#define HPFAR_MASK (~UL(0xf))
|
||||
/*
|
||||
|
@ -349,16 +349,16 @@ static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *v
|
||||
static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
|
||||
{
|
||||
switch (kvm_vcpu_trap_get_fault(vcpu)) {
|
||||
case FSC_SEA:
|
||||
case FSC_SEA_TTW0:
|
||||
case FSC_SEA_TTW1:
|
||||
case FSC_SEA_TTW2:
|
||||
case FSC_SEA_TTW3:
|
||||
case FSC_SECC:
|
||||
case FSC_SECC_TTW0:
|
||||
case FSC_SECC_TTW1:
|
||||
case FSC_SECC_TTW2:
|
||||
case FSC_SECC_TTW3:
|
||||
case ESR_ELx_FSC_EXTABT:
|
||||
case ESR_ELx_FSC_SEA_TTW0:
|
||||
case ESR_ELx_FSC_SEA_TTW1:
|
||||
case ESR_ELx_FSC_SEA_TTW2:
|
||||
case ESR_ELx_FSC_SEA_TTW3:
|
||||
case ESR_ELx_FSC_SECC:
|
||||
case ESR_ELx_FSC_SECC_TTW0:
|
||||
case ESR_ELx_FSC_SECC_TTW1:
|
||||
case ESR_ELx_FSC_SECC_TTW2:
|
||||
case ESR_ELx_FSC_SECC_TTW3:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
@ -373,8 +373,26 @@ static __always_inline int kvm_vcpu_sys_get_rt(struct kvm_vcpu *vcpu)
|
||||
|
||||
static inline bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
if (kvm_vcpu_abt_iss1tw(vcpu))
|
||||
return true;
|
||||
if (kvm_vcpu_abt_iss1tw(vcpu)) {
|
||||
/*
|
||||
* Only a permission fault on a S1PTW should be
|
||||
* considered as a write. Otherwise, page tables baked
|
||||
* in a read-only memslot will result in an exception
|
||||
* being delivered in the guest.
|
||||
*
|
||||
* The drawback is that we end-up faulting twice if the
|
||||
* guest is using any of HW AF/DB: a translation fault
|
||||
* to map the page containing the PT (read only at
|
||||
* first), then a permission fault to allow the flags
|
||||
* to be set.
|
||||
*/
|
||||
switch (kvm_vcpu_trap_get_fault_type(vcpu)) {
|
||||
case ESR_ELx_FSC_PERM:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (kvm_vcpu_trap_is_iabt(vcpu))
|
||||
return false;
|
||||
|
@ -60,7 +60,7 @@ static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault)
|
||||
*/
|
||||
if (!(esr & ESR_ELx_S1PTW) &&
|
||||
(cpus_have_final_cap(ARM64_WORKAROUND_834220) ||
|
||||
(esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
|
||||
(esr & ESR_ELx_FSC_TYPE) == ESR_ELx_FSC_PERM)) {
|
||||
if (!__translate_far_to_hpfar(far, &hpfar))
|
||||
return false;
|
||||
} else {
|
||||
|
@ -367,7 +367,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code)
|
||||
if (static_branch_unlikely(&vgic_v2_cpuif_trap)) {
|
||||
bool valid;
|
||||
|
||||
valid = kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT &&
|
||||
valid = kvm_vcpu_trap_get_fault_type(vcpu) == ESR_ELx_FSC_FAULT &&
|
||||
kvm_vcpu_dabt_isvalid(vcpu) &&
|
||||
!kvm_vcpu_abt_issea(vcpu) &&
|
||||
!kvm_vcpu_abt_iss1tw(vcpu);
|
||||
|
@ -1212,7 +1212,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
|
||||
VM_BUG_ON(write_fault && exec_fault);
|
||||
|
||||
if (fault_status == FSC_PERM && !write_fault && !exec_fault) {
|
||||
if (fault_status == ESR_ELx_FSC_PERM && !write_fault && !exec_fault) {
|
||||
kvm_err("Unexpected L2 read permission error\n");
|
||||
return -EFAULT;
|
||||
}
|
||||
@ -1277,7 +1277,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
* only exception to this is when dirty logging is enabled at runtime
|
||||
* and a write fault needs to collapse a block entry into a table.
|
||||
*/
|
||||
if (fault_status != FSC_PERM || (logging_active && write_fault)) {
|
||||
if (fault_status != ESR_ELx_FSC_PERM ||
|
||||
(logging_active && write_fault)) {
|
||||
ret = kvm_mmu_topup_memory_cache(memcache,
|
||||
kvm_mmu_cache_min_pages(kvm));
|
||||
if (ret)
|
||||
@ -1342,7 +1343,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
* backed by a THP and thus use block mapping if possible.
|
||||
*/
|
||||
if (vma_pagesize == PAGE_SIZE && !(force_pte || device)) {
|
||||
if (fault_status == FSC_PERM && fault_granule > PAGE_SIZE)
|
||||
if (fault_status == ESR_ELx_FSC_PERM &&
|
||||
fault_granule > PAGE_SIZE)
|
||||
vma_pagesize = fault_granule;
|
||||
else
|
||||
vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
|
||||
@ -1350,7 +1352,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
&fault_ipa);
|
||||
}
|
||||
|
||||
if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
|
||||
if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) {
|
||||
/* Check the VMM hasn't introduced a new disallowed VMA */
|
||||
if (kvm_vma_mte_allowed(vma)) {
|
||||
sanitise_mte_tags(kvm, pfn, vma_pagesize);
|
||||
@ -1376,7 +1378,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
|
||||
* permissions only if vma_pagesize equals fault_granule. Otherwise,
|
||||
* kvm_pgtable_stage2_map() should be called to change block size.
|
||||
*/
|
||||
if (fault_status == FSC_PERM && vma_pagesize == fault_granule)
|
||||
if (fault_status == ESR_ELx_FSC_PERM && vma_pagesize == fault_granule)
|
||||
ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
|
||||
else
|
||||
ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
|
||||
@ -1441,7 +1443,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
|
||||
fault_ipa = kvm_vcpu_get_fault_ipa(vcpu);
|
||||
is_iabt = kvm_vcpu_trap_is_iabt(vcpu);
|
||||
|
||||
if (fault_status == FSC_FAULT) {
|
||||
if (fault_status == ESR_ELx_FSC_FAULT) {
|
||||
/* Beyond sanitised PARange (which is the IPA limit) */
|
||||
if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) {
|
||||
kvm_inject_size_fault(vcpu);
|
||||
@ -1476,8 +1478,9 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
|
||||
kvm_vcpu_get_hfar(vcpu), fault_ipa);
|
||||
|
||||
/* Check the stage-2 fault is trans. fault or write fault */
|
||||
if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
|
||||
fault_status != FSC_ACCESS) {
|
||||
if (fault_status != ESR_ELx_FSC_FAULT &&
|
||||
fault_status != ESR_ELx_FSC_PERM &&
|
||||
fault_status != ESR_ELx_FSC_ACCESS) {
|
||||
kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
|
||||
kvm_vcpu_trap_get_class(vcpu),
|
||||
(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
|
||||
@ -1539,7 +1542,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
|
||||
/* Userspace should not be able to register out-of-bounds IPAs */
|
||||
VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm));
|
||||
|
||||
if (fault_status == FSC_ACCESS) {
|
||||
if (fault_status == ESR_ELx_FSC_ACCESS) {
|
||||
handle_access_fault(vcpu, fault_ipa);
|
||||
ret = 1;
|
||||
goto out_unlock;
|
||||
|
@ -646,7 +646,7 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
|
||||
return;
|
||||
|
||||
/* Only preserve PMCR_EL0.N, and reset the rest to 0 */
|
||||
pmcr = read_sysreg(pmcr_el0) & ARMV8_PMU_PMCR_N_MASK;
|
||||
pmcr = read_sysreg(pmcr_el0) & (ARMV8_PMU_PMCR_N_MASK << ARMV8_PMU_PMCR_N_SHIFT);
|
||||
if (!kvm_supports_32bit_el0())
|
||||
pmcr |= ARMV8_PMU_PMCR_LC;
|
||||
|
||||
|
@ -616,6 +616,8 @@ static const struct midr_range broken_seis[] = {
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_PRO),
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_ICESTORM_MAX),
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M1_FIRESTORM_MAX),
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_BLIZZARD),
|
||||
MIDR_ALL_VERSIONS(MIDR_APPLE_M2_AVALANCHE),
|
||||
{},
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user