// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012,2013 - ARM Ltd
 * Author: Marc Zyngier <marc.zyngier@arm.com>
 *
 * Derived from arch/arm/kvm/reset.c
 * Copyright (C) 2012 - Virtual Open Systems and Columbia University
 * Author: Christoffer Dall <c.dall@virtualopensystems.com>
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/hw_breakpoint.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>

#include <kvm/arm_arch_timer.h>

#include <asm/cpufeature.h>
#include <asm/cputype.h>
#include <asm/fpsimd.h>
#include <asm/ptrace.h>
#include <asm/kvm_arm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_coproc.h>
#include <asm/kvm_emulate.h>
#include <asm/kvm_mmu.h>
#include <asm/virt.h>

/* Maximum phys_shift supported for any VM on this host */
static u32 kvm_ipa_limit;

/*
 * ARMv8 Reset Values
 */
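/*
 * AArch64 guests reset to EL1h with the D, A, I and F exception masks set;
 * AArch32 guests reset to SVC mode with A, I and F masked.
 */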
static const struct kvm_regs default_regs_reset = {
	.regs.pstate = (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT |
			PSR_F_BIT | PSR_D_BIT),
};

static const struct kvm_regs default_regs_reset32 = {
	.regs.pstate = (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT |
			PSR_AA32_I_BIT | PSR_AA32_F_BIT),
};

static bool cpu_has_32bit_el1(void)
{
	u64 pfr0;

	pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
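	/*
	 * 0x20 is bit 1 of the ID_AA64PFR0_EL1.EL1 field (bits [7:4]),
	 * which is set when EL1 can run in AArch32 state as well as AArch64.
	 */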
	return !!(pfr0 & 0x20);
}

/**
 * kvm_arch_vm_ioctl_check_extension
 *
 * We currently assume that the number of HW registers is uniform
 * across all CPUs (see cpuinfo_sanity_check).
 */
int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{
	int r;

	switch (ext) {
	case KVM_CAP_ARM_EL1_32BIT:
		r = cpu_has_32bit_el1();
		break;
	case KVM_CAP_GUEST_DEBUG_HW_BPS:
		r = get_num_brps();
		break;
	case KVM_CAP_GUEST_DEBUG_HW_WPS:
		r = get_num_wrps();
		break;
	case KVM_CAP_ARM_PMU_V3:
		r = kvm_arm_support_pmu_v3();
		break;
	case KVM_CAP_ARM_INJECT_SERROR_ESR:
		r = cpus_have_const_cap(ARM64_HAS_RAS_EXTN);
		break;
	case KVM_CAP_SET_GUEST_DEBUG:
	case KVM_CAP_VCPU_ATTRIBUTES:
		r = 1;
		break;
	case KVM_CAP_ARM_VM_IPA_SIZE:
		r = kvm_ipa_limit;
		break;
	case KVM_CAP_ARM_SVE:
		r = system_supports_sve();
		break;
	case KVM_CAP_ARM_PTRAUTH_ADDRESS:
	case KVM_CAP_ARM_PTRAUTH_GENERIC:
		r = has_vhe() && system_supports_address_auth() &&
			system_supports_generic_auth();
		break;
	default:
		r = 0;
	}

	return r;
}

unsigned int kvm_sve_max_vl;

int kvm_arm_init_sve(void)
{
	if (system_supports_sve()) {
		kvm_sve_max_vl = sve_max_virtualisable_vl;

		/*
		 * The get_sve_reg()/set_sve_reg() ioctl interface will need
		 * to be extended with multiple register slice support in
		 * order to support vector lengths greater than
		 * SVE_VL_ARCH_MAX:
		 */
		if (WARN_ON(kvm_sve_max_vl > SVE_VL_ARCH_MAX))
			kvm_sve_max_vl = SVE_VL_ARCH_MAX;

		/*
		 * Don't even try to make use of vector lengths that
		 * aren't available on all CPUs, for now:
		 */
		if (kvm_sve_max_vl < sve_max_vl)
			pr_warn("KVM: SVE vector length for guests limited to %u bytes\n",
				kvm_sve_max_vl);
	}

	return 0;
}

static int kvm_vcpu_enable_sve(struct kvm_vcpu *vcpu)
{
	if (!system_supports_sve())
		return -EINVAL;

	/* Verify that KVM startup enforced this when SVE was detected: */
	if (WARN_ON(!has_vhe()))
		return -EINVAL;

	vcpu->arch.sve_max_vl = kvm_sve_max_vl;

	/*
	 * Userspace can still customize the vector lengths by writing
	 * KVM_REG_ARM64_SVE_VLS. Allocation is deferred until
	 * kvm_arm_vcpu_finalize(), which freezes the configuration.
	 */
	vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_SVE;

	return 0;
}

/*
 * Finalize vcpu's maximum SVE vector length, allocating
 * vcpu->arch.sve_state as necessary.
 */
static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu)
{
	void *buf;
	unsigned int vl;

	vl = vcpu->arch.sve_max_vl;

	/*
	 * Responsibility for these properties is shared between
	 * kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and
	 * set_sve_vls(). Double-check here just to be sure:
	 */
	if (WARN_ON(!sve_vl_valid(vl) || vl > sve_max_virtualisable_vl ||
		    vl > SVE_VL_ARCH_MAX))
		return -EIO;
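	/*
	 * sve_state holds the SVE registers at this vcpu's maximum vector
	 * length; SVE_SIG_REGS_SIZE() gives the size of that register block.
	 */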
	buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	vcpu->arch.sve_state = buf;
	vcpu->arch.flags |= KVM_ARM64_VCPU_SVE_FINALIZED;
	return 0;
}

int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature)
{
	switch (feature) {
	case KVM_ARM_VCPU_SVE:
		if (!vcpu_has_sve(vcpu))
			return -EINVAL;

		if (kvm_arm_vcpu_sve_finalized(vcpu))
			return -EPERM;

		return kvm_vcpu_finalize_sve(vcpu);
	}

	return -EINVAL;
}

bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu)
{
	if (vcpu_has_sve(vcpu) && !kvm_arm_vcpu_sve_finalized(vcpu))
		return false;

	return true;
}

void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu)
{
	kfree(vcpu->arch.sve_state);
}

static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu)
{
	if (vcpu_has_sve(vcpu))
		memset(vcpu->arch.sve_state, 0, vcpu_sve_state_size(vcpu));
}

static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu)
{
	/* Support ptrauth only if the system supports these capabilities. */
	if (!has_vhe())
		return -EINVAL;

	if (!system_supports_address_auth() ||
	    !system_supports_generic_auth())
		return -EINVAL;

	/*
	 * For now make sure that both address/generic pointer authentication
	 * features are requested by the userspace together.
	 */
	if (!test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) ||
	    !test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features))
		return -EINVAL;

	vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_PTRAUTH;
	return 0;
}

/**
 * kvm_reset_vcpu - sets core registers and sys_regs to reset value
 * @vcpu: The VCPU pointer
 *
 * This function finds the right table above and sets the registers on
 * the virtual CPU struct to their architecturally defined reset
 * values, except for registers whose reset is deferred until
 * kvm_arm_vcpu_finalize().
 *
 * Note: This function can be called from two paths: The KVM_ARM_VCPU_INIT
 * ioctl or as part of handling a request issued by another VCPU in the PSCI
 * handling code. In the first case, the VCPU will not be loaded, and in the
 * second case the VCPU will be loaded. Because this function operates purely
 * on the memory-backed values of system registers, we want to do a full put if
 * we were loaded (handling a request) and load the values back at the end of
 * the function. Otherwise we leave the state alone. In both cases, we
 * disable preemption around the vcpu reset as we would otherwise race with
 * preempt notifiers which also call put/load.
 */
int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
{
	const struct kvm_regs *cpu_reset;
	int ret = -EINVAL;
	bool loaded;

	/* Reset PMU outside of the non-preemptible section */
	kvm_pmu_vcpu_reset(vcpu);

	preempt_disable();
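	/* The vcpu is loaded on a physical CPU iff vcpu->cpu != -1 */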
	loaded = (vcpu->cpu != -1);
	if (loaded)
		kvm_arch_vcpu_put(vcpu);

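	/*
	 * Before the vcpu is finalized, requesting the SVE feature only
	 * configures it; once finalized, a reset just zeroes the SVE state.
	 */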
	if (!kvm_arm_vcpu_sve_finalized(vcpu)) {
		if (test_bit(KVM_ARM_VCPU_SVE, vcpu->arch.features)) {
			ret = kvm_vcpu_enable_sve(vcpu);
			if (ret)
				goto out;
		}
	} else {
		kvm_vcpu_reset_sve(vcpu);
	}

	if (test_bit(KVM_ARM_VCPU_PTRAUTH_ADDRESS, vcpu->arch.features) ||
	    test_bit(KVM_ARM_VCPU_PTRAUTH_GENERIC, vcpu->arch.features)) {
		if (kvm_vcpu_enable_ptrauth(vcpu))
			goto out;
	}

	switch (vcpu->arch.target) {
	default:
		if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) {
			if (!cpu_has_32bit_el1())
				goto out;
			cpu_reset = &default_regs_reset32;
		} else {
			cpu_reset = &default_regs_reset;
		}

		break;
	}

	/* Reset core registers */
	memcpy(vcpu_gp_regs(vcpu), cpu_reset, sizeof(*cpu_reset));

	/* Reset system registers */
	kvm_reset_sys_regs(vcpu);

	/*
	 * Additional reset state handling that PSCI may have imposed on us.
	 * Must be done after all the sys_reg reset.
	 */
	if (vcpu->arch.reset_state.reset) {
		unsigned long target_pc = vcpu->arch.reset_state.pc;

		/* Gracefully handle Thumb2 entry point */
		if (vcpu_mode_is_32bit(vcpu) && (target_pc & 1)) {
			target_pc &= ~1UL;
			vcpu_set_thumb(vcpu);
		}

		/* Propagate caller endianness */
		if (vcpu->arch.reset_state.be)
			kvm_vcpu_set_be(vcpu);

		*vcpu_pc(vcpu) = target_pc;
		vcpu_set_reg(vcpu, 0, vcpu->arch.reset_state.r0);

		vcpu->arch.reset_state.reset = false;
	}

	/* Default workaround setup is enabled (if supported) */
	if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL)
		vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;

	/* Reset timer */
	ret = kvm_timer_vcpu_reset(vcpu);
out:
	if (loaded)
		kvm_arch_vcpu_load(vcpu, smp_processor_id());
	preempt_enable();
	return ret;
}

void kvm_set_ipa_limit(void)
{
	unsigned int ipa_max, pa_max, va_max, parange;

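	/* The low bits of ID_AA64MMFR0_EL1 hold PARange, the supported PA size */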
	parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 0x7;
	pa_max = id_aa64mmfr0_parange_to_phys_shift(parange);

	/* Clamp the IPA limit to the PA size supported by the kernel */
	ipa_max = (pa_max > PHYS_MASK_SHIFT) ? PHYS_MASK_SHIFT : pa_max;

	/*
	 * Since our stage2 table is dependent on the stage1 page table code,
	 * we must always honor the following condition:
	 *
	 *  Number of levels in Stage1 >= Number of levels in Stage2.
	 *
	 * So clamp the ipa limit further down to limit the number of levels.
	 * Since we can concatenate up to 16 tables at entry level, we could
	 * go up to 4 bits above the maximum VA addressable with the current
	 * number of levels.
	 */
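	/*
	 * Each level resolves PAGE_SHIFT - 3 bits of VA, so a fully populated
	 * top level covers PGDIR_SHIFT + (PAGE_SHIFT - 3) bits.
	 */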
	va_max = PGDIR_SHIFT + PAGE_SHIFT - 3;
	va_max += 4;

	if (va_max < ipa_max)
		ipa_max = va_max;

	/*
	 * If the final limit is lower than the real physical address
	 * limit of the CPUs, report the reason.
	 */
	if (ipa_max < pa_max)
		pr_info("kvm: Limiting the IPA size due to kernel %s Address limit\n",
			(va_max < pa_max) ? "Virtual" : "Physical");

	WARN(ipa_max < KVM_PHYS_SHIFT,
	     "KVM IPA limit (%d bit) is smaller than default size\n", ipa_max);
	kvm_ipa_limit = ipa_max;
	kvm_info("IPA Size Limit: %dbits\n", kvm_ipa_limit);
}

/*
 * Configure the VTCR_EL2 for this VM. The VTCR value is common
 * across all the physical CPUs on the system. We use system wide
 * sanitised values to fill in different fields, except for Hardware
 * Management of Access Flags. HA Flag is set unconditionally on
 * all CPUs, as it is safe to run with or without the feature and
 * the bit is RES0 on CPUs that don't support it.
 */
int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
{
	u64 vtcr = VTCR_EL2_FLAGS;
	u32 parange, phys_shift;
	u8 lvls;

	if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
		return -EINVAL;

	phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
	if (phys_shift) {
		if (phys_shift > kvm_ipa_limit ||
		    phys_shift < 32)
			return -EINVAL;
	} else {
		phys_shift = KVM_PHYS_SHIFT;
	}

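	/* VTCR_EL2.PS: output (physical) address size, taken from PARange */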
	parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7;
	if (parange > ID_AA64MMFR0_PARANGE_MAX)
		parange = ID_AA64MMFR0_PARANGE_MAX;
	vtcr |= parange << VTCR_EL2_PS_SHIFT;

	vtcr |= VTCR_EL2_T0SZ(phys_shift);

	/*
	 * Use a minimum 2 level page table to prevent splitting
	 * host PMD huge pages at stage2.
	 */
	lvls = stage2_pgtable_levels(phys_shift);
	if (lvls < 2)
		lvls = 2;
	vtcr |= VTCR_EL2_LVLS_TO_SL0(lvls);

	/*
	 * Enable the Hardware Access Flag management, unconditionally
	 * on all CPUs. The feature is RES0 on CPUs without the support
	 * and must be ignored by the CPUs.
	 */
	vtcr |= VTCR_EL2_HA;

	/* Set the vmid bits */
	vtcr |= (kvm_get_vmid_bits() == 16) ?
		VTCR_EL2_VS_16BIT :
		VTCR_EL2_VS_8BIT;
	kvm->arch.vtcr = vtcr;
	return 0;
}