2019-06-03 08:44:50 +03:00
/* SPDX-License-Identifier: GPL-2.0-only */
2015-11-24 18:51:12 +03:00
/*
* Copyright ( C ) 2015 , 2016 ARM Ltd .
*/
# ifndef __KVM_ARM_VGIC_NEW_H__
# define __KVM_ARM_VGIC_NEW_H__
2015-12-01 17:02:35 +03:00
# include <linux/irqchip/arm-gic-common.h>
2016-04-26 13:06:47 +03:00
#define PRODUCT_ID_KVM		0x4b	/* ASCII code K */
#define IMPLEMENTER_ARM		0x43b

/* Marker value for an undefined VGIC address, and the check against it. */
#define VGIC_ADDR_UNDEF		(-1)
#define IS_VGIC_ADDR_UNDEF(_x)	((_x) == VGIC_ADDR_UNDEF)

#define INTERRUPT_ID_BITS_SPIS	10
#define INTERRUPT_ID_BITS_ITS	16
#define VGIC_PRI_BITS		5

/* True when @intid is numerically below VGIC_NR_SGIS. */
#define vgic_irq_is_sgi(intid)	((intid) < VGIC_NR_SGIS)
2017-01-26 17:20:47 +03:00
/* Position and mask of each 8-bit affinity field in the userspace format. */
#define VGIC_AFFINITY_0_SHIFT 0
#define VGIC_AFFINITY_0_MASK (0xffUL << VGIC_AFFINITY_0_SHIFT)
#define VGIC_AFFINITY_1_SHIFT 8
#define VGIC_AFFINITY_1_MASK (0xffUL << VGIC_AFFINITY_1_SHIFT)
#define VGIC_AFFINITY_2_SHIFT 16
#define VGIC_AFFINITY_2_MASK (0xffUL << VGIC_AFFINITY_2_SHIFT)
#define VGIC_AFFINITY_3_SHIFT 24
#define VGIC_AFFINITY_3_MASK (0xffUL << VGIC_AFFINITY_3_SHIFT)

/*
 * Extract affinity field @level from @reg (userspace format) and shift it
 * to the position given by MPIDR_LEVEL_SHIFT(level). @level must be a
 * literal 0..3 because it is pasted into the macro names above.
 */
#define VGIC_AFFINITY_LEVEL(reg, level) \
	((((reg) & VGIC_AFFINITY_## level ##_MASK) \
	>> VGIC_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level))

/*
 * The Userspace encodes the affinity differently from the MPIDR,
 * Below macro converts vgic userspace format to MPIDR reg format.
 */
#define VGIC_TO_MPIDR(val) (VGIC_AFFINITY_LEVEL(val, 0) | \
			    VGIC_AFFINITY_LEVEL(val, 1) | \
			    VGIC_AFFINITY_LEVEL(val, 2) | \
			    VGIC_AFFINITY_LEVEL(val, 3))
2017-01-26 17:20:51 +03:00
/*
 * As per Documentation/virt/kvm/devices/arm-vgic-v3.rst,
 * below macros are defined for CPUREG encoding.
 */
/* Bit position of each field within the CPUREG encoding. */
#define KVM_REG_ARM_VGIC_SYSREG_OP0_SHIFT	14
#define KVM_REG_ARM_VGIC_SYSREG_OP1_SHIFT	11
#define KVM_REG_ARM_VGIC_SYSREG_CRN_SHIFT	7
#define KVM_REG_ARM_VGIC_SYSREG_CRM_SHIFT	3
#define KVM_REG_ARM_VGIC_SYSREG_OP2_SHIFT	0

/* Field masks: the field's all-ones value moved to its bit position. */
#define KVM_REG_ARM_VGIC_SYSREG_OP0_MASK	(0x3 << KVM_REG_ARM_VGIC_SYSREG_OP0_SHIFT)
#define KVM_REG_ARM_VGIC_SYSREG_OP1_MASK	(0x7 << KVM_REG_ARM_VGIC_SYSREG_OP1_SHIFT)
#define KVM_REG_ARM_VGIC_SYSREG_CRN_MASK	(0xf << KVM_REG_ARM_VGIC_SYSREG_CRN_SHIFT)
#define KVM_REG_ARM_VGIC_SYSREG_CRM_MASK	(0xf << KVM_REG_ARM_VGIC_SYSREG_CRM_SHIFT)
#define KVM_REG_ARM_VGIC_SYSREG_OP2_MASK	(0x7 << KVM_REG_ARM_VGIC_SYSREG_OP2_SHIFT)

/* Union of all five field masks. */
#define KVM_DEV_ARM_VGIC_SYSREG_MASK \
	(KVM_REG_ARM_VGIC_SYSREG_OP0_MASK | \
	 KVM_REG_ARM_VGIC_SYSREG_OP1_MASK | \
	 KVM_REG_ARM_VGIC_SYSREG_CRN_MASK | \
	 KVM_REG_ARM_VGIC_SYSREG_CRM_MASK | \
	 KVM_REG_ARM_VGIC_SYSREG_OP2_MASK)
2017-01-09 18:19:41 +03:00
/*
 * As per Documentation/virt/kvm/devices/arm-vgic-its.rst,
 * below macros are defined for ITS table entry encoding.
 */
/* CTE fields: valid flag in bit 63, RDBASE from bit 16, ICID in bits 15:0. */
#define KVM_ITS_CTE_VALID_SHIFT		63
#define KVM_ITS_CTE_VALID_MASK		BIT_ULL(KVM_ITS_CTE_VALID_SHIFT)
#define KVM_ITS_CTE_RDBASE_SHIFT	16
#define KVM_ITS_CTE_ICID_MASK		GENMASK_ULL(KVM_ITS_CTE_RDBASE_SHIFT - 1, 0)
2017-05-03 18:38:01 +03:00
/* ITE fields: NEXT from bit 48, PINTID in bits 47:16, ICID in bits 15:0. */
#define KVM_ITS_ITE_NEXT_SHIFT		48
#define KVM_ITS_ITE_PINTID_SHIFT	16
#define KVM_ITS_ITE_PINTID_MASK \
	GENMASK_ULL(KVM_ITS_ITE_NEXT_SHIFT - 1, KVM_ITS_ITE_PINTID_SHIFT)
#define KVM_ITS_ITE_ICID_MASK \
	GENMASK_ULL(KVM_ITS_ITE_PINTID_SHIFT - 1, 0)
2017-01-09 18:27:07 +03:00
/* DTE fields: valid bit 63, NEXT 62:49, ITTADDR 48:5, SIZE 4:0. */
#define KVM_ITS_DTE_VALID_SHIFT		63
#define KVM_ITS_DTE_NEXT_SHIFT		49
#define KVM_ITS_DTE_ITTADDR_SHIFT	5
#define KVM_ITS_DTE_VALID_MASK		BIT_ULL(KVM_ITS_DTE_VALID_SHIFT)
#define KVM_ITS_DTE_NEXT_MASK \
	GENMASK_ULL(KVM_ITS_DTE_VALID_SHIFT - 1, KVM_ITS_DTE_NEXT_SHIFT)
#define KVM_ITS_DTE_ITTADDR_MASK \
	GENMASK_ULL(KVM_ITS_DTE_NEXT_SHIFT - 1, KVM_ITS_DTE_ITTADDR_SHIFT)
#define KVM_ITS_DTE_SIZE_MASK \
	GENMASK_ULL(KVM_ITS_DTE_ITTADDR_SHIFT - 1, 0)

/* L1E fields: valid bit 63 and an address in bits 51:16. */
#define KVM_ITS_L1E_VALID_MASK		BIT_ULL(63)
/* we only support 64 kB translation table page size */
#define KVM_ITS_L1E_ADDR_MASK		GENMASK_ULL(51, 16)
2017-01-09 18:19:41 +03:00
2018-05-22 10:55:17 +03:00
/* RDIST value layout: index 11:0, flags 15:12, base 51:16, count 63:52. */
#define KVM_VGIC_V3_RDIST_FLAGS_SHIFT	12
#define KVM_VGIC_V3_RDIST_COUNT_SHIFT	52
#define KVM_VGIC_V3_RDIST_INDEX_MASK \
	GENMASK_ULL(KVM_VGIC_V3_RDIST_FLAGS_SHIFT - 1, 0)
#define KVM_VGIC_V3_RDIST_FLAGS_MASK \
	GENMASK_ULL(15, KVM_VGIC_V3_RDIST_FLAGS_SHIFT)
#define KVM_VGIC_V3_RDIST_BASE_MASK \
	GENMASK_ULL(KVM_VGIC_V3_RDIST_COUNT_SHIFT - 1, 16)
#define KVM_VGIC_V3_RDIST_COUNT_MASK \
	GENMASK_ULL(63, KVM_VGIC_V3_RDIST_COUNT_SHIFT)
2018-08-03 16:57:03 +03:00
/*
 * Expands to BUG_ON(p) when CONFIG_DEBUG_SPINLOCK is defined; otherwise it
 * expands to nothing, so @p is not evaluated at all.
 */
#ifndef CONFIG_DEBUG_SPINLOCK
#define DEBUG_SPINLOCK_BUG_ON(p)
#else
#define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p)
#endif
2018-03-06 12:21:06 +03:00
/* Requires the irq_lock to be held by the caller. */
2017-01-23 16:07:18 +03:00
static inline bool irq_is_pending ( struct vgic_irq * irq )
{
if ( irq - > config = = VGIC_CONFIG_EDGE )
return irq - > pending_latch ;
else
return irq - > pending_latch | | irq - > line_level ;
}
KVM: arm/arm64: vgic: Support level-triggered mapped interrupts
Level-triggered mapped IRQs are special because we only observe rising
edges as input to the VGIC, and we don't set the EOI flag and therefore
are not told when the level goes down, so that we can re-queue a new
interrupt when the level goes up.
One way to solve this problem is to side-step the logic of the VGIC and
special case the validation in the injection path, but it has the
unfortunate drawback of having to peek into the physical GIC state
whenever we want to know if the interrupt is pending on the virtual
distributor.
Instead, we can maintain the current semantics of a level triggered
interrupt by sort of treating it as an edge-triggered interrupt,
following from the fact that we only observe an asserting edge. This
requires us to be a bit careful when populating the LRs and when folding
the state back in though:
* We lower the line level when populating the LR, so that when
subsequently observing an asserting edge, the VGIC will do the right
thing.
* If the guest never acked the interrupt while running (for example if
it had masked interrupts at the CPU level while running), we have
to preserve the pending state of the LR and move it back to the
line_level field of the struct irq when folding LR state.
If the guest never acked the interrupt while running, but changed the
device state and lowered the line (again with interrupts masked) then
we need to observe this change in the line_level.
Both of the above situations are solved by sampling the physical line
and set the line level when folding the LR back.
* Finally, if the guest never acked the interrupt while running and
sampling the line reveals that the device state has changed and the
line has been lowered, we must clear the physical active state, since
we will otherwise never be told when the interrupt becomes asserted
again.
This has the added benefit of making the timer optimization patches
(https://lists.cs.columbia.edu/pipermail/kvmarm/2017-July/026343.html) a
bit simpler, because the timer code doesn't have to clear the active
state on the sync anymore. It also potentially improves the performance
of the timer implementation because the GIC knows the state or the LR
and only needs to clear the
active state when the pending bit in the LR is still set, where the
timer has to always clear it when returning from running the guest with
an injected timer interrupt.
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
2017-08-29 11:40:44 +03:00
static inline bool vgic_irq_is_mapped_level ( struct vgic_irq * irq )
{
return irq - > config = = VGIC_CONFIG_LEVEL & & irq - > hw ;
}
KVM: arm/arm64: vgic: Fix source vcpu issues for GICv2 SGI
Now that we make sure we don't inject multiple instances of the
same GICv2 SGI at the same time, we've made another bug more
obvious:
If we exit with an active SGI, we completely lose track of which
vcpu it came from. On the next entry, we restore it with 0 as a
source, and if that wasn't the right one, too bad. While this
doesn't seem to trouble GIC-400, the architectural model gets
offended and doesn't deactivate the interrupt on EOI.
Another connected issue is that we will happily make pending
an interrupt from another vcpu, overriding the above zero with
something that is just as inconsistent. Don't do that.
The final issue is that we signal a maintenance interrupt when
no pending interrupts are present in the LR. Assuming we've fixed
the two issues above, we end-up in a situation where we keep
exiting as soon as we've reached the active state, and not be
able to inject the following pending.
The fix comes in 3 parts:
- GICv2 SGIs have their source vcpu saved if they are active on
exit, and restored on entry
- Multi-SGIs cannot go via the Pending+Active state, as this would
corrupt the source field
- Multi-SGIs are converted to using MI on EOI instead of NPIE
Fixes: 16ca6a607d84bef0 ("KVM: arm/arm64: vgic: Don't populate multiple LRs with the same vintid")
Reported-by: Mark Rutland <mark.rutland@arm.com>
Tested-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Christoffer Dall <christoffer.dall@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
2018-04-18 12:39:04 +03:00
static inline int vgic_irq_get_lr_count ( struct vgic_irq * irq )
{
/* Account for the active state as an interrupt */
if ( vgic_irq_is_sgi ( irq - > intid ) & & irq - > source )
return hweight8 ( irq - > source ) + irq - > active ;
return irq_is_pending ( irq ) | | irq - > active ;
}
/* True when vgic_irq_get_lr_count() reports more than one instance for @irq. */
static inline bool vgic_irq_is_multi_sgi(struct vgic_irq *irq)
{
	const int lr_count = vgic_irq_get_lr_count(irq);

	return lr_count > 1;
}
2017-03-22 00:05:22 +03:00
/*
* This struct provides an intermediate representation of the fields contained
* in the GICH_VMCR and ICH_VMCR registers , such that code exporting the GIC
* state to userspace can generate either GICv2 or GICv3 CPU interface
* registers regardless of the hardware backed GIC used .
*/
2015-12-03 14:47:37 +03:00
struct vgic_vmcr {
2017-05-20 15:12:34 +03:00
u32 grpen0 ;
u32 grpen1 ;
u32 ackctl ;
u32 fiqen ;
u32 cbpr ;
u32 eoim ;
2015-12-03 14:47:37 +03:00
u32 abpr ;
u32 bpr ;
2017-03-22 00:05:22 +03:00
u32 pmr ; /* Priority mask field in the GICC_PMR and
* ICC_PMR_EL1 priority field format */
2015-12-03 14:47:37 +03:00
} ;
2017-01-26 17:20:47 +03:00
/*
 * Pairs a vcpu pointer with a gpa_t address.
 * NOTE(review): presumably the decoded form of a device attribute, filled in
 * by the vgic_v2/v3_parse_attr() helpers declared in this header — confirm.
 */
struct vgic_reg_attr {
struct kvm_vcpu * vcpu ;
gpa_t addr ;
} ;
/*
 * Attribute parsing for the GICv3 and GICv2 devices; both take an output
 * @reg_attr. NOTE(review): defined outside this header; the int return is
 * presumably 0 or a negative errno — confirm.
 */
int vgic_v3_parse_attr ( struct kvm_device * dev , struct kvm_device_attr * attr ,
struct vgic_reg_attr * reg_attr ) ;
int vgic_v2_parse_attr ( struct kvm_device * dev , struct kvm_device_attr * attr ,
struct vgic_reg_attr * reg_attr ) ;
/*
 * Look up the register region of @iodev for an access of @len bytes at @addr.
 * NOTE(review): miss behaviour (NULL?) is not visible here.
 */
const struct vgic_register_region *
vgic_get_mmio_region ( struct kvm_vcpu * vcpu , struct vgic_io_device * iodev ,
gpa_t addr , int len ) ;
2015-11-24 18:51:12 +03:00
/*
 * Lookup and release of struct vgic_irq objects; defined outside this header.
 * NOTE(review): the "_locked" suffix suggests __vgic_put_lpi_locked() expects
 * a lock to be held already — confirm which one against the definition.
 */
struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu,
			      u32 intid);
void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq);
void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq);
KVM: arm/arm64: vgic: Support level-triggered mapped interrupts
Level-triggered mapped IRQs are special because we only observe rising
edges as input to the VGIC, and we don't set the EOI flag and therefore
are not told when the level goes down, so that we can re-queue a new
interrupt when the level goes up.
One way to solve this problem is to side-step the logic of the VGIC and
special case the validation in the injection path, but it has the
unfortunate drawback of having to peek into the physical GIC state
whenever we want to know if the interrupt is pending on the virtual
distributor.
Instead, we can maintain the current semantics of a level triggered
interrupt by sort of treating it as an edge-triggered interrupt,
following from the fact that we only observe an asserting edge. This
requires us to be a bit careful when populating the LRs and when folding
the state back in though:
* We lower the line level when populating the LR, so that when
subsequently observing an asserting edge, the VGIC will do the right
thing.
* If the guest never acked the interrupt while running (for example if
it had masked interrupts at the CPU level while running), we have
to preserve the pending state of the LR and move it back to the
line_level field of the struct irq when folding LR state.
If the guest never acked the interrupt while running, but changed the
device state and lowered the line (again with interrupts masked) then
we need to observe this change in the line_level.
Both of the above situations are solved by sampling the physical line
and set the line level when folding the LR back.
* Finally, if the guest never acked the interrupt while running and
sampling the line reveals that the device state has changed and the
line has been lowered, we must clear the physical active state, since
we will otherwise never be told when the interrupt becomes asserted
again.
This has the added benefit of making the timer optimization patches
(https://lists.cs.columbia.edu/pipermail/kvmarm/2017-July/026343.html) a
bit simpler, because the timer code doesn't have to clear the active
state on the sync anymore. It also potentially improves the performance
of the timer implementation because the GIC knows the state or the LR
and only needs to clear the
active state when the pending bit in the LR is still set, where the
timer has to always clear it when returning from running the guest with
an injected timer interrupt.
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
2017-08-29 11:40:44 +03:00
/*
 * Query the physical line level for @irq.
 * NOTE(review): defined outside this header; presumably used to sample the
 * line of a mapped level-triggered interrupt — confirm.
 */
bool vgic_get_phys_line_level ( struct vgic_irq * irq ) ;
KVM: arm/arm64: Support VGIC dist pend/active changes for mapped IRQs
For mapped IRQs (with the HW bit set in the LR) we have to follow some
rules of the architecture. One of these rules is that VM must not be
allowed to deactivate a virtual interrupt with the HW bit set unless the
physical interrupt is also active.
This works fine when injecting mapped interrupts, because we leave it up
to the injector to either set EOImode==1 or manually set the active
state of the physical interrupt.
However, the guest can set virtual interrupt to be pending or active by
writing to the virtual distributor, which could lead to deactivating a
virtual interrupt with the HW bit set without the physical interrupt
being active.
We could set the physical interrupt to active whenever we are about to
enter the VM with a HW interrupt either pending or active, but that
would be really slow, especially on GICv2. So we take the long way
around and do the hard work when needed, which is expected to be
extremely rare.
When the VM sets the pending state for a HW interrupt on the virtual
distributor we set the active state on the physical distributor, because
the virtual interrupt can become active and then the guest can
deactivate it.
When the VM clears the pending state we also clear it on the physical
side, because the injector might otherwise raise the interrupt. We also
clear the physical active state when the virtual interrupt is not
active, since otherwise a SPEND/CPEND sequence from the guest would
prevent signaling of future interrupts.
Changing the state of mapped interrupts from userspace is not supported,
and it's expected that userspace unmaps devices from VFIO before
attempting to set the interrupt state, because the interrupt state is
driven by hardware.
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
2017-09-01 17:25:12 +03:00
/*
 * Set (@pending true) or clear the pending state on the physical side for
 * @irq. NOTE(review): defined outside this header; presumably only
 * meaningful for mapped (hw) interrupts — confirm.
 */
void vgic_irq_set_phys_pending ( struct vgic_irq * irq , bool pending ) ;
KVM: arm/arm64: vgic: Support level-triggered mapped interrupts
Level-triggered mapped IRQs are special because we only observe rising
edges as input to the VGIC, and we don't set the EOI flag and therefore
are not told when the level goes down, so that we can re-queue a new
interrupt when the level goes up.
One way to solve this problem is to side-step the logic of the VGIC and
special case the validation in the injection path, but it has the
unfortunate drawback of having to peek into the physical GIC state
whenever we want to know if the interrupt is pending on the virtual
distributor.
Instead, we can maintain the current semantics of a level triggered
interrupt by sort of treating it as an edge-triggered interrupt,
following from the fact that we only observe an asserting edge. This
requires us to be a bit careful when populating the LRs and when folding
the state back in though:
* We lower the line level when populating the LR, so that when
subsequently observing an asserting edge, the VGIC will do the right
thing.
* If the guest never acked the interrupt while running (for example if
it had masked interrupts at the CPU level while running), we have
to preserve the pending state of the LR and move it back to the
line_level field of the struct irq when folding LR state.
If the guest never acked the interrupt while running, but changed the
device state and lowered the line (again with interrupts masked) then
we need to observe this change in the line_level.
Both of the above situations are solved by sampling the physical line
and set the line level when folding the LR back.
* Finally, if the guest never acked the interrupt while running and
sampling the line reveals that the device state has changed and the
line has been lowered, we must clear the physical active state, since
we will otherwise never be told when the interrupt becomes asserted
again.
This has the added benefit of making the timer optimization patches
(https://lists.cs.columbia.edu/pipermail/kvmarm/2017-July/026343.html) a
bit simpler, because the timer code doesn't have to clear the active
state on the sync anymore. It also potentially improves the performance
of the timer implementation because the GIC knows the state or the LR
and only needs to clear the
active state when the pending bit in the LR is still set, where the
timer has to always clear it when returning from running the guest with
an injected timer interrupt.
Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
2017-08-29 11:40:44 +03:00
/*
 * Set (@active true) or clear the active state on the physical side for @irq.
 * NOTE(review): defined outside this header.
 */
void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active);
/*
 * NOTE(review): the name and the @flags argument suggest this queues @irq and
 * drops a lock taken with irqsave by the caller — confirm which lock and what
 * the bool result means against the definition.
 */
bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq,
			   unsigned long flags);
void vgic_kick_vcpus(struct kvm *kvm);
2015-11-24 18:51:12 +03:00
2016-07-15 14:43:31 +03:00
/*
 * Check @addr (with the given @alignment) against the slot at @ioaddr.
 * NOTE(review): exact checks and return codes are not visible here.
 */
int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr,
		      phys_addr_t addr, phys_addr_t alignment);

/* GICv2 list register (LR) handling; defined outside this header. */
void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu);
void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr);
void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr);
void vgic_v2_set_underflow(struct kvm_vcpu *vcpu);
KVM: arm/arm64: vgic: Don't populate multiple LRs with the same vintid
The vgic code is trying to be clever when injecting GICv2 SGIs,
and will happily populate LRs with the same interrupt number if
they come from multiple vcpus (after all, they are distinct
interrupt sources).
Unfortunately, this is against the letter of the architecture,
and the GICv2 architecture spec says "Each valid interrupt stored
in the List registers must have a unique VirtualID for that
virtual CPU interface.". GICv3 has similar (although slightly
ambiguous) restrictions.
This results in guests locking up when using GICv2-on-GICv3, for
example. The obvious fix is to stop trying so hard, and inject
a single vcpu per SGI per guest entry. After all, pending SGIs
with multiple source vcpus are pretty rare, and are mostly seen
in scenarios where the physical CPUs are severely overcommitted.
But as we now only inject a single instance of a multi-source SGI per
vcpu entry, we may delay those interrupts for longer than strictly
necessary, and run the risk of injecting lower priority interrupts
in the meantime.
In order to address this, we adopt a three stage strategy:
- If we encounter a multi-source SGI in the AP list while computing
its depth, we force the list to be sorted
- When populating the LRs, we prevent the injection of any interrupt
of lower priority than that of the first multi-source SGI we've
injected.
- Finally, the injection of a multi-source SGI triggers the request
of a maintenance interrupt when there will be no pending interrupt
in the LRs (HCR_NPIE).
At the point where the last pending interrupt in the LRs switches
from Pending to Active, the maintenance interrupt will be delivered,
allowing us to add the remaining SGIs using the same process.
Cc: stable@vger.kernel.org
Fixes: 0919e84c0fc1 ("KVM: arm/arm64: vgic-new: Add IRQ sync/flush framework")
Acked-by: Christoffer Dall <cdall@kernel.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
2018-03-07 00:48:01 +03:00
/*
 * GICv2 "NPIE" hook for @vcpu.
 * NOTE(review): presumably requests a maintenance interrupt for when no
 * pending interrupt remains in the LRs (HCR_NPIE) — confirm.
 */
void vgic_v2_set_npie ( struct kvm_vcpu * vcpu ) ;
2015-12-21 19:34:52 +03:00
int vgic_v2_has_attr_regs ( struct kvm_device * dev , struct kvm_device_attr * attr ) ;
2016-04-25 02:11:37 +03:00
int vgic_v2_dist_uaccess ( struct kvm_vcpu * vcpu , bool is_write ,
int offset , u32 * val ) ;
2015-12-03 14:48:42 +03:00
int vgic_v2_cpuif_uaccess ( struct kvm_vcpu * vcpu , bool is_write ,
int offset , u32 * val ) ;
2015-12-03 14:47:37 +03:00
void vgic_v2_set_vmcr ( struct kvm_vcpu * vcpu , struct vgic_vmcr * vmcr ) ;
void vgic_v2_get_vmcr ( struct kvm_vcpu * vcpu , struct vgic_vmcr * vmcr ) ;
2015-12-21 20:09:38 +03:00
void vgic_v2_enable ( struct kvm_vcpu * vcpu ) ;
2015-12-01 17:02:35 +03:00
int vgic_v2_probe ( const struct gic_kvm_info * info ) ;
2015-12-21 17:04:42 +03:00
int vgic_v2_map_resources ( struct kvm * kvm ) ;
2016-04-26 23:32:49 +03:00
int vgic_register_dist_iodev ( struct kvm * kvm , gpa_t dist_base_address ,
enum vgic_type ) ;
2015-11-26 20:19:25 +03:00
2017-03-18 15:56:56 +03:00
void vgic_v2_init_lrs ( void ) ;
2016-03-24 13:21:04 +03:00
void vgic_v2_load ( struct kvm_vcpu * vcpu ) ;
void vgic_v2_put ( struct kvm_vcpu * vcpu ) ;
KVM: arm/arm64: Sync ICH_VMCR_EL2 back when about to block
Since commit 328e56647944 ("KVM: arm/arm64: vgic: Defer
touching GICH_VMCR to vcpu_load/put"), we leave ICH_VMCR_EL2 (or
its GICv2 equivalent) loaded as long as we can, only syncing it
back when we're scheduled out.
There is a small snag with that though: kvm_vgic_vcpu_pending_irq(),
which is indirectly called from kvm_vcpu_check_block(), needs to
evaluate the guest's view of ICC_PMR_EL1. At the point where we
call kvm_vcpu_check_block(), the vcpu is still loaded, and whatever
changes to PMR is not visible in memory until we do a vcpu_put().
Things go really south if the guest does the following:
mov x0, #0 // or any small value masking interrupts
msr ICC_PMR_EL1, x0
[vcpu preempted, then rescheduled, VMCR sampled]
mov x0, #ff // allow all interrupts
msr ICC_PMR_EL1, x0
wfi // traps to EL2, so samping of VMCR
[interrupt arrives just after WFI]
Here, the hypervisor's view of PMR is zero, while the guest has enabled
its interrupts. kvm_vgic_vcpu_pending_irq() will then say that no
interrupts are pending (despite an interrupt being received) and we'll
block for no reason. If the guest doesn't have a periodic interrupt
firing once it has blocked, it will stay there forever.
To avoid this unfortunate situation, let's resync VMCR from
kvm_arch_vcpu_blocking(), ensuring that a following kvm_vcpu_check_block()
will observe the latest value of PMR.
This has been found by booting an arm64 Linux guest with the pseudo NMI
feature, and thus using interrupt priorities to mask interrupts instead
of the usual PSTATE masking.
Cc: stable@vger.kernel.org # 4.12
Fixes: 328e56647944 ("KVM: arm/arm64: vgic: Defer touching GICH_VMCR to vcpu_load/put")
Signed-off-by: Marc Zyngier <maz@kernel.org>
2019-08-02 12:28:32 +03:00
/*
 * Sync the GICv2 VMCR for @vcpu.
 * NOTE(review): presumably copies the live GICH_VMCR back to the in-memory
 * copy so the priority mask is current before the vcpu blocks — confirm.
 */
void vgic_v2_vmcr_sync ( struct kvm_vcpu * vcpu ) ;
2017-03-18 15:56:56 +03:00
2016-12-22 22:39:10 +03:00
/*
 * GICv2 state save/restore pair for @vcpu; defined outside this header.
 * NOTE(review): exactly which state is covered is not visible here.
 */
void vgic_v2_save_state ( struct kvm_vcpu * vcpu ) ;
void vgic_v2_restore_state ( struct kvm_vcpu * vcpu ) ;
2016-07-17 13:27:23 +03:00
static inline void vgic_get_irq_kref ( struct vgic_irq * irq )
{
if ( irq - > intid < VGIC_MIN_LPI )
return ;
kref_get ( & irq - > refcount ) ;
}
2015-11-30 16:09:53 +03:00
/* GICv3 list register (LR) handling; defined outside this header. */
void vgic_v3_fold_lr_state ( struct kvm_vcpu * vcpu ) ;
void vgic_v3_populate_lr ( struct kvm_vcpu * vcpu , struct vgic_irq * irq , int lr ) ;
void vgic_v3_clear_lr ( struct kvm_vcpu * vcpu , int lr ) ;
void vgic_v3_set_underflow ( struct kvm_vcpu * vcpu ) ;
KVM: arm/arm64: vgic: Don't populate multiple LRs with the same vintid
The vgic code is trying to be clever when injecting GICv2 SGIs,
and will happily populate LRs with the same interrupt number if
they come from multiple vcpus (after all, they are distinct
interrupt sources).
Unfortunately, this is against the letter of the architecture,
and the GICv2 architecture spec says "Each valid interrupt stored
in the List registers must have a unique VirtualID for that
virtual CPU interface.". GICv3 has similar (although slightly
ambiguous) restrictions.
This results in guests locking up when using GICv2-on-GICv3, for
example. The obvious fix is to stop trying so hard, and inject
a single vcpu per SGI per guest entry. After all, pending SGIs
with multiple source vcpus are pretty rare, and are mostly seen
in scenarios where the physical CPUs are severely overcommitted.
But as we now only inject a single instance of a multi-source SGI per
vcpu entry, we may delay those interrupts for longer than strictly
necessary, and run the risk of injecting lower priority interrupts
in the meantime.
In order to address this, we adopt a three stage strategy:
- If we encounter a multi-source SGI in the AP list while computing
its depth, we force the list to be sorted
- When populating the LRs, we prevent the injection of any interrupt
of lower priority than that of the first multi-source SGI we've
injected.
- Finally, the injection of a multi-source SGI triggers the request
of a maintenance interrupt when there will be no pending interrupt
in the LRs (HCR_NPIE).
At the point where the last pending interrupt in the LRs switches
from Pending to Active, the maintenance interrupt will be delivered,
allowing us to add the remaining SGIs using the same process.
Cc: stable@vger.kernel.org
Fixes: 0919e84c0fc1 ("KVM: arm/arm64: vgic-new: Add IRQ sync/flush framework")
Acked-by: Christoffer Dall <cdall@kernel.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
2018-03-07 00:48:01 +03:00
/*
 * GICv3 "NPIE" hook for @vcpu.
 * NOTE(review): presumably requests a maintenance interrupt for when no
 * pending interrupt remains in the LRs (HCR_NPIE) — confirm.
 */
void vgic_v3_set_npie ( struct kvm_vcpu * vcpu ) ;
2015-12-03 14:47:37 +03:00
void vgic_v3_set_vmcr ( struct kvm_vcpu * vcpu , struct vgic_vmcr * vmcr ) ;
void vgic_v3_get_vmcr ( struct kvm_vcpu * vcpu , struct vgic_vmcr * vmcr ) ;
2015-12-21 20:09:38 +03:00
void vgic_v3_enable ( struct kvm_vcpu * vcpu ) ;
2015-12-01 17:02:35 +03:00
int vgic_v3_probe ( const struct gic_kvm_info * info ) ;
2015-12-21 17:04:42 +03:00
int vgic_v3_map_resources ( struct kvm * kvm ) ;
2017-05-04 12:19:52 +03:00
int vgic_v3_lpi_sync_pending_status ( struct kvm * kvm , struct vgic_irq * irq ) ;
2017-01-09 18:28:27 +03:00
int vgic_v3_save_pending_tables ( struct kvm * kvm ) ;
2018-05-22 10:55:17 +03:00
int vgic_v3_set_redist_base ( struct kvm * kvm , u32 index , u64 addr , u32 count ) ;
2017-05-08 13:30:24 +03:00
int vgic_register_redist_iodev ( struct kvm_vcpu * vcpu ) ;
2017-05-08 13:23:51 +03:00
bool vgic_v3_check_base ( struct kvm * kvm ) ;
2016-09-12 17:49:18 +03:00
2016-03-24 13:21:04 +03:00
void vgic_v3_load ( struct kvm_vcpu * vcpu ) ;
void vgic_v3_put ( struct kvm_vcpu * vcpu ) ;
KVM: arm/arm64: Sync ICH_VMCR_EL2 back when about to block
Since commit 328e56647944 ("KVM: arm/arm64: vgic: Defer
touching GICH_VMCR to vcpu_load/put"), we leave ICH_VMCR_EL2 (or
its GICv2 equivalent) loaded as long as we can, only syncing it
back when we're scheduled out.
There is a small snag with that though: kvm_vgic_vcpu_pending_irq(),
which is indirectly called from kvm_vcpu_check_block(), needs to
evaluate the guest's view of ICC_PMR_EL1. At the point where we
call kvm_vcpu_check_block(), the vcpu is still loaded, and whatever
changes to PMR is not visible in memory until we do a vcpu_put().
Things go really south if the guest does the following:
mov x0, #0 // or any small value masking interrupts
msr ICC_PMR_EL1, x0
[vcpu preempted, then rescheduled, VMCR sampled]
mov x0, #ff // allow all interrupts
msr ICC_PMR_EL1, x0
wfi // traps to EL2, so samping of VMCR
[interrupt arrives just after WFI]
Here, the hypervisor's view of PMR is zero, while the guest has enabled
its interrupts. kvm_vgic_vcpu_pending_irq() will then say that no
interrupts are pending (despite an interrupt being received) and we'll
block for no reason. If the guest doesn't have a periodic interrupt
firing once it has blocked, it will stay there forever.
To avoid this unfortunate situation, let's resync VMCR from
kvm_arch_vcpu_blocking(), ensuring that a following kvm_vcpu_check_block()
will observe the latest value of PMR.
This has been found by booting an arm64 Linux guest with the pseudo NMI
feature, and thus using interrupt priorities to mask interrupts instead
of the usual PSTATE masking.
Cc: stable@vger.kernel.org # 4.12
Fixes: 328e56647944 ("KVM: arm/arm64: vgic: Defer touching GICH_VMCR to vcpu_load/put")
Signed-off-by: Marc Zyngier <maz@kernel.org>
2019-08-02 12:28:32 +03:00
/*
 * Sync the GICv3 VMCR for @vcpu.
 * NOTE(review): presumably copies the live ICH_VMCR_EL2 back to the in-memory
 * copy so the guest's PMR is current before the vcpu blocks — confirm.
 */
void vgic_v3_vmcr_sync ( struct kvm_vcpu * vcpu ) ;
2016-03-24 13:21:04 +03:00
2016-07-15 14:43:30 +03:00
/* ITS / LPI / MSI entry points; defined outside this header. */
bool vgic_has_its(struct kvm *kvm);
int kvm_vgic_register_its_device(void);
void vgic_enable_lpis(struct kvm_vcpu *vcpu);
void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu);
int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi);

/*
 * GICv3 userspace register access. @is_write selects the direction and @val
 * (or @reg) carries the value.
 */
int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr);
int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
			 int offset, u32 *val);
int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write,
			   int offset, u32 *val);
int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write,
				u64 id, u64 *val);
int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id,
				 u64 *reg);
int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write,
				    u32 intid, u64 *val);

int kvm_register_vgic_device(unsigned long type);

/* VMCR accessors taking the intermediate struct vgic_vmcr. */
void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);

int vgic_lazy_init(struct kvm *kvm);
int vgic_init(struct kvm *kvm);

void vgic_debug_init(struct kvm *kvm);
void vgic_debug_destroy(struct kvm *kvm);

/*
 * NOTE(review): lock_all_vcpus() returns bool — presumably false when not
 * every vcpu could be locked; confirm against the definition.
 */
bool lock_all_vcpus(struct kvm *kvm);
void unlock_all_vcpus(struct kvm *kvm);
2017-09-01 12:41:52 +03:00
static inline int vgic_v3_max_apr_idx ( struct kvm_vcpu * vcpu )
{
struct vgic_cpu * cpu_if = & vcpu - > arch . vgic_cpu ;
/*
* num_pri_bits are initialized with HW supported values .
* We can rely safely on num_pri_bits even if VM has not
* restored ICC_CTLR_EL1 before restoring APnR registers .
*/
switch ( cpu_if - > num_pri_bits ) {
case 7 : return 3 ;
case 6 : return 1 ;
default : return 0 ;
}
}
2018-05-22 10:55:09 +03:00
static inline bool
vgic_v3_redist_region_full ( struct vgic_redist_region * region )
{
if ( ! region - > count )
return false ;
return ( region - > free_index > = region - > count ) ;
}
/*
 * NOTE(review): defined outside this header; presumably returns a region from
 * the @rdregs list that still has a free slot (NULL if none?) — confirm.
 */
struct vgic_redist_region * vgic_v3_rdist_free_slot ( struct list_head * rdregs ) ;
2018-05-22 10:55:11 +03:00
static inline size_t
vgic_v3_rd_region_size ( struct kvm * kvm , struct vgic_redist_region * rdreg )
{
if ( ! rdreg - > count )
return atomic_read ( & kvm - > online_vcpus ) * KVM_VGIC_V3_REDIST_SIZE ;
else
return rdreg - > count * KVM_VGIC_V3_REDIST_SIZE ;
}
2018-05-22 10:55:17 +03:00
struct vgic_redist_region * vgic_v3_rdist_region_from_index ( struct kvm * kvm ,
u32 index ) ;
2018-05-22 10:55:11 +03:00
bool vgic_v3_rdist_overlap ( struct kvm * kvm , gpa_t base , size_t size ) ;
2018-05-22 10:55:12 +03:00
/*
 * Report whether the range [@base, @base + @size) intersects the distributor
 * window, which starts at vgic_dist_base and spans KVM_VGIC_V3_DIST_SIZE
 * bytes.
 */
static inline bool vgic_dist_overlap(struct kvm *kvm, gpa_t base, size_t size)
{
	struct vgic_dist *d = &kvm->arch.vgic;

	return (base + size > d->vgic_dist_base) &&
		(base < d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE);
}
2018-03-23 18:18:26 +03:00
int vgic_copy_lpi_list ( struct kvm * kvm , struct kvm_vcpu * vcpu , u32 * * intid_ptr ) ;
2017-10-27 17:28:35 +03:00
int vgic_its_resolve_lpi ( struct kvm * kvm , struct vgic_its * its ,
u32 devid , u32 eventid , struct vgic_irq * * irq ) ;
struct vgic_its * vgic_msi_to_its ( struct kvm * kvm , struct kvm_msi * msi ) ;
2019-03-18 13:29:30 +03:00
int vgic_its_inject_cached_translation ( struct kvm * kvm , struct kvm_msi * msi ) ;
2019-03-18 13:13:01 +03:00
void vgic_lpi_translation_cache_init ( struct kvm * kvm ) ;
void vgic_lpi_translation_cache_destroy ( struct kvm * kvm ) ;
2019-06-10 12:26:37 +03:00
void vgic_its_invalidate_cache ( struct kvm * kvm ) ;
2017-10-27 17:28:35 +03:00
2017-10-27 17:28:37 +03:00
bool vgic_supports_direct_msis ( struct kvm * kvm ) ;
2017-10-27 17:28:38 +03:00
int vgic_v4_init ( struct kvm * kvm ) ;
void vgic_v4_teardown ( struct kvm * kvm ) ;
2020-03-04 23:33:26 +03:00
void vgic_v4_configure_vsgis ( struct kvm * kvm ) ;
2017-10-27 17:28:37 +03:00
2015-11-24 18:51:12 +03:00
# endif