59112e9c39
Lockdep reports a circular lock dependency between the srcu and the config_lock: [ 262.179917] -> #1 (&kvm->srcu){.+.+}-{0:0}: [ 262.182010] __synchronize_srcu+0xb0/0x224 [ 262.183422] synchronize_srcu_expedited+0x24/0x34 [ 262.184554] kvm_io_bus_register_dev+0x324/0x50c [ 262.185650] vgic_register_redist_iodev+0x254/0x398 [ 262.186740] vgic_v3_set_redist_base+0x3b0/0x724 [ 262.188087] kvm_vgic_addr+0x364/0x600 [ 262.189189] vgic_set_common_attr+0x90/0x544 [ 262.190278] vgic_v3_set_attr+0x74/0x9c [ 262.191432] kvm_device_ioctl+0x2a0/0x4e4 [ 262.192515] __arm64_sys_ioctl+0x7ac/0x1ba8 [ 262.193612] invoke_syscall.constprop.0+0x70/0x1e0 [ 262.195006] do_el0_svc+0xe4/0x2d4 [ 262.195929] el0_svc+0x44/0x8c [ 262.196917] el0t_64_sync_handler+0xf4/0x120 [ 262.198238] el0t_64_sync+0x190/0x194 [ 262.199224] [ 262.199224] -> #0 (&kvm->arch.config_lock){+.+.}-{3:3}: [ 262.201094] __lock_acquire+0x2b70/0x626c [ 262.202245] lock_acquire+0x454/0x778 [ 262.203132] __mutex_lock+0x190/0x8b4 [ 262.204023] mutex_lock_nested+0x24/0x30 [ 262.205100] vgic_mmio_write_v3_misc+0x5c/0x2a0 [ 262.206178] dispatch_mmio_write+0xd8/0x258 [ 262.207498] __kvm_io_bus_write+0x1e0/0x350 [ 262.208582] kvm_io_bus_write+0xe0/0x1cc [ 262.209653] io_mem_abort+0x2ac/0x6d8 [ 262.210569] kvm_handle_guest_abort+0x9b8/0x1f88 [ 262.211937] handle_exit+0xc4/0x39c [ 262.212971] kvm_arch_vcpu_ioctl_run+0x90c/0x1c04 [ 262.214154] kvm_vcpu_ioctl+0x450/0x12f8 [ 262.215233] __arm64_sys_ioctl+0x7ac/0x1ba8 [ 262.216402] invoke_syscall.constprop.0+0x70/0x1e0 [ 262.217774] do_el0_svc+0xe4/0x2d4 [ 262.218758] el0_svc+0x44/0x8c [ 262.219941] el0t_64_sync_handler+0xf4/0x120 [ 262.221110] el0t_64_sync+0x190/0x194 Note that the current report, which can be triggered by the vgic_irq kselftest, is a triple chain that includes slots_lock, but after inverting the slots_lock/config_lock dependency, the actual problem reported above remains. 
In several places, the vgic code calls kvm_io_bus_register_dev(), which synchronizes the srcu, while holding config_lock (#1). And the MMIO handler takes the config_lock while holding the srcu read lock (#0). Break dependency #1, by registering the distributor and redistributors without holding config_lock. The ITS also uses kvm_io_bus_register_dev() but already relies on slots_lock to serialize calls. The distributor iodev is created on the first KVM_RUN call. Multiple threads will race for vgic initialization, and only the first one will see !vgic_ready() under the lock. To serialize those threads, rely on slots_lock rather than config_lock. Redistributors are created earlier, through KVM_DEV_ARM_VGIC_GRP_ADDR ioctls and vCPU creation. Similarly, serialize the iodev creation with slots_lock, and the rest with config_lock. Fixes: f00327731131 ("KVM: arm64: Use config_lock to protect vgic state") Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org> Reviewed-by: Oliver Upton <oliver.upton@linux.dev> Signed-off-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20230518100914.2837292-2-jean-philippe@linaro.org
1119 lines
28 KiB
C
1119 lines
28 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* VGIC MMIO handling functions
|
|
*/
|
|
|
|
#include <linux/bitops.h>
|
|
#include <linux/bsearch.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/irq.h>
|
|
#include <linux/kvm.h>
|
|
#include <linux/kvm_host.h>
|
|
#include <kvm/iodev.h>
|
|
#include <kvm/arm_arch_timer.h>
|
|
#include <kvm/arm_vgic.h>
|
|
|
|
#include "vgic.h"
|
|
#include "vgic-mmio.h"
|
|
|
|
unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len)
|
|
{
|
|
return 0;
|
|
}
|
|
|
|
unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len)
|
|
{
|
|
return -1UL;
|
|
}
|
|
|
|
/* Write handler for write-ignore registers: the written value is dropped. */
void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
			unsigned int len, unsigned long val)
{
	/* Nothing to do: writes are discarded. */
}
|
|
|
|
/*
 * Userspace-access variant of the write-ignore handler: the value is
 * dropped and success (0) is reported.
 */
int vgic_mmio_uaccess_write_wi(struct kvm_vcpu *vcpu, gpa_t addr,
			       unsigned int len, unsigned long val)
{
	/* Nothing to do: writes are discarded. */
	return 0;
}
|
|
|
|
unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len)
|
|
{
|
|
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
|
u32 value = 0;
|
|
int i;
|
|
|
|
/* Loop over all IRQs affected by this read */
|
|
for (i = 0; i < len * 8; i++) {
|
|
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
|
|
|
if (irq->group)
|
|
value |= BIT(i);
|
|
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
}
|
|
|
|
return value;
|
|
}
|
|
|
|
static void vgic_update_vsgi(struct vgic_irq *irq)
|
|
{
|
|
WARN_ON(its_prop_update_vsgi(irq->host_irq, irq->priority, irq->group));
|
|
}
|
|
|
|
/*
 * Update the group of every interrupt covered by the access from the
 * matching bit of @val.
 */
void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr,
			   unsigned int len, unsigned long val)
{
	u32 first_intid = VGIC_ADDR_TO_INTID(addr, 1);
	unsigned long flags;
	int bit;

	for (bit = 0; bit < len * 8; bit++) {
		struct vgic_irq *irq;
		bool hw_sgi;

		irq = vgic_get_irq(vcpu->kvm, vcpu, first_intid + bit);

		raw_spin_lock_irqsave(&irq->irq_lock, flags);
		irq->group = !!(val & BIT(bit));

		hw_sgi = irq->hw && vgic_irq_is_sgi(irq->intid);
		if (!hw_sgi) {
			/* This path relies on the callee to drop irq_lock. */
			vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
		} else {
			/* HW-backed SGI: propagate the new group to the host. */
			vgic_update_vsgi(irq);
			raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
		}

		vgic_put_irq(vcpu->kvm, irq);
	}
}
|
|
|
|
/*
|
|
* Read accesses to both GICD_ICENABLER and GICD_ISENABLER return the value
|
|
* of the enabled bit, so there is only one function for both here.
|
|
*/
|
|
unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len)
|
|
{
|
|
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
|
u32 value = 0;
|
|
int i;
|
|
|
|
/* Loop over all IRQs affected by this read */
|
|
for (i = 0; i < len * 8; i++) {
|
|
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
|
|
|
if (irq->enabled)
|
|
value |= (1U << i);
|
|
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
}
|
|
|
|
return value;
|
|
}
|
|
|
|
void vgic_mmio_write_senable(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len,
|
|
unsigned long val)
|
|
{
|
|
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
|
int i;
|
|
unsigned long flags;
|
|
|
|
for_each_set_bit(i, &val, len * 8) {
|
|
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
|
|
|
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
|
if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
|
|
if (!irq->enabled) {
|
|
struct irq_data *data;
|
|
|
|
irq->enabled = true;
|
|
data = &irq_to_desc(irq->host_irq)->irq_data;
|
|
while (irqd_irq_disabled(data))
|
|
enable_irq(irq->host_irq);
|
|
}
|
|
|
|
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
|
|
continue;
|
|
} else if (vgic_irq_is_mapped_level(irq)) {
|
|
bool was_high = irq->line_level;
|
|
|
|
/*
|
|
* We need to update the state of the interrupt because
|
|
* the guest might have changed the state of the device
|
|
* while the interrupt was disabled at the VGIC level.
|
|
*/
|
|
irq->line_level = vgic_get_phys_line_level(irq);
|
|
/*
|
|
* Deactivate the physical interrupt so the GIC will let
|
|
* us know when it is asserted again.
|
|
*/
|
|
if (!irq->active && was_high && !irq->line_level)
|
|
vgic_irq_set_phys_active(irq, false);
|
|
}
|
|
irq->enabled = true;
|
|
vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
|
|
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
}
|
|
}
|
|
|
|
void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len,
|
|
unsigned long val)
|
|
{
|
|
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
|
int i;
|
|
unsigned long flags;
|
|
|
|
for_each_set_bit(i, &val, len * 8) {
|
|
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
|
|
|
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
|
if (irq->hw && vgic_irq_is_sgi(irq->intid) && irq->enabled)
|
|
disable_irq_nosync(irq->host_irq);
|
|
|
|
irq->enabled = false;
|
|
|
|
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
}
|
|
}
|
|
|
|
int vgic_uaccess_write_senable(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len,
|
|
unsigned long val)
|
|
{
|
|
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
|
int i;
|
|
unsigned long flags;
|
|
|
|
for_each_set_bit(i, &val, len * 8) {
|
|
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
|
|
|
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
|
irq->enabled = true;
|
|
vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
|
|
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len,
|
|
unsigned long val)
|
|
{
|
|
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
|
int i;
|
|
unsigned long flags;
|
|
|
|
for_each_set_bit(i, &val, len * 8) {
|
|
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
|
|
|
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
|
irq->enabled = false;
|
|
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
|
|
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static unsigned long __read_pending(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len,
|
|
bool is_user)
|
|
{
|
|
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
|
u32 value = 0;
|
|
int i;
|
|
|
|
/* Loop over all IRQs affected by this read */
|
|
for (i = 0; i < len * 8; i++) {
|
|
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
|
unsigned long flags;
|
|
bool val;
|
|
|
|
/*
|
|
* When used from userspace with a GICv3 model:
|
|
*
|
|
* Pending state of interrupt is latched in pending_latch
|
|
* variable. Userspace will save and restore pending state
|
|
* and line_level separately.
|
|
* Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst
|
|
* for handling of ISPENDR and ICPENDR.
|
|
*/
|
|
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
|
if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
|
|
int err;
|
|
|
|
val = false;
|
|
err = irq_get_irqchip_state(irq->host_irq,
|
|
IRQCHIP_STATE_PENDING,
|
|
&val);
|
|
WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
|
|
} else if (!is_user && vgic_irq_is_mapped_level(irq)) {
|
|
val = vgic_get_phys_line_level(irq);
|
|
} else {
|
|
switch (vcpu->kvm->arch.vgic.vgic_model) {
|
|
case KVM_DEV_TYPE_ARM_VGIC_V3:
|
|
if (is_user) {
|
|
val = irq->pending_latch;
|
|
break;
|
|
}
|
|
fallthrough;
|
|
default:
|
|
val = irq_is_pending(irq);
|
|
break;
|
|
}
|
|
}
|
|
|
|
value |= ((u32)val << i);
|
|
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
|
|
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
}
|
|
|
|
return value;
|
|
}
|
|
|
|
unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len)
|
|
{
|
|
return __read_pending(vcpu, addr, len, false);
|
|
}
|
|
|
|
unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len)
|
|
{
|
|
return __read_pending(vcpu, addr, len, true);
|
|
}
|
|
|
|
static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
|
|
{
|
|
return (vgic_irq_is_sgi(irq->intid) &&
|
|
vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2);
|
|
}
|
|
|
|
void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len,
|
|
unsigned long val)
|
|
{
|
|
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
|
int i;
|
|
unsigned long flags;
|
|
|
|
for_each_set_bit(i, &val, len * 8) {
|
|
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
|
|
|
/* GICD_ISPENDR0 SGI bits are WI */
|
|
if (is_vgic_v2_sgi(vcpu, irq)) {
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
continue;
|
|
}
|
|
|
|
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
|
|
|
if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
|
|
/* HW SGI? Ask the GIC to inject it */
|
|
int err;
|
|
err = irq_set_irqchip_state(irq->host_irq,
|
|
IRQCHIP_STATE_PENDING,
|
|
true);
|
|
WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
|
|
|
|
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
|
|
continue;
|
|
}
|
|
|
|
irq->pending_latch = true;
|
|
if (irq->hw)
|
|
vgic_irq_set_phys_active(irq, true);
|
|
|
|
vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
}
|
|
}
|
|
|
|
int vgic_uaccess_write_spending(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len,
|
|
unsigned long val)
|
|
{
|
|
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
|
int i;
|
|
unsigned long flags;
|
|
|
|
for_each_set_bit(i, &val, len * 8) {
|
|
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
|
|
|
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
|
irq->pending_latch = true;
|
|
|
|
/*
|
|
* GICv2 SGIs are terribly broken. We can't restore
|
|
* the source of the interrupt, so just pick the vcpu
|
|
* itself as the source...
|
|
*/
|
|
if (is_vgic_v2_sgi(vcpu, irq))
|
|
irq->source |= BIT(vcpu->vcpu_id);
|
|
|
|
vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
|
|
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Must be called with irq->irq_lock held */
|
|
static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
|
|
{
|
|
irq->pending_latch = false;
|
|
|
|
/*
|
|
* We don't want the guest to effectively mask the physical
|
|
* interrupt by doing a write to SPENDR followed by a write to
|
|
* CPENDR for HW interrupts, so we clear the active state on
|
|
* the physical side if the virtual interrupt is not active.
|
|
* This may lead to taking an additional interrupt on the
|
|
* host, but that should not be a problem as the worst that
|
|
* can happen is an additional vgic injection. We also clear
|
|
* the pending state to maintain proper semantics for edge HW
|
|
* interrupts.
|
|
*/
|
|
vgic_irq_set_phys_pending(irq, false);
|
|
if (!irq->active)
|
|
vgic_irq_set_phys_active(irq, false);
|
|
}
|
|
|
|
void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len,
|
|
unsigned long val)
|
|
{
|
|
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
|
int i;
|
|
unsigned long flags;
|
|
|
|
for_each_set_bit(i, &val, len * 8) {
|
|
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
|
|
|
/* GICD_ICPENDR0 SGI bits are WI */
|
|
if (is_vgic_v2_sgi(vcpu, irq)) {
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
continue;
|
|
}
|
|
|
|
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
|
|
|
if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
|
|
/* HW SGI? Ask the GIC to clear its pending bit */
|
|
int err;
|
|
err = irq_set_irqchip_state(irq->host_irq,
|
|
IRQCHIP_STATE_PENDING,
|
|
false);
|
|
WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
|
|
|
|
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
|
|
continue;
|
|
}
|
|
|
|
if (irq->hw)
|
|
vgic_hw_irq_cpending(vcpu, irq);
|
|
else
|
|
irq->pending_latch = false;
|
|
|
|
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
}
|
|
}
|
|
|
|
int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len,
|
|
unsigned long val)
|
|
{
|
|
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
|
int i;
|
|
unsigned long flags;
|
|
|
|
for_each_set_bit(i, &val, len * 8) {
|
|
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
|
|
|
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
|
/*
|
|
* More fun with GICv2 SGIs! If we're clearing one of them
|
|
* from userspace, which source vcpu to clear? Let's not
|
|
* even think of it, and blow the whole set.
|
|
*/
|
|
if (is_vgic_v2_sgi(vcpu, irq))
|
|
irq->source = 0;
|
|
|
|
irq->pending_latch = false;
|
|
|
|
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
|
|
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* If we are fiddling with an IRQ's active state, we have to make sure the IRQ
|
|
* is not queued on some running VCPU's LRs, because then the change to the
|
|
* active state can be overwritten when the VCPU's state is synced coming back
|
|
* from the guest.
|
|
*
|
|
* For shared interrupts as well as GICv3 private interrupts accessed from the
|
|
* non-owning CPU, we have to stop all the VCPUs because interrupts can be
|
|
* migrated while we don't hold the IRQ locks and we don't want to be chasing
|
|
* moving targets.
|
|
*
|
|
* For GICv2 private interrupts we don't have to do anything because
|
|
* userspace accesses to the VGIC state already require all VCPUs to be
|
|
* stopped, and only the VCPU itself can modify its private interrupts
|
|
* active state, which guarantees that the VCPU is not running.
|
|
*/
|
|
static void vgic_access_active_prepare(struct kvm_vcpu *vcpu, u32 intid)
|
|
{
|
|
if ((vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 &&
|
|
vcpu != kvm_get_running_vcpu()) ||
|
|
intid >= VGIC_NR_PRIVATE_IRQS)
|
|
kvm_arm_halt_guest(vcpu->kvm);
|
|
}
|
|
|
|
/* See vgic_access_active_prepare */
|
|
static void vgic_access_active_finish(struct kvm_vcpu *vcpu, u32 intid)
|
|
{
|
|
if ((vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 &&
|
|
vcpu != kvm_get_running_vcpu()) ||
|
|
intid >= VGIC_NR_PRIVATE_IRQS)
|
|
kvm_arm_resume_guest(vcpu->kvm);
|
|
}
|
|
|
|
static unsigned long __vgic_mmio_read_active(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len)
|
|
{
|
|
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
|
u32 value = 0;
|
|
int i;
|
|
|
|
/* Loop over all IRQs affected by this read */
|
|
for (i = 0; i < len * 8; i++) {
|
|
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
|
|
|
/*
|
|
* Even for HW interrupts, don't evaluate the HW state as
|
|
* all the guest is interested in is the virtual state.
|
|
*/
|
|
if (irq->active)
|
|
value |= (1U << i);
|
|
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
}
|
|
|
|
return value;
|
|
}
|
|
|
|
unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len)
|
|
{
|
|
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
|
u32 val;
|
|
|
|
mutex_lock(&vcpu->kvm->arch.config_lock);
|
|
vgic_access_active_prepare(vcpu, intid);
|
|
|
|
val = __vgic_mmio_read_active(vcpu, addr, len);
|
|
|
|
vgic_access_active_finish(vcpu, intid);
|
|
mutex_unlock(&vcpu->kvm->arch.config_lock);
|
|
|
|
return val;
|
|
}
|
|
|
|
unsigned long vgic_uaccess_read_active(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len)
|
|
{
|
|
return __vgic_mmio_read_active(vcpu, addr, len);
|
|
}
|
|
|
|
/* Must be called with irq->irq_lock held */
|
|
static void vgic_hw_irq_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
|
|
bool active, bool is_uaccess)
|
|
{
|
|
if (is_uaccess)
|
|
return;
|
|
|
|
irq->active = active;
|
|
vgic_irq_set_phys_active(irq, active);
|
|
}
|
|
|
|
static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
|
|
bool active)
|
|
{
|
|
unsigned long flags;
|
|
struct kvm_vcpu *requester_vcpu = kvm_get_running_vcpu();
|
|
|
|
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
|
|
|
if (irq->hw && !vgic_irq_is_sgi(irq->intid)) {
|
|
vgic_hw_irq_change_active(vcpu, irq, active, !requester_vcpu);
|
|
} else if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
|
|
/*
|
|
* GICv4.1 VSGI feature doesn't track an active state,
|
|
* so let's not kid ourselves, there is nothing we can
|
|
* do here.
|
|
*/
|
|
irq->active = false;
|
|
} else {
|
|
u32 model = vcpu->kvm->arch.vgic.vgic_model;
|
|
u8 active_source;
|
|
|
|
irq->active = active;
|
|
|
|
/*
|
|
* The GICv2 architecture indicates that the source CPUID for
|
|
* an SGI should be provided during an EOI which implies that
|
|
* the active state is stored somewhere, but at the same time
|
|
* this state is not architecturally exposed anywhere and we
|
|
* have no way of knowing the right source.
|
|
*
|
|
* This may lead to a VCPU not being able to receive
|
|
* additional instances of a particular SGI after migration
|
|
* for a GICv2 VM on some GIC implementations. Oh well.
|
|
*/
|
|
active_source = (requester_vcpu) ? requester_vcpu->vcpu_id : 0;
|
|
|
|
if (model == KVM_DEV_TYPE_ARM_VGIC_V2 &&
|
|
active && vgic_irq_is_sgi(irq->intid))
|
|
irq->active_source = active_source;
|
|
}
|
|
|
|
if (irq->active)
|
|
vgic_queue_irq_unlock(vcpu->kvm, irq, flags);
|
|
else
|
|
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
|
|
}
|
|
|
|
static void __vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len,
|
|
unsigned long val)
|
|
{
|
|
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
|
int i;
|
|
|
|
for_each_set_bit(i, &val, len * 8) {
|
|
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
|
vgic_mmio_change_active(vcpu, irq, false);
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
}
|
|
}
|
|
|
|
void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len,
|
|
unsigned long val)
|
|
{
|
|
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
|
|
|
mutex_lock(&vcpu->kvm->arch.config_lock);
|
|
vgic_access_active_prepare(vcpu, intid);
|
|
|
|
__vgic_mmio_write_cactive(vcpu, addr, len, val);
|
|
|
|
vgic_access_active_finish(vcpu, intid);
|
|
mutex_unlock(&vcpu->kvm->arch.config_lock);
|
|
}
|
|
|
|
int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len,
|
|
unsigned long val)
|
|
{
|
|
__vgic_mmio_write_cactive(vcpu, addr, len, val);
|
|
return 0;
|
|
}
|
|
|
|
static void __vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len,
|
|
unsigned long val)
|
|
{
|
|
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
|
int i;
|
|
|
|
for_each_set_bit(i, &val, len * 8) {
|
|
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
|
vgic_mmio_change_active(vcpu, irq, true);
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
}
|
|
}
|
|
|
|
void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len,
|
|
unsigned long val)
|
|
{
|
|
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
|
|
|
|
mutex_lock(&vcpu->kvm->arch.config_lock);
|
|
vgic_access_active_prepare(vcpu, intid);
|
|
|
|
__vgic_mmio_write_sactive(vcpu, addr, len, val);
|
|
|
|
vgic_access_active_finish(vcpu, intid);
|
|
mutex_unlock(&vcpu->kvm->arch.config_lock);
|
|
}
|
|
|
|
int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len,
|
|
unsigned long val)
|
|
{
|
|
__vgic_mmio_write_sactive(vcpu, addr, len, val);
|
|
return 0;
|
|
}
|
|
|
|
unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len)
|
|
{
|
|
u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
|
|
int i;
|
|
u64 val = 0;
|
|
|
|
for (i = 0; i < len; i++) {
|
|
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
|
|
|
val |= (u64)irq->priority << (i * 8);
|
|
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
}
|
|
|
|
return val;
|
|
}
|
|
|
|
/*
|
|
* We currently don't handle changing the priority of an interrupt that
|
|
* is already pending on a VCPU. If there is a need for this, we would
|
|
* need to make this VCPU exit and re-evaluate the priorities, potentially
|
|
* leading to this interrupt getting presented now to the guest (if it has
|
|
* been masked by the priority mask before).
|
|
*/
|
|
void vgic_mmio_write_priority(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len,
|
|
unsigned long val)
|
|
{
|
|
u32 intid = VGIC_ADDR_TO_INTID(addr, 8);
|
|
int i;
|
|
unsigned long flags;
|
|
|
|
for (i = 0; i < len; i++) {
|
|
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
|
|
|
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
|
/* Narrow the priority range to what we actually support */
|
|
irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS);
|
|
if (irq->hw && vgic_irq_is_sgi(irq->intid))
|
|
vgic_update_vsgi(irq);
|
|
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
|
|
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
}
|
|
}
|
|
|
|
unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len)
|
|
{
|
|
u32 intid = VGIC_ADDR_TO_INTID(addr, 2);
|
|
u32 value = 0;
|
|
int i;
|
|
|
|
for (i = 0; i < len * 4; i++) {
|
|
struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
|
|
|
if (irq->config == VGIC_CONFIG_EDGE)
|
|
value |= (2U << (i * 2));
|
|
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
}
|
|
|
|
return value;
|
|
}
|
|
|
|
void vgic_mmio_write_config(struct kvm_vcpu *vcpu,
|
|
gpa_t addr, unsigned int len,
|
|
unsigned long val)
|
|
{
|
|
u32 intid = VGIC_ADDR_TO_INTID(addr, 2);
|
|
int i;
|
|
unsigned long flags;
|
|
|
|
for (i = 0; i < len * 4; i++) {
|
|
struct vgic_irq *irq;
|
|
|
|
/*
|
|
* The configuration cannot be changed for SGIs in general,
|
|
* for PPIs this is IMPLEMENTATION DEFINED. The arch timer
|
|
* code relies on PPIs being level triggered, so we also
|
|
* make them read-only here.
|
|
*/
|
|
if (intid + i < VGIC_NR_PRIVATE_IRQS)
|
|
continue;
|
|
|
|
irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
|
|
raw_spin_lock_irqsave(&irq->irq_lock, flags);
|
|
|
|
if (test_bit(i * 2 + 1, &val))
|
|
irq->config = VGIC_CONFIG_EDGE;
|
|
else
|
|
irq->config = VGIC_CONFIG_LEVEL;
|
|
|
|
raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
|
|
vgic_put_irq(vcpu->kvm, irq);
|
|
}
|
|
}
|
|
|
|
/*
 * Report the line level of the 32 interrupts starting at @intid. A bit is
 * set only for level-configured interrupts whose line is high. SGIs and
 * INTIDs beyond the allocated range are skipped and read as 0.
 */
u32 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid)
{
	int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
	u32 levels = 0;
	int bit;

	for (bit = 0; bit < 32; bit++) {
		struct vgic_irq *irq;

		if ((intid + bit) < VGIC_NR_SGIS)
			continue;
		if ((intid + bit) >= nr_irqs)
			continue;

		irq = vgic_get_irq(vcpu->kvm, vcpu, intid + bit);
		if (irq->config == VGIC_CONFIG_LEVEL && irq->line_level)
			levels |= (1U << bit);

		vgic_put_irq(vcpu->kvm, irq);
	}

	return levels;
}
|
|
|
|
/*
 * Set the line level of the 32 interrupts starting at @intid from the bits
 * of @val. SGIs and INTIDs beyond the allocated range are skipped.
 */
void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid,
				    const u32 val)
{
	int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
	unsigned long flags;
	int bit;

	for (bit = 0; bit < 32; bit++) {
		struct vgic_irq *irq;
		bool level;

		if ((intid + bit) < VGIC_NR_SGIS)
			continue;
		if ((intid + bit) >= nr_irqs)
			continue;

		irq = vgic_get_irq(vcpu->kvm, vcpu, intid + bit);

		/*
		 * The level is recorded regardless of the interrupt type
		 * (level or edge), so that the VM does not have to restore
		 * the irq config before the line level.
		 */
		level = !!(val & (1U << bit));

		raw_spin_lock_irqsave(&irq->irq_lock, flags);
		irq->line_level = level;
		if (!level)
			raw_spin_unlock_irqrestore(&irq->irq_lock, flags);
		else
			vgic_queue_irq_unlock(vcpu->kvm, irq, flags);

		vgic_put_irq(vcpu->kvm, irq);
	}
}
|
|
|
|
static int match_region(const void *key, const void *elt)
|
|
{
|
|
const unsigned int offset = (unsigned long)key;
|
|
const struct vgic_register_region *region = elt;
|
|
|
|
if (offset < region->reg_offset)
|
|
return -1;
|
|
|
|
if (offset >= region->reg_offset + region->len)
|
|
return 1;
|
|
|
|
return 0;
|
|
}
|
|
|
|
const struct vgic_register_region *
|
|
vgic_find_mmio_region(const struct vgic_register_region *regions,
|
|
int nr_regions, unsigned int offset)
|
|
{
|
|
return bsearch((void *)(uintptr_t)offset, regions, nr_regions,
|
|
sizeof(regions[0]), match_region);
|
|
}
|
|
|
|
void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
|
|
{
|
|
if (kvm_vgic_global_state.type == VGIC_V2)
|
|
vgic_v2_set_vmcr(vcpu, vmcr);
|
|
else
|
|
vgic_v3_set_vmcr(vcpu, vmcr);
|
|
}
|
|
|
|
void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr)
|
|
{
|
|
if (kvm_vgic_global_state.type == VGIC_V2)
|
|
vgic_v2_get_vmcr(vcpu, vmcr);
|
|
else
|
|
vgic_v3_get_vmcr(vcpu, vmcr);
|
|
}
|
|
|
|
/*
 * kvm_mmio_read_buf() yields the value in the byte layout the guest wanted
 * to appear in memory had it performed the store itself. For the GIC that
 * layout is little endian, because the guest knows the GIC is always LE.
 *
 * This converts that value to the CPU's native format so it can be used as
 * a plain data value.
 */
unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len)
{
	unsigned long raw = kvm_mmio_read_buf(val, len);

	if (len == 1)
		return raw;
	if (len == 2)
		return le16_to_cpu(raw);
	if (len == 4)
		return le32_to_cpu(raw);

	/* Every other length is treated as a 64-bit access. */
	return le64_to_cpu(raw);
}
|
|
|
|
/*
 * kvm_mmio_write_buf() expects a value whose byte layout is what the guest
 * would observe if it could perform the load directly. The GIC is LE and
 * the guest knows it, so the guest expects little endian.
 *
 * This converts @data from the CPU's native format to LE before handing it
 * over, so the value is returned in the proper format.
 */
void vgic_data_host_to_mmio_bus(void *buf, unsigned int len,
				unsigned long data)
{
	if (len == 2)
		data = cpu_to_le16(data);
	else if (len == 4)
		data = cpu_to_le32(data);
	else if (len != 1)
		data = cpu_to_le64(data);	/* any other length: 64-bit */

	kvm_mmio_write_buf(buf, len, data);
}
|
|
|
|
static
|
|
struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev)
|
|
{
|
|
return container_of(dev, struct vgic_io_device, dev);
|
|
}
|
|
|
|
static bool check_region(const struct kvm *kvm,
|
|
const struct vgic_register_region *region,
|
|
gpa_t addr, int len)
|
|
{
|
|
int flags, nr_irqs = kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS;
|
|
|
|
switch (len) {
|
|
case sizeof(u8):
|
|
flags = VGIC_ACCESS_8bit;
|
|
break;
|
|
case sizeof(u32):
|
|
flags = VGIC_ACCESS_32bit;
|
|
break;
|
|
case sizeof(u64):
|
|
flags = VGIC_ACCESS_64bit;
|
|
break;
|
|
default:
|
|
return false;
|
|
}
|
|
|
|
if ((region->access_flags & flags) && IS_ALIGNED(addr, len)) {
|
|
if (!region->bits_per_irq)
|
|
return true;
|
|
|
|
/* Do we access a non-allocated IRQ? */
|
|
return VGIC_ADDR_TO_INTID(addr, region->bits_per_irq) < nr_irqs;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
const struct vgic_register_region *
|
|
vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
|
|
gpa_t addr, int len)
|
|
{
|
|
const struct vgic_register_region *region;
|
|
|
|
region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions,
|
|
addr - iodev->base_addr);
|
|
if (!region || !check_region(vcpu->kvm, region, addr, len))
|
|
return NULL;
|
|
|
|
return region;
|
|
}
|
|
|
|
static int vgic_uaccess_read(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
|
|
gpa_t addr, u32 *val)
|
|
{
|
|
const struct vgic_register_region *region;
|
|
struct kvm_vcpu *r_vcpu;
|
|
|
|
region = vgic_get_mmio_region(vcpu, iodev, addr, sizeof(u32));
|
|
if (!region) {
|
|
*val = 0;
|
|
return 0;
|
|
}
|
|
|
|
r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
|
|
if (region->uaccess_read)
|
|
*val = region->uaccess_read(r_vcpu, addr, sizeof(u32));
|
|
else
|
|
*val = region->read(r_vcpu, addr, sizeof(u32));
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int vgic_uaccess_write(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev,
|
|
gpa_t addr, const u32 *val)
|
|
{
|
|
const struct vgic_register_region *region;
|
|
struct kvm_vcpu *r_vcpu;
|
|
|
|
region = vgic_get_mmio_region(vcpu, iodev, addr, sizeof(u32));
|
|
if (!region)
|
|
return 0;
|
|
|
|
r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu;
|
|
if (region->uaccess_write)
|
|
return region->uaccess_write(r_vcpu, addr, sizeof(u32), *val);
|
|
|
|
region->write(r_vcpu, addr, sizeof(u32), *val);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Userland access to VGIC registers.
|
|
*/
|
|
int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev,
|
|
bool is_write, int offset, u32 *val)
|
|
{
|
|
if (is_write)
|
|
return vgic_uaccess_write(vcpu, dev, offset, val);
|
|
else
|
|
return vgic_uaccess_read(vcpu, dev, offset, val);
|
|
}
|
|
|
|
static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
|
|
gpa_t addr, int len, void *val)
|
|
{
|
|
struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
|
|
const struct vgic_register_region *region;
|
|
unsigned long data = 0;
|
|
|
|
region = vgic_get_mmio_region(vcpu, iodev, addr, len);
|
|
if (!region) {
|
|
memset(val, 0, len);
|
|
return 0;
|
|
}
|
|
|
|
switch (iodev->iodev_type) {
|
|
case IODEV_CPUIF:
|
|
data = region->read(vcpu, addr, len);
|
|
break;
|
|
case IODEV_DIST:
|
|
data = region->read(vcpu, addr, len);
|
|
break;
|
|
case IODEV_REDIST:
|
|
data = region->read(iodev->redist_vcpu, addr, len);
|
|
break;
|
|
case IODEV_ITS:
|
|
data = region->its_read(vcpu->kvm, iodev->its, addr, len);
|
|
break;
|
|
}
|
|
|
|
vgic_data_host_to_mmio_bus(val, len, data);
|
|
return 0;
|
|
}
|
|
|
|
static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
|
|
gpa_t addr, int len, const void *val)
|
|
{
|
|
struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev);
|
|
const struct vgic_register_region *region;
|
|
unsigned long data = vgic_data_mmio_bus_to_host(val, len);
|
|
|
|
region = vgic_get_mmio_region(vcpu, iodev, addr, len);
|
|
if (!region)
|
|
return 0;
|
|
|
|
switch (iodev->iodev_type) {
|
|
case IODEV_CPUIF:
|
|
region->write(vcpu, addr, len, data);
|
|
break;
|
|
case IODEV_DIST:
|
|
region->write(vcpu, addr, len, data);
|
|
break;
|
|
case IODEV_REDIST:
|
|
region->write(iodev->redist_vcpu, addr, len, data);
|
|
break;
|
|
case IODEV_ITS:
|
|
region->its_write(vcpu->kvm, iodev->its, addr, len, data);
|
|
break;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
const struct kvm_io_device_ops kvm_io_gic_ops = {
|
|
.read = dispatch_mmio_read,
|
|
.write = dispatch_mmio_write,
|
|
};
|
|
|
|
/*
 * Initialise the VM's distributor I/O device for the given VGIC @type and
 * register it on the KVM MMIO bus at @dist_base_address. Returns the
 * result of kvm_io_bus_register_dev(). Any @type other than VGIC_V2 or
 * VGIC_V3 hits BUG_ON().
 *
 * NOTE(review): per the commit description accompanying this file,
 * kvm_io_bus_register_dev() synchronizes SRCU, so callers are expected to
 * invoke this without config_lock held - confirm against the call sites.
 */
int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address,
			     enum vgic_type type)
{
	struct vgic_io_device *dist = &kvm->arch.vgic.dist_iodev;
	unsigned int region_len;

	switch (type) {
	case VGIC_V2:
		region_len = vgic_v2_init_dist_iodev(dist);
		break;
	case VGIC_V3:
		region_len = vgic_v3_init_dist_iodev(dist);
		break;
	default:
		BUG_ON(1);
	}

	dist->redist_vcpu = NULL;
	dist->iodev_type = IODEV_DIST;
	dist->base_addr = dist_base_address;

	return kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address,
				       region_len, &dist->dev);
}
|