4d0b829881
Now that the irq_data_update_affinity helper exists, enforce its use by returning a const cpumask from irq_data_get_affinity_mask. Since the previous commit already updated places that needed to call irq_data_update_affinity, this commit updates the remaining code that either did not modify the cpumask or immediately passed the modified mask to irq_set_affinity. Signed-off-by: Samuel Holland <samuel@sholland.org> Reviewed-by: Michael Kelley <mikelley@microsoft.com> Signed-off-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20220701200056.46555-8-samuel@sholland.org
365 lines
9.5 KiB
C
365 lines
9.5 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
/*
|
|
* Irqdomain for Linux to run as the root partition on Microsoft Hypervisor.
|
|
*
|
|
* Authors:
|
|
* Sunil Muthuswamy <sunilmut@microsoft.com>
|
|
* Wei Liu <wei.liu@kernel.org>
|
|
*/
|
|
|
|
#include <linux/pci.h>
|
|
#include <linux/irq.h>
|
|
#include <asm/mshyperv.h>
|
|
|
|
/*
 * Issue HVCALL_MAP_DEVICE_INTERRUPT for one fixed interrupt.
 *
 * @device_id: device identifier; its as_uint64 form is placed in the input
 * @level:     true selects level trigger mode, false selects edge
 * @cpu:       CPU whose mask is converted into the target VP set
 * @vector:    vector stored in the interrupt descriptor target
 * @entry:     receives the interrupt entry read from the hypercall output
 *
 * The per-CPU hypercall input/output argument buffers are used with local
 * interrupts disabled.  *entry is written from the output buffer whether or
 * not the hypercall succeeded.
 *
 * Returns hv_result() of the hypercall status, or EINVAL when the VP set
 * cannot be generated.
 * NOTE(review): that EINVAL is a positive errno value mixed into a return
 * space otherwise made of hypervisor status codes -- confirm callers only
 * test for non-zero.
 */
static int hv_map_interrupt(union hv_device_id device_id, bool level,
		int cpu, int vector, struct hv_interrupt_entry *entry)
{
	struct hv_device_interrupt_descriptor *desc;
	struct hv_input_map_device_interrupt *in;
	struct hv_output_map_device_interrupt *out;
	unsigned long irq_flags;
	int banks, varhead_size;
	u64 hv_status;

	local_irq_save(irq_flags);

	in = *this_cpu_ptr(hyperv_pcpu_input_arg);
	out = *this_cpu_ptr(hyperv_pcpu_output_arg);

	memset(in, 0, sizeof(*in));
	desc = &in->interrupt_descriptor;

	in->partition_id = hv_current_partition_id;
	in->device_id = device_id.as_uint64;

	desc->interrupt_type = HV_X64_INTERRUPT_TYPE_FIXED;
	desc->vector_count = 1;
	desc->target.vector = vector;
	desc->trigger_mode = level ? HV_INTERRUPT_TRIGGER_MODE_LEVEL
				   : HV_INTERRUPT_TRIGGER_MODE_EDGE;

	desc->target.vp_set.valid_bank_mask = 0;
	desc->target.vp_set.format = HV_GENERIC_SET_SPARSE_4K;
	banks = cpumask_to_vpset(&desc->target.vp_set, cpumask_of(cpu));
	if (banks < 0) {
		local_irq_restore(irq_flags);
		pr_err("%s: unable to generate VP set\n", __func__);
		return EINVAL;
	}
	desc->target.flags = HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET;

	/*
	 * This is a variable-sized hypercall.  The variable part begins
	 * after vp_mask, so vp_set.format is not counted while
	 * vp_set.valid_bank_mask is: one extra unit on top of the banks.
	 */
	varhead_size = banks + 1;

	hv_status = hv_do_rep_hypercall(HVCALL_MAP_DEVICE_INTERRUPT, 0,
					varhead_size, in, out);
	*entry = out->interrupt_entry;

	local_irq_restore(irq_flags);

	if (!hv_result_success(hv_status))
		pr_err("%s: hypercall failed, status %lld\n", __func__, hv_status);

	return hv_result(hv_status);
}
|
|
|
|
/*
 * Issue HVCALL_UNMAP_DEVICE_INTERRUPT for a previously obtained entry.
 *
 * @id:        device identifier in its 64-bit form
 * @old_entry: interrupt entry to hand back to the hypervisor; copied into
 *             the hypercall input
 *
 * The per-CPU hypercall input buffer is filled and used with local
 * interrupts disabled.  Returns hv_result() of the hypercall status.
 */
static int hv_unmap_interrupt(u64 id, struct hv_interrupt_entry *old_entry)
{
	struct hv_input_unmap_device_interrupt *in;
	unsigned long irq_flags;
	u64 hv_status;

	local_irq_save(irq_flags);

	in = *this_cpu_ptr(hyperv_pcpu_input_arg);
	memset(in, 0, sizeof(*in));

	in->partition_id = hv_current_partition_id;
	in->device_id = id;
	in->interrupt_entry = *old_entry;

	hv_status = hv_do_hypercall(HVCALL_UNMAP_DEVICE_INTERRUPT, in, NULL);

	local_irq_restore(irq_flags);

	return hv_result(hv_status);
}
|
|
|
|
#ifdef CONFIG_PCI_MSI
|
|
struct rid_data {
|
|
struct pci_dev *bridge;
|
|
u32 rid;
|
|
};
|
|
|
|
/*
 * pci_for_each_dma_alias() callback used by hv_build_pci_dev_id().
 *
 * Compares the bus number of the rid currently held in @data against both
 * the bus of @pdev and the bus encoded in @alias.  If either differs, @pdev
 * is recorded as the bridge and @alias replaces the rid.  Always returns 0.
 */
static int get_rid_cb(struct pci_dev *pdev, u16 alias, void *data)
{
	struct rid_data *rid_state = data;
	u8 cur_bus = PCI_BUS_NUM(rid_state->rid);

	/* Same bus on both counts: nothing to record. */
	if (pdev->bus->number == cur_bus && PCI_BUS_NUM(alias) == cur_bus)
		return 0;

	rid_state->bridge = pdev;
	rid_state->rid = alias;

	return 0;
}
|
|
|
|
static union hv_device_id hv_build_pci_dev_id(struct pci_dev *dev)
|
|
{
|
|
union hv_device_id dev_id;
|
|
struct rid_data data = {
|
|
.bridge = NULL,
|
|
.rid = PCI_DEVID(dev->bus->number, dev->devfn)
|
|
};
|
|
|
|
pci_for_each_dma_alias(dev, get_rid_cb, &data);
|
|
|
|
dev_id.as_uint64 = 0;
|
|
dev_id.device_type = HV_DEVICE_TYPE_PCI;
|
|
dev_id.pci.segment = pci_domain_nr(dev->bus);
|
|
|
|
dev_id.pci.bdf.bus = PCI_BUS_NUM(data.rid);
|
|
dev_id.pci.bdf.device = PCI_SLOT(data.rid);
|
|
dev_id.pci.bdf.function = PCI_FUNC(data.rid);
|
|
dev_id.pci.source_shadow = HV_SOURCE_SHADOW_NONE;
|
|
|
|
if (data.bridge) {
|
|
int pos;
|
|
|
|
/*
|
|
* Microsoft Hypervisor requires a bus range when the bridge is
|
|
* running in PCI-X mode.
|
|
*
|
|
* To distinguish conventional vs PCI-X bridge, we can check
|
|
* the bridge's PCI-X Secondary Status Register, Secondary Bus
|
|
* Mode and Frequency bits. See PCI Express to PCI/PCI-X Bridge
|
|
* Specification Revision 1.0 5.2.2.1.3.
|
|
*
|
|
* Value zero means it is in conventional mode, otherwise it is
|
|
* in PCI-X mode.
|
|
*/
|
|
|
|
pos = pci_find_capability(data.bridge, PCI_CAP_ID_PCIX);
|
|
if (pos) {
|
|
u16 status;
|
|
|
|
pci_read_config_word(data.bridge, pos +
|
|
PCI_X_BRIDGE_SSTATUS, &status);
|
|
|
|
if (status & PCI_X_SSTATUS_FREQ) {
|
|
/* Non-zero, PCI-X mode */
|
|
u8 sec_bus, sub_bus;
|
|
|
|
dev_id.pci.source_shadow = HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE;
|
|
|
|
pci_read_config_byte(data.bridge, PCI_SECONDARY_BUS, &sec_bus);
|
|
dev_id.pci.shadow_bus_range.secondary_bus = sec_bus;
|
|
pci_read_config_byte(data.bridge, PCI_SUBORDINATE_BUS, &sub_bus);
|
|
dev_id.pci.shadow_bus_range.subordinate_bus = sub_bus;
|
|
}
|
|
}
|
|
}
|
|
|
|
return dev_id;
|
|
}
|
|
|
|
static int hv_map_msi_interrupt(struct pci_dev *dev, int cpu, int vector,
|
|
struct hv_interrupt_entry *entry)
|
|
{
|
|
union hv_device_id device_id = hv_build_pci_dev_id(dev);
|
|
|
|
return hv_map_interrupt(device_id, false, cpu, vector, entry);
|
|
}
|
|
|
|
static inline void entry_to_msi_msg(struct hv_interrupt_entry *entry, struct msi_msg *msg)
|
|
{
|
|
/* High address is always 0 */
|
|
msg->address_hi = 0;
|
|
msg->address_lo = entry->msi_entry.address.as_uint32;
|
|
msg->data = entry->msi_entry.data.as_uint32;
|
|
}
|
|
|
|
static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry);
|
|
static void hv_irq_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
|
|
{
|
|
struct msi_desc *msidesc;
|
|
struct pci_dev *dev;
|
|
struct hv_interrupt_entry out_entry, *stored_entry;
|
|
struct irq_cfg *cfg = irqd_cfg(data);
|
|
const cpumask_t *affinity;
|
|
int cpu;
|
|
u64 status;
|
|
|
|
msidesc = irq_data_get_msi_desc(data);
|
|
dev = msi_desc_to_pci_dev(msidesc);
|
|
|
|
if (!cfg) {
|
|
pr_debug("%s: cfg is NULL", __func__);
|
|
return;
|
|
}
|
|
|
|
affinity = irq_data_get_effective_affinity_mask(data);
|
|
cpu = cpumask_first_and(affinity, cpu_online_mask);
|
|
|
|
if (data->chip_data) {
|
|
/*
|
|
* This interrupt is already mapped. Let's unmap first.
|
|
*
|
|
* We don't use retarget interrupt hypercalls here because
|
|
* Microsoft Hypervisor doens't allow root to change the vector
|
|
* or specify VPs outside of the set that is initially used
|
|
* during mapping.
|
|
*/
|
|
stored_entry = data->chip_data;
|
|
data->chip_data = NULL;
|
|
|
|
status = hv_unmap_msi_interrupt(dev, stored_entry);
|
|
|
|
kfree(stored_entry);
|
|
|
|
if (status != HV_STATUS_SUCCESS) {
|
|
pr_debug("%s: failed to unmap, status %lld", __func__, status);
|
|
return;
|
|
}
|
|
}
|
|
|
|
stored_entry = kzalloc(sizeof(*stored_entry), GFP_ATOMIC);
|
|
if (!stored_entry) {
|
|
pr_debug("%s: failed to allocate chip data\n", __func__);
|
|
return;
|
|
}
|
|
|
|
status = hv_map_msi_interrupt(dev, cpu, cfg->vector, &out_entry);
|
|
if (status != HV_STATUS_SUCCESS) {
|
|
kfree(stored_entry);
|
|
return;
|
|
}
|
|
|
|
*stored_entry = out_entry;
|
|
data->chip_data = stored_entry;
|
|
entry_to_msi_msg(&out_entry, msg);
|
|
|
|
return;
|
|
}
|
|
|
|
static int hv_unmap_msi_interrupt(struct pci_dev *dev, struct hv_interrupt_entry *old_entry)
|
|
{
|
|
return hv_unmap_interrupt(hv_build_pci_dev_id(dev).as_uint64, old_entry);
|
|
}
|
|
|
|
static void hv_teardown_msi_irq(struct pci_dev *dev, struct irq_data *irqd)
|
|
{
|
|
struct hv_interrupt_entry old_entry;
|
|
struct msi_msg msg;
|
|
u64 status;
|
|
|
|
if (!irqd->chip_data) {
|
|
pr_debug("%s: no chip data\n!", __func__);
|
|
return;
|
|
}
|
|
|
|
old_entry = *(struct hv_interrupt_entry *)irqd->chip_data;
|
|
entry_to_msi_msg(&old_entry, &msg);
|
|
|
|
kfree(irqd->chip_data);
|
|
irqd->chip_data = NULL;
|
|
|
|
status = hv_unmap_msi_interrupt(dev, &old_entry);
|
|
|
|
if (status != HV_STATUS_SUCCESS)
|
|
pr_err("%s: hypercall failed, status %lld\n", __func__, status);
|
|
}
|
|
|
|
static void hv_msi_free_irq(struct irq_domain *domain,
|
|
struct msi_domain_info *info, unsigned int virq)
|
|
{
|
|
struct irq_data *irqd = irq_get_irq_data(virq);
|
|
struct msi_desc *desc;
|
|
|
|
if (!irqd)
|
|
return;
|
|
|
|
desc = irq_data_get_msi_desc(irqd);
|
|
if (!desc || !desc->irq || WARN_ON_ONCE(!dev_is_pci(desc->dev)))
|
|
return;
|
|
|
|
hv_teardown_msi_irq(to_pci_dev(desc->dev), irqd);
|
|
}
|
|
|
|
/*
|
|
* IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
|
|
* which implement the MSI or MSI-X Capability Structure.
|
|
*/
|
|
static struct irq_chip hv_pci_msi_controller = {
|
|
.name = "HV-PCI-MSI",
|
|
.irq_unmask = pci_msi_unmask_irq,
|
|
.irq_mask = pci_msi_mask_irq,
|
|
.irq_ack = irq_chip_ack_parent,
|
|
.irq_retrigger = irq_chip_retrigger_hierarchy,
|
|
.irq_compose_msi_msg = hv_irq_compose_msi_msg,
|
|
.irq_set_affinity = msi_domain_set_affinity,
|
|
.flags = IRQCHIP_SKIP_SET_WAKE,
|
|
};
|
|
|
|
static struct msi_domain_ops pci_msi_domain_ops = {
|
|
.msi_free = hv_msi_free_irq,
|
|
.msi_prepare = pci_msi_prepare,
|
|
};
|
|
|
|
static struct msi_domain_info hv_pci_msi_domain_info = {
|
|
.flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
|
|
MSI_FLAG_PCI_MSIX,
|
|
.ops = &pci_msi_domain_ops,
|
|
.chip = &hv_pci_msi_controller,
|
|
.handler = handle_edge_irq,
|
|
.handler_name = "edge",
|
|
};
|
|
|
|
/*
 * Create the "HV-PCI-MSI" irq domain on top of x86_vector_domain.
 *
 * Returns the new domain.  Failure to allocate the fwnode or to create the
 * domain triggers BUG_ON(), so the function does not return NULL.
 */
struct irq_domain * __init hv_create_pci_msi_domain(void)
{
	struct fwnode_handle *fwnode;
	struct irq_domain *domain;

	fwnode = irq_domain_alloc_named_fwnode("HV-PCI-MSI");
	domain = fwnode ? pci_msi_create_irq_domain(fwnode,
						    &hv_pci_msi_domain_info,
						    x86_vector_domain)
			: NULL;

	/* No point in going further if we can't get an irq domain */
	BUG_ON(!domain);

	return domain;
}
|
|
|
|
#endif /* CONFIG_PCI_MSI */
|
|
|
|
int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry)
|
|
{
|
|
union hv_device_id device_id;
|
|
|
|
device_id.as_uint64 = 0;
|
|
device_id.device_type = HV_DEVICE_TYPE_IOAPIC;
|
|
device_id.ioapic.ioapic_id = (u8)ioapic_id;
|
|
|
|
return hv_unmap_interrupt(device_id.as_uint64, entry);
|
|
}
|
|
EXPORT_SYMBOL_GPL(hv_unmap_ioapic_interrupt);
|
|
|
|
/*
 * Map an IO-APIC interrupt to @vector on @cpu.
 *
 * @ioapic_id: IO-APIC identifier; truncated to u8 when stored in the
 *             device ID
 * @level:     true for level trigger mode, false for edge
 * @cpu:       target CPU
 * @vector:    target vector
 * @entry:     receives the interrupt entry from the hypercall output
 *
 * Builds an HV_DEVICE_TYPE_IOAPIC device ID and returns the result of
 * hv_map_interrupt().
 */
int hv_map_ioapic_interrupt(int ioapic_id, bool level, int cpu, int vector,
		struct hv_interrupt_entry *entry)
{
	union hv_device_id id;

	id.as_uint64 = 0;
	id.device_type = HV_DEVICE_TYPE_IOAPIC;
	id.ioapic.ioapic_id = (u8)ioapic_id;

	return hv_map_interrupt(id, level, cpu, vector, entry);
}
EXPORT_SYMBOL_GPL(hv_map_ioapic_interrupt);
|