From e5dfd093eca01a5d8d967f959a2372d7d82eb59c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 13 Nov 2022 22:21:15 +0100 Subject: [PATCH 001/134] clocksource/drivers/hyper-v: Include asm/hyperv-tlfs.h not asm/mshyperv.h clocksource/hyperv_timer.h is included into the VDSO build. It includes asm/mshyperv.h which in turn includes the world and some more. This worked so far by chance, but any subtle change in the include chain results in a build breakage because VDSO builds are building user space libraries. Include asm/hyperv-tlfs.h instead which contains everything what the VDSO build needs except the hv_get_raw_timer() define. Move this define into a separate header file, which contains the prerequisites (msr.h) and is included by clocksource/hyperv_timer.h. Fixup drivers/hv/vmbus_drv.c which relies on the indirect include of asm/mshyperv.h. With that the VDSO build only pulls in the minimum requirements. Signed-off-by: Thomas Gleixner Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/87fsemtut0.ffs@tglx --- arch/x86/include/asm/hyperv_timer.h | 9 +++++++++ arch/x86/include/asm/mshyperv.h | 2 -- drivers/hv/vmbus_drv.c | 1 + include/clocksource/hyperv_timer.h | 4 +++- 4 files changed, 13 insertions(+), 3 deletions(-) create mode 100644 arch/x86/include/asm/hyperv_timer.h diff --git a/arch/x86/include/asm/hyperv_timer.h b/arch/x86/include/asm/hyperv_timer.h new file mode 100644 index 000000000000..388fa81b8f38 --- /dev/null +++ b/arch/x86/include/asm/hyperv_timer.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_HYPERV_TIMER_H +#define _ASM_X86_HYPERV_TIMER_H + +#include + +#define hv_get_raw_timer() rdtsc_ordered() + +#endif diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 61f0c206bff0..6d502f3efb0f 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -19,8 +19,6 @@ typedef int (*hyperv_fill_flush_list_func)( struct hv_guest_mapping_flush_list *flush, void *data); -#define hv_get_raw_timer() rdtsc_ordered() - void hyperv_vector_handler(struct pt_regs *regs); #if IS_ENABLED(CONFIG_HYPERV) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 8b2e413bf19c..1f5d37ad74b9 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "hyperv_vmbus.h" struct vmbus_dynid { diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h index b3f5d73ae1d6..b4a3935801ca 100644 --- a/include/clocksource/hyperv_timer.h +++ b/include/clocksource/hyperv_timer.h @@ -15,13 +15,15 @@ #include #include -#include +#include #define HV_MAX_MAX_DELTA_TICKS 0xffffffff #define HV_MIN_DELTA_TICKS 1 #ifdef CONFIG_HYPERV_TIMER +#include + /* Routines called by the VMbus driver */ extern int hv_stimer_alloc(bool have_percpu_irqs); extern int hv_stimer_cleanup(unsigned int cpu); From fd19ce77993a49f3afc56bb4cae7eafb1ec69e0c Mon Sep 17 00:00:00 2001 From: Angus Chen Date: Fri, 7 Oct 2022 18:32:36 +0800 Subject: [PATCH 002/134] genirq: Remove unused argument force of irq_set_affinity_deactivated() The force parameter in irq_set_affinity_deactivated() is not used, get rid of it. Signed-off-by: Angus Chen Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20221007103236.599-1-angus.chen@jaguarmicro.com --- kernel/irq/manage.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 40fe7806cc8c..5b7cf28df290 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -321,7 +321,7 @@ static int irq_try_set_affinity(struct irq_data *data, } static bool irq_set_affinity_deactivated(struct irq_data *data, - const struct cpumask *mask, bool force) + const struct cpumask *mask) { struct irq_desc *desc = irq_data_to_desc(data); @@ -354,7 +354,7 @@ int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask, if (!chip || !chip->irq_set_affinity) return -EINVAL; - if (irq_set_affinity_deactivated(data, mask, force)) + if (irq_set_affinity_deactivated(data, mask)) return 0; if (irq_can_move_pcntxt(data) && !irqd_is_setaffinity_pending(data)) { From fe97f59a78fefda6c4a8c8ee6a070b1f769fc801 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Nov 2022 14:54:15 +0100 Subject: [PATCH 003/134] PCI/MSI: Check for MSI enabled in __pci_msix_enable() PCI/MSI and PCI/MSI-X are mutually exclusive, but the MSI-X enable code lacks a check for already enabled MSI. Signed-off-by: Thomas Gleixner Reviewed-by: Ashok Raj Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122013.653556720@linutronix.de --- drivers/pci/msi/msi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index fdd2ec09651e..160af9f01669 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -935,6 +935,11 @@ static int __pci_enable_msix_range(struct pci_dev *dev, if (maxvec < minvec) return -ERANGE; + if (dev->msi_enabled) { + pci_info(dev, "can't enable MSI-X (MSI already enabled)\n"); + return -EINVAL; + } + if (WARN_ON_ONCE(dev->msix_enabled)) return -EINVAL; From 527f378c42eaac0b48a8c6ff16da99a6177ff9e3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Nov 2022 14:54:17 +0100 Subject: [PATCH 004/134] iommu/vt-d: Remove bogus check for multi MSI-X PCI/Multi-MSI is MSI specific and not supported for MSI-X. Signed-off-by: Thomas Gleixner Reviewed-by: Ashok Raj Link: https://lore.kernel.org/r/20221111122013.713848846@linutronix.de --- drivers/iommu/intel/irq_remapping.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 5962bb5027d0..0b80a272e9a7 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -1334,8 +1334,7 @@ static int intel_irq_remapping_alloc(struct irq_domain *domain, if (!info || !iommu) return -EINVAL; - if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_PCI_MSI && - info->type != X86_IRQ_ALLOC_TYPE_PCI_MSIX) + if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_PCI_MSI) return -EINVAL; /* From 1c82f0d3fcdcb509a7ba1a2c8f58890155750b00 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Nov 2022 14:54:19 +0100 Subject: [PATCH 005/134] iommu/amd: Remove bogus check for multi MSI-X PCI/Multi-MSI is MSI specific and not supported for MSI-X Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20221111122013.772447165@linutronix.de --- drivers/iommu/amd/iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index d3b39d0416fa..8ece86484b27 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -3294,8 +3294,7 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, if (!info) return -EINVAL; - if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_PCI_MSI && - info->type != X86_IRQ_ALLOC_TYPE_PCI_MSIX) + if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_PCI_MSI) return -EINVAL; /* From befd780253e774ea9388dd8dfad7c627a0aa7e02 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Nov 2022 14:54:20 +0100 Subject: [PATCH 006/134] genirq/msi: Use MSI_DESC_ALL in msi_add_simple_msi_descs() There are no associated MSI descriptors in the requested range when the MSI descriptor allocation fails. Use MSI_DESC_ALL as the filter which prepares the next step to get rid of the filter for freeing. Signed-off-by: Thomas Gleixner Reviewed-by: Ashok Raj Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20221111122013.831151822@linutronix.de --- kernel/irq/msi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index a9ee535293eb..bba6359d8450 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -120,7 +120,7 @@ static int msi_add_simple_msi_descs(struct device *dev, unsigned int index, unsi fail_mem: ret = -ENOMEM; fail: - msi_free_msi_descs_range(dev, MSI_DESC_NOTASSOCIATED, index, last); + msi_free_msi_descs_range(dev, MSI_DESC_ALL, index, last); return ret; } From 2f2940d168236a92df524a1bd99fc7b0325918b5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Nov 2022 14:54:22 +0100 Subject: [PATCH 007/134] genirq/msi: Remove filter from msi_free_descs_free_range() When a range of descriptors is freed then all of them are not associated to a linux interrupt. Remove the filter and add a warning to the free function. Signed-off-by: Thomas Gleixner Reviewed-by: Ashok Raj Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20221111122013.888850936@linutronix.de --- drivers/base/platform-msi.c | 2 +- include/linux/msi.h | 5 ++--- kernel/irq/msi.c | 19 ++++++++++--------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c index 12b044151298..dddafa197693 100644 --- a/drivers/base/platform-msi.c +++ b/drivers/base/platform-msi.c @@ -325,7 +325,7 @@ void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int vir msi_lock_descs(data->dev); irq_domain_free_irqs_common(domain, virq, nr_irqs); - msi_free_msi_descs_range(data->dev, MSI_DESC_ALL, virq, virq + nr_irqs - 1); + msi_free_msi_descs_range(data->dev, virq, virq + nr_irqs - 1); msi_unlock_descs(data->dev); } diff --git a/include/linux/msi.h b/include/linux/msi.h index fc918a658d48..969ce46369b3 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -247,8 +247,7 @@ static inline void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) #endif /* CONFIG_PCI_MSI */ int msi_add_msi_desc(struct device *dev, struct msi_desc *init_desc); -void msi_free_msi_descs_range(struct device *dev, enum msi_desc_filter filter, - unsigned int first_index, unsigned int last_index); +void msi_free_msi_descs_range(struct device *dev, unsigned int first_index, unsigned int last_index); /** * msi_free_msi_descs - Free MSI descriptors of a device @@ -256,7 +255,7 @@ void msi_free_msi_descs_range(struct device *dev, enum msi_desc_filter filter, */ static inline void msi_free_msi_descs(struct device *dev) { - msi_free_msi_descs_range(dev, MSI_DESC_ALL, 0, MSI_MAX_INDEX); + msi_free_msi_descs_range(dev, 0, MSI_MAX_INDEX); } void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg); diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index bba6359d8450..1ca484698bda 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -120,7 +120,7 @@ static int msi_add_simple_msi_descs(struct device *dev, unsigned int index, unsi fail_mem: ret = -ENOMEM; fail: - msi_free_msi_descs_range(dev, MSI_DESC_ALL, index, last); + msi_free_msi_descs_range(dev, index, last); return ret; } @@ -141,12 +141,11 @@ static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter) /** * msi_free_msi_descs_range - Free MSI descriptors of a device * @dev: Device to free the descriptors - * @filter: Descriptor state filter * @first_index: Index to start freeing from * @last_index: Last index to be freed */ -void msi_free_msi_descs_range(struct device *dev, enum msi_desc_filter filter, - unsigned int first_index, unsigned int last_index) +void msi_free_msi_descs_range(struct device *dev, unsigned int first_index, + unsigned int last_index) { struct xarray *xa = &dev->msi.data->__store; struct msi_desc *desc; @@ -155,10 +154,12 @@ void msi_free_msi_descs_range(struct device *dev, enum msi_desc_filter filter, lockdep_assert_held(&dev->msi.data->mutex); xa_for_each_range(xa, idx, desc, first_index, last_index) { - if (msi_desc_match(desc, filter)) { - xa_erase(xa, idx); - msi_free_desc(desc); - } + xa_erase(xa, idx); + + /* Leak the descriptor when it is still referenced */ + if (WARN_ON_ONCE(msi_desc_match(desc, MSI_DESC_ASSOCIATED))) + continue; + msi_free_desc(desc); } } @@ -739,7 +740,7 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, fail: for (--virq; virq >= virq_base; virq--) irq_domain_free_irqs_common(domain, virq, 1); - msi_free_msi_descs_range(dev, MSI_DESC_ALL, virq_base, virq_base + nvec - 1); + msi_free_msi_descs_range(dev, virq_base, virq_base + nvec - 1); unlock: msi_unlock_descs(dev); return ret; From fdd5340411b25450612767270ac3cd0d8454183c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Nov 2022 14:54:23 +0100 Subject: [PATCH 008/134] genirq/msi: Add missing kernel doc to msi_next_desc() W=1 complains about this. Signed-off-by: Thomas Gleixner Reviewed-by: Ashok Raj Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20221111122013.947071142@linutronix.de --- kernel/irq/msi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 1ca484698bda..8a6d0dcab7a2 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -282,6 +282,7 @@ EXPORT_SYMBOL_GPL(msi_first_desc); /** * msi_next_desc - Get the next MSI descriptor of a device * @dev: Device to operate on + * @filter: Descriptor state filter * * The first invocation of msi_next_desc() has to be preceeded by a * successful invocation of __msi_first_desc(). Consecutive invocations are From 762687ceb31fc296e2e1406559e8bb50251c5277 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Nov 2022 14:54:25 +0100 Subject: [PATCH 009/134] genirq/msi: Make __msi_domain_alloc_irqs() static Nothing outside of the core code requires this. Signed-off-by: Thomas Gleixner Reviewed-by: Ashok Raj Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20221111122014.004725919@linutronix.de --- include/linux/msi.h | 7 ++----- kernel/irq/msi.c | 6 ++++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/include/linux/msi.h b/include/linux/msi.h index 969ce46369b3..9b552ee58a37 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -334,9 +334,8 @@ struct msi_domain_info; * MSI_FLAG_USE_DEF_DOM_OPS is not set to avoid breaking existing users and * because these callbacks are obviously mandatory. * - * This is NOT meant to be abused, but it can be useful to build wrappers - * for specialized MSI irq domains which need extra work before and after - * calling __msi_domain_alloc_irqs()/__msi_domain_free_irqs(). + * __msi_domain_free_irqs() is exposed for PPC pseries to handle extra + * work after all interrupts and descriptors have been freed. */ struct msi_domain_ops { irq_hw_number_t (*get_hwirq)(struct msi_domain_info *info, @@ -425,8 +424,6 @@ int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask, struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); -int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, - int nvec); int msi_domain_alloc_irqs_descs_locked(struct irq_domain *domain, struct device *dev, int nvec); int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 8a6d0dcab7a2..3ccc7f68804b 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -462,6 +462,8 @@ static inline void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *de #endif /* !CONFIG_SYSFS */ #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN +static int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, int nvec); + static inline void irq_chip_write_msi_msg(struct irq_data *data, struct msi_msg *msg) { @@ -852,8 +854,8 @@ static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflag return 0; } -int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, - int nvec) +static int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, + int nvec) { struct msi_domain_info *info = domain->host_data; struct msi_domain_ops *ops = info->ops; From f6d3486a3d2f3c67d732641834eec872fcc66472 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Nov 2022 14:54:27 +0100 Subject: [PATCH 010/134] genirq/msi: Provide msi_domain_ops:: Post_free() To prepare for removing the exposure of __msi_domain_free_irqs() provide a post_free() callback in the MSI domain ops which can be used to solve the problem of the only user of __msi_domain_free_irqs() in arch/powerpc. Signed-off-by: Thomas Gleixner Reviewed-by: Ashok Raj Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20221111122014.063153448@linutronix.de --- include/linux/msi.h | 4 ++++ kernel/irq/msi.c | 2 ++ 2 files changed, 6 insertions(+) diff --git a/include/linux/msi.h b/include/linux/msi.h index 9b552ee58a37..7f6aba9d6b4e 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -315,6 +315,8 @@ struct msi_domain_info; * function. * @domain_free_irqs: Optional function to override the default free * function. + * @msi_post_free: Optional function which is invoked after freeing + * all interrupts. * * @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying * irqdomain. @@ -359,6 +361,8 @@ struct msi_domain_ops { struct device *dev, int nvec); void (*domain_free_irqs)(struct irq_domain *domain, struct device *dev); + void (*msi_post_free)(struct irq_domain *domain, + struct device *dev); }; /** diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 3ccc7f68804b..c71c37d3fa34 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -1026,6 +1026,8 @@ void msi_domain_free_irqs_descs_locked(struct irq_domain *domain, struct device lockdep_assert_held(&dev->msi.data->mutex); ops->domain_free_irqs(domain, dev); + if (ops->msi_post_free) + ops->msi_post_free(domain, dev); msi_domain_free_msi_descs(info, dev); } From c4bc51b1dd611fcce53849254c6582334368c715 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Nov 2022 14:54:28 +0100 Subject: [PATCH 011/134] powerpc/pseries/msi: Use msi_domain_ops:: Msi_post_free() Use the new msi_post_free() callback which is invoked after the interrupts have been freed to tell the hypervisor about the shutdown. This allows to remove the exposure of __msi_domain_free_irqs(). Signed-off-by: Thomas Gleixner Reviewed-by: Ashok Raj Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20221111122014.120489922@linutronix.de --- arch/powerpc/platforms/pseries/msi.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index a3a71d37cb9a..3f05507e444d 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -447,21 +447,18 @@ static void pseries_msi_ops_msi_free(struct irq_domain *domain, * RTAS can not disable one MSI at a time. It's all or nothing. Do it * at the end after all IRQs have been freed. */ -static void pseries_msi_domain_free_irqs(struct irq_domain *domain, - struct device *dev) +static void pseries_msi_post_free(struct irq_domain *domain, struct device *dev) { if (WARN_ON_ONCE(!dev_is_pci(dev))) return; - __msi_domain_free_irqs(domain, dev); - rtas_disable_msi(to_pci_dev(dev)); } static struct msi_domain_ops pseries_pci_msi_domain_ops = { .msi_prepare = pseries_msi_ops_prepare, .msi_free = pseries_msi_ops_msi_free, - .domain_free_irqs = pseries_msi_domain_free_irqs, + .msi_post_free = pseries_msi_post_free, }; static void pseries_msi_shutdown(struct irq_data *d) From 057c97a1cd665ebb38841418adcd35f57b33cc21 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Nov 2022 14:54:30 +0100 Subject: [PATCH 012/134] genirq/msi: Make __msi_domain_free_irqs() static Now that the last user is gone, confine it to the core code. Signed-off-by: Thomas Gleixner Reviewed-by: Ashok Raj Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20221111122014.179595843@linutronix.de --- include/linux/msi.h | 4 ---- kernel/irq/msi.c | 3 ++- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/include/linux/msi.h b/include/linux/msi.h index 7f6aba9d6b4e..ee735ffeabd4 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -335,9 +335,6 @@ struct msi_domain_info; * are set to the default implementation if NULL and even when * MSI_FLAG_USE_DEF_DOM_OPS is not set to avoid breaking existing users and * because these callbacks are obviously mandatory. - * - * __msi_domain_free_irqs() is exposed for PPC pseries to handle extra - * work after all interrupts and descriptors have been freed. */ struct msi_domain_ops { irq_hw_number_t (*get_hwirq)(struct msi_domain_info *info, @@ -432,7 +429,6 @@ int msi_domain_alloc_irqs_descs_locked(struct irq_domain *domain, struct device int nvec); int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, int nvec); -void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev); void msi_domain_free_irqs_descs_locked(struct irq_domain *domain, struct device *dev); void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev); struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain); diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index c71c37d3fa34..a2efa006752e 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -463,6 +463,7 @@ static inline void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *de #ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN static int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, int nvec); +static void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev); static inline void irq_chip_write_msi_msg(struct irq_data *data, struct msi_msg *msg) @@ -978,7 +979,7 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, int nve return ret; } -void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev) +static void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev) { struct msi_domain_info *info = domain->host_data; struct irq_data *irqd; From aeef20527c87fed37c6f159d9eafd063a099f6ed Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Nov 2022 14:54:32 +0100 Subject: [PATCH 013/134] genirq/irqdomain: Move bus token enum into a seperate header Split the bus token defines out into a seperate header file to avoid inclusion of irqdomain.h in msi.h. Signed-off-by: Thomas Gleixner Reviewed-by: Ashok Raj Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20221111122014.237221143@linutronix.de --- include/linux/irqdomain.h | 22 +--------------------- include/linux/irqdomain_defs.h | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 21 deletions(-) create mode 100644 include/linux/irqdomain_defs.h diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 00d577f90883..a4af7f81661b 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -31,6 +31,7 @@ #define _LINUX_IRQDOMAIN_H #include +#include #include #include #include @@ -68,27 +69,6 @@ struct irq_fwspec { void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, unsigned int count, struct irq_fwspec *fwspec); -/* - * Should several domains have the same device node, but serve - * different purposes (for example one domain is for PCI/MSI, and the - * other for wired IRQs), they can be distinguished using a - * bus-specific token. Most domains are expected to only carry - * DOMAIN_BUS_ANY. - */ -enum irq_domain_bus_token { - DOMAIN_BUS_ANY = 0, - DOMAIN_BUS_WIRED, - DOMAIN_BUS_GENERIC_MSI, - DOMAIN_BUS_PCI_MSI, - DOMAIN_BUS_PLATFORM_MSI, - DOMAIN_BUS_NEXUS, - DOMAIN_BUS_IPI, - DOMAIN_BUS_FSL_MC_MSI, - DOMAIN_BUS_TI_SCI_INTA_MSI, - DOMAIN_BUS_WAKEUP, - DOMAIN_BUS_VMD_MSI, -}; - /** * struct irq_domain_ops - Methods for irq_domain objects * @match: Match an interrupt controller device node to a host, returns diff --git a/include/linux/irqdomain_defs.h b/include/linux/irqdomain_defs.h new file mode 100644 index 000000000000..69035b4f740a --- /dev/null +++ b/include/linux/irqdomain_defs.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_IRQDOMAIN_DEFS_H +#define _LINUX_IRQDOMAIN_DEFS_H + +/* + * Should several domains have the same device node, but serve + * different purposes (for example one domain is for PCI/MSI, and the + * other for wired IRQs), they can be distinguished using a + * bus-specific token. Most domains are expected to only carry + * DOMAIN_BUS_ANY. + */ +enum irq_domain_bus_token { + DOMAIN_BUS_ANY = 0, + DOMAIN_BUS_WIRED, + DOMAIN_BUS_GENERIC_MSI, + DOMAIN_BUS_PCI_MSI, + DOMAIN_BUS_PLATFORM_MSI, + DOMAIN_BUS_NEXUS, + DOMAIN_BUS_IPI, + DOMAIN_BUS_FSL_MC_MSI, + DOMAIN_BUS_TI_SCI_INTA_MSI, + DOMAIN_BUS_WAKEUP, + DOMAIN_BUS_VMD_MSI, +}; + +#endif /* _LINUX_IRQDOMAIN_DEFS_H */ From 22db089a4437a72277677f99717af499560b13f2 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 11 Nov 2022 14:54:33 +0100 Subject: [PATCH 014/134] genirq/msi: Add bus token to struct msi_domain_info Add a bus token member to struct msi_domain_info and let msi_create_irq_domain() set the bus token. That allows to remove the bus token updates at the call sites. Suggested-by: Thomas Gleixner Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20221111122014.294554462@linutronix.de --- include/linux/msi.h | 19 +++++++++++-------- kernel/irq/msi.c | 7 +++++-- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/include/linux/msi.h b/include/linux/msi.h index ee735ffeabd4..2dfd7b2bdfbe 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -16,6 +16,7 @@ * abuse. The only function which is relevant for drivers is msi_get_virq(). */ +#include #include #include #include @@ -365,6 +366,7 @@ struct msi_domain_ops { /** * struct msi_domain_info - MSI interrupt domain data * @flags: Flags to decribe features and capabilities + * @bus_token: The domain bus token * @ops: The callback data structure * @chip: Optional: associated interrupt chip * @chip_data: Optional: associated interrupt chip data @@ -374,14 +376,15 @@ struct msi_domain_ops { * @data: Optional: domain specific data */ struct msi_domain_info { - u32 flags; - struct msi_domain_ops *ops; - struct irq_chip *chip; - void *chip_data; - irq_flow_handler_t handler; - void *handler_data; - const char *handler_name; - void *data; + u32 flags; + enum irq_domain_bus_token bus_token; + struct msi_domain_ops *ops; + struct irq_chip *chip; + void *chip_data; + irq_flow_handler_t handler; + void *handler_data; + const char *handler_name; + void *data; }; /* Flags for msi_domain_info */ diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index a2efa006752e..b46b74788087 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -694,8 +694,11 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, domain = irq_domain_create_hierarchy(parent, IRQ_DOMAIN_FLAG_MSI, 0, fwnode, &msi_domain_ops, info); - if (domain && !domain->name && info->chip) - domain->name = info->chip->name; + if (domain) { + if (!domain->name && info->chip) + domain->name = info->chip->name; + irq_domain_update_bus_token(domain, info->bus_token); + } return domain; } From 38c0c10ae6a3d386c50e182227f606d8243124b8 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 11 Nov 2022 14:54:35 +0100 Subject: [PATCH 015/134] PCI/MSI: Use msi_domain_info:: Bus_token Set the bus token in the msi_domain_info structure and let the core code handle the update. Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122014.352437595@linutronix.de --- drivers/pci/msi/irqdomain.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index e9cf318e6670..7766fa6c475c 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -162,8 +162,6 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent) { - struct irq_domain *domain; - if (WARN_ON(info->flags & MSI_FLAG_LEVEL_CAPABLE)) info->flags &= ~MSI_FLAG_LEVEL_CAPABLE; @@ -178,13 +176,10 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, /* PCI-MSI is oneshot-safe */ info->chip->flags |= IRQCHIP_ONESHOT_SAFE; + /* Let the core update the bus token */ + info->bus_token = DOMAIN_BUS_PCI_MSI; - domain = msi_create_irq_domain(fwnode, info, parent); - if (!domain) - return NULL; - - irq_domain_update_bus_token(domain, DOMAIN_BUS_PCI_MSI); - return domain; + return msi_create_irq_domain(fwnode, info, parent); } EXPORT_SYMBOL_GPL(pci_msi_create_irq_domain); From b2bdda205c0c256d6483231d0afe58a6d68fd3ed Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 11 Nov 2022 14:54:37 +0100 Subject: [PATCH 016/134] PCI/MSI: Let the MSI core free descriptors Let the core do the freeing of descriptors and just keep it around for the legacy case. Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122014.409654736@linutronix.de --- drivers/pci/msi/irqdomain.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index 7766fa6c475c..edd0cc204e06 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -24,11 +24,12 @@ void pci_msi_teardown_msi_irqs(struct pci_dev *dev) struct irq_domain *domain; domain = dev_get_msi_domain(&dev->dev); - if (domain && irq_domain_is_hierarchy(domain)) + if (domain && irq_domain_is_hierarchy(domain)) { msi_domain_free_irqs_descs_locked(domain, &dev->dev); - else + } else { pci_msi_legacy_teardown_msi_irqs(dev); - msi_free_msi_descs(&dev->dev); + msi_free_msi_descs(&dev->dev); + } } /** @@ -170,6 +171,9 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS) pci_msi_domain_update_chip_ops(info); + /* Let the core code free MSI descriptors when freeing interrupts */ + info->flags |= MSI_FLAG_FREE_MSI_DESCS; + info->flags |= MSI_FLAG_ACTIVATE_EARLY | MSI_FLAG_DEV_SYSFS; if (IS_ENABLED(CONFIG_GENERIC_IRQ_RESERVATION_MODE)) info->flags |= MSI_FLAG_MUST_REACTIVATE; From a474d3fbe287625c6c1cfc56c2a456c5fb7c479e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Nov 2022 14:54:38 +0100 Subject: [PATCH 017/134] PCI/MSI: Get rid of PCI_MSI_IRQ_DOMAIN What a zoo: PCI_MSI select GENERIC_MSI_IRQ PCI_MSI_IRQ_DOMAIN def_bool y depends on PCI_MSI select GENERIC_MSI_IRQ_DOMAIN Ergo PCI_MSI enables PCI_MSI_IRQ_DOMAIN which in turn selects GENERIC_MSI_IRQ_DOMAIN. So all the dependencies on PCI_MSI_IRQ_DOMAIN are just an indirection to PCI_MSI. Match the reality and just admit that PCI_MSI requires GENERIC_MSI_IRQ_DOMAIN. Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122014.467556921@linutronix.de --- arch/um/drivers/Kconfig | 1 - arch/um/include/asm/pci.h | 2 +- arch/x86/Kconfig | 1 - arch/x86/include/asm/pci.h | 4 +-- drivers/pci/Kconfig | 8 ++--- drivers/pci/controller/Kconfig | 30 ++++++++-------- drivers/pci/controller/dwc/Kconfig | 48 ++++++++++++------------- drivers/pci/controller/mobiveil/Kconfig | 6 ++-- drivers/pci/msi/Makefile | 2 +- drivers/pci/probe.c | 2 -- include/linux/msi.h | 29 ++++++--------- 11 files changed, 58 insertions(+), 75 deletions(-) diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig index 5903e2b598aa..a4f0a19fbe14 100644 --- a/arch/um/drivers/Kconfig +++ b/arch/um/drivers/Kconfig @@ -381,7 +381,6 @@ config UML_PCI_OVER_VIRTIO select UML_IOMEM_EMULATION select UML_DMA_EMULATION select PCI_MSI - select PCI_MSI_IRQ_DOMAIN select PCI_LOCKLESS_CONFIG config UML_PCI_OVER_VIRTIO_DEVICE_ID diff --git a/arch/um/include/asm/pci.h b/arch/um/include/asm/pci.h index 34fe4921b5fa..238d2e7faff8 100644 --- a/arch/um/include/asm/pci.h +++ b/arch/um/include/asm/pci.h @@ -7,7 +7,7 @@ /* Generic PCI */ #include -#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN +#ifdef CONFIG_PCI_MSI /* * This is a bit of an annoying hack, and it assumes we only have * the virt-pci (if anything). Which is true, but still. diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 67745ceab0db..64f81a10bfc8 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1109,7 +1109,6 @@ config X86_LOCAL_APIC def_bool y depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI select IRQ_DOMAIN_HIERARCHY - select PCI_MSI_IRQ_DOMAIN if PCI_MSI config X86_IO_APIC def_bool y diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 736793d65bcb..c4789de0da09 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -21,7 +21,7 @@ struct pci_sysdata { #ifdef CONFIG_X86_64 void *iommu; /* IOMMU private data */ #endif -#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN +#ifdef CONFIG_PCI_MSI void *fwnode; /* IRQ domain for MSI assignment */ #endif #if IS_ENABLED(CONFIG_VMD) @@ -52,7 +52,7 @@ static inline int pci_proc_domain(struct pci_bus *bus) } #endif -#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN +#ifdef CONFIG_PCI_MSI static inline void *_pci_root_bus_fwnode(struct pci_bus *bus) { return to_pci_sysdata(bus)->fwnode; diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index 55c028af4bd9..b7f62214668a 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -38,6 +38,7 @@ source "drivers/pci/pcie/Kconfig" config PCI_MSI bool "Message Signaled Interrupts (MSI and MSI-X)" + select GENERIC_MSI_IRQ_DOMAIN select GENERIC_MSI_IRQ help This allows device drivers to enable MSI (Message Signaled @@ -51,11 +52,6 @@ config PCI_MSI If you don't know what to do here, say Y. -config PCI_MSI_IRQ_DOMAIN - def_bool y - depends on PCI_MSI - select GENERIC_MSI_IRQ_DOMAIN - config PCI_MSI_ARCH_FALLBACKS bool @@ -192,7 +188,7 @@ config PCI_LABEL config PCI_HYPERV tristate "Hyper-V PCI Frontend" - depends on ((X86 && X86_64) || ARM64) && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && SYSFS + depends on ((X86 && X86_64) || ARM64) && HYPERV && PCI_MSI && SYSFS select PCI_HYPERV_INTERFACE help The PCI device frontend driver allows the kernel to import arbitrary diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig index bfd9bac37e24..1569d9a3ada0 100644 --- a/drivers/pci/controller/Kconfig +++ b/drivers/pci/controller/Kconfig @@ -19,7 +19,7 @@ config PCI_AARDVARK tristate "Aardvark PCIe controller" depends on (ARCH_MVEBU && ARM64) || COMPILE_TEST depends on OF - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCI_BRIDGE_EMUL help Add support for Aardvark 64bit PCIe Host Controller. This @@ -29,7 +29,7 @@ config PCI_AARDVARK config PCIE_XILINX_NWL bool "NWL PCIe Core" depends on ARCH_ZYNQMP || COMPILE_TEST - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI help Say 'Y' here if you want kernel support for Xilinx NWL PCIe controller. The controller can act as Root Port @@ -53,7 +53,7 @@ config PCI_IXP4XX config PCI_TEGRA bool "NVIDIA Tegra PCIe controller" depends on ARCH_TEGRA || COMPILE_TEST - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI help Say Y here if you want support for the PCIe host controller found on NVIDIA Tegra SoCs. @@ -70,7 +70,7 @@ config PCI_RCAR_GEN2 config PCIE_RCAR_HOST bool "Renesas R-Car PCIe host controller" depends on ARCH_RENESAS || COMPILE_TEST - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI help Say Y here if you want PCIe controller support on R-Car SoCs in host mode. @@ -99,7 +99,7 @@ config PCI_HOST_GENERIC config PCIE_XILINX bool "Xilinx AXI PCIe host bridge support" depends on OF || COMPILE_TEST - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI help Say 'Y' here if you want kernel to support the Xilinx AXI PCIe Host Bridge driver. @@ -124,7 +124,7 @@ config PCI_XGENE config PCI_XGENE_MSI bool "X-Gene v1 PCIe MSI feature" depends on PCI_XGENE - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI default y help Say Y here if you want PCIe MSI support for the APM X-Gene v1 SoC. @@ -170,7 +170,7 @@ config PCIE_IPROC_BCMA config PCIE_IPROC_MSI bool "Broadcom iProc PCIe MSI support" depends on PCIE_IPROC_PLATFORM || PCIE_IPROC_BCMA - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI default ARCH_BCM_IPROC help Say Y here if you want to enable MSI support for Broadcom's iProc @@ -186,7 +186,7 @@ config PCIE_ALTERA config PCIE_ALTERA_MSI tristate "Altera PCIe MSI feature" depends on PCIE_ALTERA - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI help Say Y here if you want PCIe MSI support for the Altera FPGA. This MSI driver supports Altera MSI to GIC controller IP. @@ -215,7 +215,7 @@ config PCIE_ROCKCHIP_HOST tristate "Rockchip PCIe host controller" depends on ARCH_ROCKCHIP || COMPILE_TEST depends on OF - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select MFD_SYSCON select PCIE_ROCKCHIP help @@ -239,7 +239,7 @@ config PCIE_MEDIATEK tristate "MediaTek PCIe controller" depends on ARCH_AIROHA || ARCH_MEDIATEK || COMPILE_TEST depends on OF - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI help Say Y here if you want to enable PCIe controller support on MediaTek SoCs. @@ -247,7 +247,7 @@ config PCIE_MEDIATEK config PCIE_MEDIATEK_GEN3 tristate "MediaTek Gen3 PCIe controller" depends on ARCH_MEDIATEK || COMPILE_TEST - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI help Adds support for PCIe Gen3 MAC controller for MediaTek SoCs. This PCIe controller is compatible with Gen3, Gen2 and Gen1 speed, @@ -277,7 +277,7 @@ config PCIE_BRCMSTB depends on ARCH_BRCMSTB || ARCH_BCM2835 || ARCH_BCMBCA || \ BMIPS_GENERIC || COMPILE_TEST depends on OF - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI default ARCH_BRCMSTB || BMIPS_GENERIC help Say Y here to enable PCIe host controller support for @@ -285,7 +285,7 @@ config PCIE_BRCMSTB config PCI_HYPERV_INTERFACE tristate "Hyper-V PCI Interface" - depends on ((X86 && X86_64) || ARM64) && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN + depends on ((X86 && X86_64) || ARM64) && HYPERV && PCI_MSI && PCI_MSI help The Hyper-V PCI Interface is a helper driver allows other drivers to have a common interface with the Hyper-V PCI frontend driver. @@ -303,8 +303,6 @@ config PCI_LOONGSON config PCIE_MICROCHIP_HOST bool "Microchip AXI PCIe host bridge support" depends on PCI_MSI && OF - select PCI_MSI_IRQ_DOMAIN - select GENERIC_MSI_IRQ_DOMAIN select PCI_HOST_COMMON help Say Y here if you want kernel to support the Microchip AXI PCIe @@ -326,7 +324,7 @@ config PCIE_APPLE tristate "Apple PCIe controller" depends on ARCH_APPLE || COMPILE_TEST depends on OF - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCI_HOST_COMMON help Say Y here if you want to enable PCIe controller support on Apple diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig index 62ce3abf0f19..f3c462130627 100644 --- a/drivers/pci/controller/dwc/Kconfig +++ b/drivers/pci/controller/dwc/Kconfig @@ -21,7 +21,7 @@ config PCI_DRA7XX_HOST tristate "TI DRA7xx PCIe controller Host Mode" depends on SOC_DRA7XX || COMPILE_TEST depends on OF && HAS_IOMEM && TI_PIPE3 - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_DW_HOST select PCI_DRA7XX default y if SOC_DRA7XX @@ -53,7 +53,7 @@ config PCIE_DW_PLAT config PCIE_DW_PLAT_HOST bool "Platform bus based DesignWare PCIe Controller - Host mode" - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_DW_HOST select PCIE_DW_PLAT help @@ -67,7 +67,7 @@ config PCIE_DW_PLAT_HOST config PCIE_DW_PLAT_EP bool "Platform bus based DesignWare PCIe Controller - Endpoint mode" - depends on PCI && PCI_MSI_IRQ_DOMAIN + depends on PCI && PCI_MSI depends on PCI_ENDPOINT select PCIE_DW_EP select PCIE_DW_PLAT @@ -83,7 +83,7 @@ config PCIE_DW_PLAT_EP config PCI_EXYNOS tristate "Samsung Exynos PCIe controller" depends on ARCH_EXYNOS || COMPILE_TEST - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_DW_HOST help Enables support for the PCIe controller in the Samsung Exynos SoCs @@ -94,13 +94,13 @@ config PCI_EXYNOS config PCI_IMX6 bool "Freescale i.MX6/7/8 PCIe controller" depends on ARCH_MXC || COMPILE_TEST - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_DW_HOST config PCIE_SPEAR13XX bool "STMicroelectronics SPEAr PCIe controller" depends on ARCH_SPEAR13XX || COMPILE_TEST - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_DW_HOST help Say Y here if you want PCIe support on SPEAr13XX SoCs. @@ -111,7 +111,7 @@ config PCI_KEYSTONE config PCI_KEYSTONE_HOST bool "PCI Keystone Host Mode" depends on ARCH_KEYSTONE || ARCH_K3 || COMPILE_TEST - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_DW_HOST select PCI_KEYSTONE help @@ -135,7 +135,7 @@ config PCI_KEYSTONE_EP config PCI_LAYERSCAPE bool "Freescale Layerscape PCIe controller - Host mode" depends on OF && (ARM || ARCH_LAYERSCAPE || COMPILE_TEST) - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_DW_HOST select MFD_SYSCON help @@ -160,7 +160,7 @@ config PCI_LAYERSCAPE_EP config PCI_HISI depends on OF && (ARM64 || COMPILE_TEST) bool "HiSilicon Hip05 and Hip06 SoCs PCIe controllers" - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_DW_HOST select PCI_HOST_COMMON help @@ -170,7 +170,7 @@ config PCI_HISI config PCIE_QCOM bool "Qualcomm PCIe controller" depends on OF && (ARCH_QCOM || COMPILE_TEST) - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_DW_HOST select CRC8 help @@ -191,7 +191,7 @@ config PCIE_QCOM_EP config PCIE_ARMADA_8K bool "Marvell Armada-8K PCIe controller" depends on ARCH_MVEBU || COMPILE_TEST - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_DW_HOST help Say Y here if you want to enable PCIe controller support on @@ -205,7 +205,7 @@ config PCIE_ARTPEC6 config PCIE_ARTPEC6_HOST bool "Axis ARTPEC-6 PCIe controller Host Mode" depends on MACH_ARTPEC6 || COMPILE_TEST - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_DW_HOST select PCIE_ARTPEC6 help @@ -226,7 +226,7 @@ config PCIE_ROCKCHIP_DW_HOST bool "Rockchip DesignWare PCIe controller" select PCIE_DW select PCIE_DW_HOST - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI depends on ARCH_ROCKCHIP || COMPILE_TEST depends on OF help @@ -236,7 +236,7 @@ config PCIE_ROCKCHIP_DW_HOST config PCIE_INTEL_GW bool "Intel Gateway PCIe host controller support" depends on OF && (X86 || COMPILE_TEST) - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_DW_HOST help Say 'Y' here to enable PCIe Host controller support on Intel @@ -250,7 +250,7 @@ config PCIE_KEEMBAY config PCIE_KEEMBAY_HOST bool "Intel Keem Bay PCIe controller - Host mode" depends on ARCH_KEEMBAY || COMPILE_TEST - depends on PCI && PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_DW_HOST select PCIE_KEEMBAY help @@ -262,7 +262,7 @@ config PCIE_KEEMBAY_HOST config PCIE_KEEMBAY_EP bool "Intel Keem Bay PCIe controller - Endpoint mode" depends on ARCH_KEEMBAY || COMPILE_TEST - depends on PCI && PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI depends on PCI_ENDPOINT select PCIE_DW_EP select PCIE_KEEMBAY @@ -275,7 +275,7 @@ config PCIE_KEEMBAY_EP config PCIE_KIRIN depends on OF && (ARM64 || COMPILE_TEST) tristate "HiSilicon Kirin series SoCs PCIe controllers" - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_DW_HOST help Say Y here if you want PCIe controller support @@ -284,7 +284,7 @@ config PCIE_KIRIN config PCIE_HISI_STB bool "HiSilicon STB SoCs PCIe controllers" depends on ARCH_HISI || COMPILE_TEST - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_DW_HOST help Say Y here if you want PCIe controller support on HiSilicon STB SoCs @@ -292,7 +292,7 @@ config PCIE_HISI_STB config PCI_MESON tristate "MESON PCIe controller" default m if ARCH_MESON - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_DW_HOST help Say Y here if you want to enable PCI controller support on Amlogic @@ -306,7 +306,7 @@ config PCIE_TEGRA194 config PCIE_TEGRA194_HOST tristate "NVIDIA Tegra194 (and later) PCIe controller - Host Mode" depends on ARCH_TEGRA_194_SOC || COMPILE_TEST - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_DW_HOST select PHY_TEGRA194_P2U select PCIE_TEGRA194 @@ -336,7 +336,7 @@ config PCIE_TEGRA194_EP config PCIE_VISCONTI_HOST bool "Toshiba Visconti PCIe controllers" depends on ARCH_VISCONTI || COMPILE_TEST - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_DW_HOST help Say Y here if you want PCIe controller support on Toshiba Visconti SoC. @@ -346,7 +346,7 @@ config PCIE_UNIPHIER bool "Socionext UniPhier PCIe host controllers" depends on ARCH_UNIPHIER || COMPILE_TEST depends on OF && HAS_IOMEM - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_DW_HOST help Say Y here if you want PCIe host controller support on UniPhier SoCs. @@ -365,7 +365,7 @@ config PCIE_UNIPHIER_EP config PCIE_AL bool "Amazon Annapurna Labs PCIe controller" depends on OF && (ARM64 || COMPILE_TEST) - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_DW_HOST select PCI_ECAM help @@ -377,7 +377,7 @@ config PCIE_AL config PCIE_FU740 bool "SiFive FU740 PCIe host controller" - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI depends on SOC_SIFIVE || COMPILE_TEST select PCIE_DW_HOST help diff --git a/drivers/pci/controller/mobiveil/Kconfig b/drivers/pci/controller/mobiveil/Kconfig index e4643fb94e78..1d7a07ba9ccd 100644 --- a/drivers/pci/controller/mobiveil/Kconfig +++ b/drivers/pci/controller/mobiveil/Kconfig @@ -8,14 +8,14 @@ config PCIE_MOBIVEIL config PCIE_MOBIVEIL_HOST bool - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_MOBIVEIL config PCIE_MOBIVEIL_PLAT bool "Mobiveil AXI PCIe controller" depends on ARCH_ZYNQMP || COMPILE_TEST depends on OF - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_MOBIVEIL_HOST help Say Y here if you want to enable support for the Mobiveil AXI PCIe @@ -25,7 +25,7 @@ config PCIE_MOBIVEIL_PLAT config PCIE_LAYERSCAPE_GEN4 bool "Freescale Layerscape PCIe Gen4 controller" depends on ARCH_LAYERSCAPE || COMPILE_TEST - depends on PCI_MSI_IRQ_DOMAIN + depends on PCI_MSI select PCIE_MOBIVEIL_HOST help Say Y here if you want PCIe Gen4 controller support on diff --git a/drivers/pci/msi/Makefile b/drivers/pci/msi/Makefile index 93ef7b9e404d..4e0a7e07965e 100644 --- a/drivers/pci/msi/Makefile +++ b/drivers/pci/msi/Makefile @@ -3,5 +3,5 @@ # Makefile for the PCI/MSI obj-$(CONFIG_PCI) += pcidev_msi.o obj-$(CONFIG_PCI_MSI) += msi.o -obj-$(CONFIG_PCI_MSI_IRQ_DOMAIN) += irqdomain.o +obj-$(CONFIG_PCI_MSI) += irqdomain.o obj-$(CONFIG_PCI_MSI_ARCH_FALLBACKS) += legacy.o diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index b66fa42c4b1f..fdd7e56ddf40 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -842,7 +842,6 @@ static struct irq_domain *pci_host_bridge_msi_domain(struct pci_bus *bus) if (!d) d = pci_host_bridge_acpi_msi_domain(bus); -#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN /* * If no IRQ domain was found via the OF tree, try looking it up * directly through the fwnode_handle. @@ -854,7 +853,6 @@ static struct irq_domain *pci_host_bridge_msi_domain(struct pci_bus *bus) d = irq_find_matching_fwnode(fwnode, DOMAIN_BUS_PCI_MSI); } -#endif return d; } diff --git a/include/linux/msi.h b/include/linux/msi.h index 2dfd7b2bdfbe..9b8414523916 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -238,15 +238,6 @@ static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc, } #endif -#ifdef CONFIG_PCI_MSI -struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc); -void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg); -#else /* CONFIG_PCI_MSI */ -static inline void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) -{ -} -#endif /* CONFIG_PCI_MSI */ - int msi_add_msi_desc(struct device *dev, struct msi_desc *init_desc); void msi_free_msi_descs_range(struct device *dev, unsigned int first_index, unsigned int last_index); @@ -259,12 +250,6 @@ static inline void msi_free_msi_descs(struct device *dev) msi_free_msi_descs_range(dev, 0, MSI_MAX_INDEX); } -void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg); -void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg); - -void pci_msi_mask_irq(struct irq_data *data); -void pci_msi_unmask_irq(struct irq_data *data); - /* * The arch hooks to setup up msi irqs. Default functions are implemented * as weak symbols so that they /can/ be overriden by architecture specific @@ -468,18 +453,26 @@ void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int vir void *platform_msi_get_host_data(struct irq_domain *domain); #endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */ -#ifdef CONFIG_PCI_MSI_IRQ_DOMAIN +/* PCI specific interfaces */ +#ifdef CONFIG_PCI_MSI +struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc); +void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg); +void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg); +void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg); +void pci_msi_mask_irq(struct irq_data *data); +void pci_msi_unmask_irq(struct irq_data *data); struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev); struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev); bool pci_dev_has_special_msi_domain(struct pci_dev *pdev); -#else +#else /* CONFIG_PCI_MSI */ static inline struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev) { return NULL; } -#endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */ +static inline void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) { } +#endif /* !CONFIG_PCI_MSI */ #endif /* LINUX_MSI_H */ From 13e7accb81d6c07993385af8342238ff22b41ac8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Nov 2022 14:54:40 +0100 Subject: [PATCH 018/134] genirq: Get rid of GENERIC_MSI_IRQ_DOMAIN Adjust to reality and remove another layer of pointless Kconfig indirection. CONFIG_GENERIC_MSI_IRQ is good enough to serve all purposes. Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20221111122014.524842979@linutronix.de --- drivers/base/Makefile | 2 +- drivers/bus/fsl-mc/Kconfig | 2 +- drivers/dma/Kconfig | 2 +- drivers/dma/qcom/hidma.c | 8 ++++---- drivers/iommu/Kconfig | 2 +- drivers/irqchip/Kconfig | 6 +++--- drivers/mailbox/Kconfig | 2 +- drivers/pci/Kconfig | 1 - drivers/perf/Kconfig | 2 +- drivers/soc/ti/Kconfig | 2 +- include/asm-generic/msi.h | 4 ++-- include/linux/device.h | 8 +++----- include/linux/gpio/driver.h | 2 +- include/linux/msi.h | 8 +++----- kernel/irq/Kconfig | 7 +------ kernel/irq/msi.c | 3 --- 16 files changed, 24 insertions(+), 37 deletions(-) diff --git a/drivers/base/Makefile b/drivers/base/Makefile index 83217d243c25..3079bfe53d04 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -22,7 +22,7 @@ obj-$(CONFIG_REGMAP) += regmap/ obj-$(CONFIG_SOC_BUS) += soc.o obj-$(CONFIG_PINCTRL) += pinctrl.o obj-$(CONFIG_DEV_COREDUMP) += devcoredump.o -obj-$(CONFIG_GENERIC_MSI_IRQ_DOMAIN) += platform-msi.o +obj-$(CONFIG_GENERIC_MSI_IRQ) += platform-msi.o obj-$(CONFIG_GENERIC_ARCH_TOPOLOGY) += arch_topology.o obj-$(CONFIG_GENERIC_ARCH_NUMA) += arch_numa.o obj-$(CONFIG_ACPI) += physical_location.o diff --git a/drivers/bus/fsl-mc/Kconfig b/drivers/bus/fsl-mc/Kconfig index b1fd55901c50..9492342e7d13 100644 --- a/drivers/bus/fsl-mc/Kconfig +++ b/drivers/bus/fsl-mc/Kconfig @@ -8,7 +8,7 @@ config FSL_MC_BUS bool "QorIQ DPAA2 fsl-mc bus driver" depends on OF && (ARCH_LAYERSCAPE || (COMPILE_TEST && (ARM || ARM64 || X86_LOCAL_APIC || PPC))) - select GENERIC_MSI_IRQ_DOMAIN + select GENERIC_MSI_IRQ help Driver to enable the bus infrastructure for the QorIQ DPAA2 architecture. The fsl-mc bus driver handles discovery of diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 7524b62a8870..25e111ab21f8 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -462,7 +462,7 @@ config MV_XOR_V2 select DMA_ENGINE select DMA_ENGINE_RAID select ASYNC_TX_ENABLE_CHANNEL_SWITCH - select GENERIC_MSI_IRQ_DOMAIN + select GENERIC_MSI_IRQ help Enable support for the Marvell version 2 XOR engine. diff --git a/drivers/dma/qcom/hidma.c b/drivers/dma/qcom/hidma.c index 210f1a9eb441..04d1c33afc12 100644 --- a/drivers/dma/qcom/hidma.c +++ b/drivers/dma/qcom/hidma.c @@ -610,7 +610,7 @@ static irqreturn_t hidma_chirq_handler(int chirq, void *arg) return hidma_ll_inthandler(chirq, lldev); } -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN +#ifdef CONFIG_GENERIC_MSI_IRQ static irqreturn_t hidma_chirq_handler_msi(int chirq, void *arg) { struct hidma_lldev **lldevp = arg; @@ -671,7 +671,7 @@ static int hidma_sysfs_init(struct hidma_dev *dev) return device_create_file(dev->ddev.dev, dev->chid_attrs); } -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN +#ifdef CONFIG_GENERIC_MSI_IRQ static void hidma_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg) { struct device *dev = msi_desc_to_dev(desc); @@ -687,7 +687,7 @@ static void hidma_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg) static void hidma_free_msis(struct hidma_dev *dmadev) { -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN +#ifdef CONFIG_GENERIC_MSI_IRQ struct device *dev = dmadev->ddev.dev; int i, virq; @@ -704,7 +704,7 @@ static void hidma_free_msis(struct hidma_dev *dmadev) static int hidma_request_msi(struct hidma_dev *dmadev, struct platform_device *pdev) { -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN +#ifdef CONFIG_GENERIC_MSI_IRQ int rc, i, virq; rc = platform_msi_domain_alloc_irqs(&pdev->dev, HIDMA_MSI_INTS, diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index dc5f7a156ff5..8eaf9b72a995 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -389,7 +389,7 @@ config ARM_SMMU_V3 depends on ARM64 select IOMMU_API select IOMMU_IO_PGTABLE_LPAE - select GENERIC_MSI_IRQ_DOMAIN + select GENERIC_MSI_IRQ help Support for implementations of the ARM System MMU architecture version 3 providing translation support to a PCIe root complex. diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 7ef9f5e696d3..ffea6a8ccece 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -38,7 +38,7 @@ config ARM_GIC_V3 config ARM_GIC_V3_ITS bool - select GENERIC_MSI_IRQ_DOMAIN + select GENERIC_MSI_IRQ default ARM_GIC_V3 config ARM_GIC_V3_ITS_PCI @@ -375,7 +375,7 @@ config MVEBU_ICU config MVEBU_ODMI bool - select GENERIC_MSI_IRQ_DOMAIN + select GENERIC_MSI_IRQ config MVEBU_PIC bool @@ -488,7 +488,7 @@ config IMX_MU_MSI default m if ARCH_MXC select IRQ_DOMAIN select IRQ_DOMAIN_HIERARCHY - select GENERIC_MSI_IRQ_DOMAIN + select GENERIC_MSI_IRQ help Provide a driver for the i.MX Messaging Unit block used as a CPU-to-CPU MSI controller. This requires a specially crafted DT diff --git a/drivers/mailbox/Kconfig b/drivers/mailbox/Kconfig index 05d6fae800e3..d7af896cbd7b 100644 --- a/drivers/mailbox/Kconfig +++ b/drivers/mailbox/Kconfig @@ -223,7 +223,7 @@ config BCM_FLEXRM_MBOX tristate "Broadcom FlexRM Mailbox" depends on ARM64 depends on ARCH_BCM_IPROC || COMPILE_TEST - select GENERIC_MSI_IRQ_DOMAIN + select GENERIC_MSI_IRQ default m if ARCH_BCM_IPROC help Mailbox implementation of the Broadcom FlexRM ring manager, diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig index b7f62214668a..9309f2469b41 100644 --- a/drivers/pci/Kconfig +++ b/drivers/pci/Kconfig @@ -38,7 +38,6 @@ source "drivers/pci/pcie/Kconfig" config PCI_MSI bool "Message Signaled Interrupts (MSI and MSI-X)" - select GENERIC_MSI_IRQ_DOMAIN select GENERIC_MSI_IRQ help This allows device drivers to enable MSI (Message Signaled diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 341010f20b77..692ffd56f5cc 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -93,7 +93,7 @@ config ARM_PMU_ACPI config ARM_SMMU_V3_PMU tristate "ARM SMMUv3 Performance Monitors Extension" depends on (ARM64 && ACPI) || (COMPILE_TEST && 64BIT) - depends on GENERIC_MSI_IRQ_DOMAIN + depends on GENERIC_MSI_IRQ help Provides support for the ARM SMMUv3 Performance Monitor Counter Groups (PMCG), which provide monitoring of transactions passing diff --git a/drivers/soc/ti/Kconfig b/drivers/soc/ti/Kconfig index 7e2fb1c16af1..e9a597e4bdc8 100644 --- a/drivers/soc/ti/Kconfig +++ b/drivers/soc/ti/Kconfig @@ -98,6 +98,6 @@ endif # SOC_TI config TI_SCI_INTA_MSI_DOMAIN bool - select GENERIC_MSI_IRQ_DOMAIN + select GENERIC_MSI_IRQ help Driver to enable Interrupt Aggregator specific MSI Domain. diff --git a/include/asm-generic/msi.h b/include/asm-generic/msi.h index bf910d47e900..124c734ca5d9 100644 --- a/include/asm-generic/msi.h +++ b/include/asm-generic/msi.h @@ -4,7 +4,7 @@ #include -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN +#ifdef CONFIG_GENERIC_MSI_IRQ #ifndef NUM_MSI_ALLOC_SCRATCHPAD_REGS # define NUM_MSI_ALLOC_SCRATCHPAD_REGS 2 @@ -36,6 +36,6 @@ typedef struct msi_alloc_info { #define GENERIC_MSI_DOMAIN_OPS 1 -#endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */ +#endif /* CONFIG_GENERIC_MSI_IRQ */ #endif diff --git a/include/linux/device.h b/include/linux/device.h index 424b55df0272..c90a444be1c4 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -378,10 +378,8 @@ struct dev_links_info { * @data: Pointer to MSI device data */ struct dev_msi_info { -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN - struct irq_domain *domain; -#endif #ifdef CONFIG_GENERIC_MSI_IRQ + struct irq_domain *domain; struct msi_device_data *data; #endif }; @@ -742,7 +740,7 @@ static inline void set_dev_node(struct device *dev, int node) static inline struct irq_domain *dev_get_msi_domain(const struct device *dev) { -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN +#ifdef CONFIG_GENERIC_MSI_IRQ return dev->msi.domain; #else return NULL; @@ -751,7 +749,7 @@ static inline struct irq_domain *dev_get_msi_domain(const struct device *dev) static inline void dev_set_msi_domain(struct device *dev, struct irq_domain *d) { -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN +#ifdef CONFIG_GENERIC_MSI_IRQ dev->msi.domain = d; #endif } diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 6aeea1071b1b..88ae4513abb5 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -27,7 +27,7 @@ struct gpio_chip; union gpio_irq_fwspec { struct irq_fwspec fwspec; -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN +#ifdef CONFIG_GENERIC_MSI_IRQ msi_alloc_info_t msiinfo; #endif }; diff --git a/include/linux/msi.h b/include/linux/msi.h index 9b8414523916..8b287148e512 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -79,9 +79,7 @@ void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg); #ifdef CONFIG_GENERIC_MSI_IRQ void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg); #else -static inline void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) -{ -} +static inline void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) { } #endif typedef void (*irq_write_msi_msg_t)(struct msi_desc *desc, @@ -278,7 +276,7 @@ static inline void msi_device_destroy_sysfs(struct device *dev) { } */ bool arch_restore_msi_irqs(struct pci_dev *dev); -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN +#ifdef CONFIG_GENERIC_MSI_IRQ #include @@ -451,7 +449,7 @@ int platform_msi_device_domain_alloc(struct irq_domain *domain, unsigned int vir void platform_msi_device_domain_free(struct irq_domain *domain, unsigned int virq, unsigned int nvec); void *platform_msi_get_host_data(struct irq_domain *domain); -#endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */ +#endif /* CONFIG_GENERIC_MSI_IRQ */ /* PCI specific interfaces */ #ifdef CONFIG_PCI_MSI diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index db3d174c53d4..b64c44ae4c25 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -86,15 +86,10 @@ config GENERIC_IRQ_IPI depends on SMP select IRQ_DOMAIN_HIERARCHY -# Generic MSI interrupt support +# Generic MSI hierarchical interrupt domain support config GENERIC_MSI_IRQ bool - -# Generic MSI hierarchical interrupt domain support -config GENERIC_MSI_IRQ_DOMAIN - bool select IRQ_DOMAIN_HIERARCHY - select GENERIC_MSI_IRQ config IRQ_MSI_IOMMU bool diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index b46b74788087..4fde91703c9b 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -461,7 +461,6 @@ static inline int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *d static inline void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc) { } #endif /* !CONFIG_SYSFS */ -#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN static int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, int nvec); static void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev); @@ -1058,5 +1057,3 @@ struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain) { return (struct msi_domain_info *)domain->host_data; } - -#endif /* CONFIG_GENERIC_MSI_IRQ_DOMAIN */ From db537dd3bf83169d78e5617b75158f64deabcb0b Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 11 Nov 2022 14:54:42 +0100 Subject: [PATCH 019/134] PCI/MSI: Get rid of externs in msi.h Follow the style of Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122014.582175082@linutronix.de --- drivers/pci/msi/msi.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/pci/msi/msi.h b/drivers/pci/msi/msi.h index dbeff066bedd..fc92603b33e1 100644 --- a/drivers/pci/msi/msi.h +++ b/drivers/pci/msi/msi.h @@ -5,12 +5,12 @@ #define msix_table_size(flags) ((flags & PCI_MSIX_FLAGS_QSIZE) + 1) -extern int pci_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); -extern void pci_msi_teardown_msi_irqs(struct pci_dev *dev); +int pci_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); +void pci_msi_teardown_msi_irqs(struct pci_dev *dev); #ifdef CONFIG_PCI_MSI_ARCH_FALLBACKS -extern int pci_msi_legacy_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); -extern void pci_msi_legacy_teardown_msi_irqs(struct pci_dev *dev); +int pci_msi_legacy_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); +void pci_msi_legacy_teardown_msi_irqs(struct pci_dev *dev); #else static inline int pci_msi_legacy_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) { From c93fd5266cff2afa908659c817c6aff4d5ed6283 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 11 Nov 2022 14:54:43 +0100 Subject: [PATCH 020/134] PCI/MSI: Move mask and unmask helpers to msi.h The upcoming support for per device MSI interrupt domains needs to share some of the inline helpers with the MSI implementation. Move them to the header file. Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122014.640052354@linutronix.de --- drivers/pci/msi/msi.c | 61 +---------------------------- drivers/pci/msi/msi.h | 91 ++++++++++++++++++++++++++++++++++++------- 2 files changed, 78 insertions(+), 74 deletions(-) diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index 160af9f01669..5c310df55d0d 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -16,7 +16,7 @@ static int pci_msi_enable = 1; int pci_msi_ignore_mask; -static noinline void pci_msi_update_mask(struct msi_desc *desc, u32 clear, u32 set) +void pci_msi_update_mask(struct msi_desc *desc, u32 clear, u32 set) { raw_spinlock_t *lock = &to_pci_dev(desc->dev)->msi_lock; unsigned long flags; @@ -32,65 +32,6 @@ static noinline void pci_msi_update_mask(struct msi_desc *desc, u32 clear, u32 s raw_spin_unlock_irqrestore(lock, flags); } -static inline void pci_msi_mask(struct msi_desc *desc, u32 mask) -{ - pci_msi_update_mask(desc, 0, mask); -} - -static inline void pci_msi_unmask(struct msi_desc *desc, u32 mask) -{ - pci_msi_update_mask(desc, mask, 0); -} - -static inline void __iomem *pci_msix_desc_addr(struct msi_desc *desc) -{ - return desc->pci.mask_base + desc->msi_index * PCI_MSIX_ENTRY_SIZE; -} - -/* - * This internal function does not flush PCI writes to the device. All - * users must ensure that they read from the device before either assuming - * that the device state is up to date, or returning out of this file. - * It does not affect the msi_desc::msix_ctrl cache either. Use with care! - */ -static void pci_msix_write_vector_ctrl(struct msi_desc *desc, u32 ctrl) -{ - void __iomem *desc_addr = pci_msix_desc_addr(desc); - - if (desc->pci.msi_attrib.can_mask) - writel(ctrl, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL); -} - -static inline void pci_msix_mask(struct msi_desc *desc) -{ - desc->pci.msix_ctrl |= PCI_MSIX_ENTRY_CTRL_MASKBIT; - pci_msix_write_vector_ctrl(desc, desc->pci.msix_ctrl); - /* Flush write to device */ - readl(desc->pci.mask_base); -} - -static inline void pci_msix_unmask(struct msi_desc *desc) -{ - desc->pci.msix_ctrl &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; - pci_msix_write_vector_ctrl(desc, desc->pci.msix_ctrl); -} - -static void __pci_msi_mask_desc(struct msi_desc *desc, u32 mask) -{ - if (desc->pci.msi_attrib.is_msix) - pci_msix_mask(desc); - else - pci_msi_mask(desc, mask); -} - -static void __pci_msi_unmask_desc(struct msi_desc *desc, u32 mask) -{ - if (desc->pci.msi_attrib.is_msix) - pci_msix_unmask(desc); - else - pci_msi_unmask(desc, mask); -} - /** * pci_msi_mask_irq - Generic IRQ chip callback to mask PCI/MSI interrupts * @data: pointer to irqdata associated to that interrupt diff --git a/drivers/pci/msi/msi.h b/drivers/pci/msi/msi.h index fc92603b33e1..d8f62d911f08 100644 --- a/drivers/pci/msi/msi.h +++ b/drivers/pci/msi/msi.h @@ -8,6 +8,83 @@ int pci_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); void pci_msi_teardown_msi_irqs(struct pci_dev *dev); +/* Mask/unmask helpers */ +void pci_msi_update_mask(struct msi_desc *desc, u32 clear, u32 set); + +static inline void pci_msi_mask(struct msi_desc *desc, u32 mask) +{ + pci_msi_update_mask(desc, 0, mask); +} + +static inline void pci_msi_unmask(struct msi_desc *desc, u32 mask) +{ + pci_msi_update_mask(desc, mask, 0); +} + +static inline void __iomem *pci_msix_desc_addr(struct msi_desc *desc) +{ + return desc->pci.mask_base + desc->msi_index * PCI_MSIX_ENTRY_SIZE; +} + +/* + * This internal function does not flush PCI writes to the device. All + * users must ensure that they read from the device before either assuming + * that the device state is up to date, or returning out of this file. + * It does not affect the msi_desc::msix_ctrl cache either. Use with care! + */ +static inline void pci_msix_write_vector_ctrl(struct msi_desc *desc, u32 ctrl) +{ + void __iomem *desc_addr = pci_msix_desc_addr(desc); + + if (desc->pci.msi_attrib.can_mask) + writel(ctrl, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL); +} + +static inline void pci_msix_mask(struct msi_desc *desc) +{ + desc->pci.msix_ctrl |= PCI_MSIX_ENTRY_CTRL_MASKBIT; + pci_msix_write_vector_ctrl(desc, desc->pci.msix_ctrl); + /* Flush write to device */ + readl(desc->pci.mask_base); +} + +static inline void pci_msix_unmask(struct msi_desc *desc) +{ + desc->pci.msix_ctrl &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; + pci_msix_write_vector_ctrl(desc, desc->pci.msix_ctrl); +} + +static inline void __pci_msi_mask_desc(struct msi_desc *desc, u32 mask) +{ + if (desc->pci.msi_attrib.is_msix) + pci_msix_mask(desc); + else + pci_msi_mask(desc, mask); +} + +static inline void __pci_msi_unmask_desc(struct msi_desc *desc, u32 mask) +{ + if (desc->pci.msi_attrib.is_msix) + pci_msix_unmask(desc); + else + pci_msi_unmask(desc, mask); +} + +/* + * PCI 2.3 does not specify mask bits for each MSI interrupt. Attempting to + * mask all MSI interrupts by clearing the MSI enable bit does not work + * reliably as devices without an INTx disable bit will then generate a + * level IRQ which will never be cleared. + */ +static inline __attribute_const__ u32 msi_multi_mask(struct msi_desc *desc) +{ + /* Don't shift by >= width of type */ + if (desc->pci.msi_attrib.multi_cap >= 5) + return 0xffffffff; + return (1 << (1 << desc->pci.msi_attrib.multi_cap)) - 1; +} + +/* Legacy (!IRQDOMAIN) fallbacks */ #ifdef CONFIG_PCI_MSI_ARCH_FALLBACKS int pci_msi_legacy_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); void pci_msi_legacy_teardown_msi_irqs(struct pci_dev *dev); @@ -23,17 +100,3 @@ static inline void pci_msi_legacy_teardown_msi_irqs(struct pci_dev *dev) WARN_ON_ONCE(1); } #endif - -/* - * PCI 2.3 does not specify mask bits for each MSI interrupt. Attempting to - * mask all MSI interrupts by clearing the MSI enable bit does not work - * reliably as devices without an INTx disable bit will then generate a - * level IRQ which will never be cleared. - */ -static inline __attribute_const__ u32 msi_multi_mask(struct msi_desc *desc) -{ - /* Don't shift by >= width of type */ - if (desc->pci.msi_attrib.multi_cap >= 5) - return 0xffffffff; - return (1 << (1 << desc->pci.msi_attrib.multi_cap)) - 1; -} From b12d0bec385b7a58b9e83751e6cd9f04ec3b23a4 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 11 Nov 2022 14:54:45 +0100 Subject: [PATCH 021/134] PCI/MSI: Move pci_disable_msi() to api.c msi.c is a maze of randomly sorted functions which makes the code unreadable. As a first step split the driver visible API and the internal implementation which also allows proper API documentation via one file. Create drivers/pci/msi/api.c to group all exported device-driver PCI/MSI APIs in one C file. Begin by moving pci_disable_msi() there and add kernel-doc for the function as appropriate. Suggested-by: Thomas Gleixner Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122014.696798036@linutronix.de --- drivers/pci/msi/Makefile | 3 +-- drivers/pci/msi/api.c | 37 +++++++++++++++++++++++++++++++++++++ drivers/pci/msi/msi.c | 22 +++++----------------- drivers/pci/msi/msi.h | 4 ++++ 4 files changed, 47 insertions(+), 19 deletions(-) create mode 100644 drivers/pci/msi/api.c diff --git a/drivers/pci/msi/Makefile b/drivers/pci/msi/Makefile index 4e0a7e07965e..839ff72d72a8 100644 --- a/drivers/pci/msi/Makefile +++ b/drivers/pci/msi/Makefile @@ -2,6 +2,5 @@ # # Makefile for the PCI/MSI obj-$(CONFIG_PCI) += pcidev_msi.o -obj-$(CONFIG_PCI_MSI) += msi.o -obj-$(CONFIG_PCI_MSI) += irqdomain.o +obj-$(CONFIG_PCI_MSI) += api.o msi.o irqdomain.o obj-$(CONFIG_PCI_MSI_ARCH_FALLBACKS) += legacy.o diff --git a/drivers/pci/msi/api.c b/drivers/pci/msi/api.c new file mode 100644 index 000000000000..7485942cbe5d --- /dev/null +++ b/drivers/pci/msi/api.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * PCI MSI/MSI-X — Exported APIs for device drivers + * + * Copyright (C) 2003-2004 Intel + * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) + * Copyright (C) 2016 Christoph Hellwig. + * Copyright (C) 2022 Linutronix GmbH + */ + +#include + +#include "msi.h" + +/** + * pci_disable_msi() - Disable MSI interrupt mode on device + * @dev: the PCI device to operate on + * + * Legacy device driver API to disable MSI interrupt mode on device, + * free earlier allocated interrupt vectors, and restore INTx emulation. + * The PCI device Linux IRQ (@dev->irq) is restored to its default + * pin-assertion IRQ. This is the cleanup pair of pci_enable_msi(). + * + * NOTE: The newer pci_alloc_irq_vectors() / pci_free_irq_vectors() API + * pair should, in general, be used instead. + */ +void pci_disable_msi(struct pci_dev *dev) +{ + if (!pci_msi_enabled() || !dev || !dev->msi_enabled) + return; + + msi_lock_descs(&dev->dev); + pci_msi_shutdown(dev); + pci_free_msi_irqs(dev); + msi_unlock_descs(&dev->dev); +} +EXPORT_SYMBOL(pci_disable_msi); diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index 5c310df55d0d..4a1300b74518 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -163,7 +163,7 @@ void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) } EXPORT_SYMBOL_GPL(pci_write_msi_msg); -static void free_msi_irqs(struct pci_dev *dev) +void pci_free_msi_irqs(struct pci_dev *dev) { pci_msi_teardown_msi_irqs(dev); @@ -413,7 +413,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec, err: pci_msi_unmask(entry, msi_multi_mask(entry)); - free_msi_irqs(dev); + pci_free_msi_irqs(dev); fail: dev->msi_enabled = 0; unlock: @@ -531,7 +531,7 @@ static int msix_setup_interrupts(struct pci_dev *dev, void __iomem *base, goto out_unlock; out_free: - free_msi_irqs(dev); + pci_free_msi_irqs(dev); out_unlock: msi_unlock_descs(&dev->dev); kfree(masks); @@ -680,7 +680,7 @@ int pci_msi_vec_count(struct pci_dev *dev) } EXPORT_SYMBOL(pci_msi_vec_count); -static void pci_msi_shutdown(struct pci_dev *dev) +void pci_msi_shutdown(struct pci_dev *dev) { struct msi_desc *desc; @@ -701,18 +701,6 @@ static void pci_msi_shutdown(struct pci_dev *dev) pcibios_alloc_irq(dev); } -void pci_disable_msi(struct pci_dev *dev) -{ - if (!pci_msi_enable || !dev || !dev->msi_enabled) - return; - - msi_lock_descs(&dev->dev); - pci_msi_shutdown(dev); - free_msi_irqs(dev); - msi_unlock_descs(&dev->dev); -} -EXPORT_SYMBOL(pci_disable_msi); - /** * pci_msix_vec_count - return the number of device's MSI-X table entries * @dev: pointer to the pci_dev data structure of MSI-X device function @@ -797,7 +785,7 @@ void pci_disable_msix(struct pci_dev *dev) msi_lock_descs(&dev->dev); pci_msix_shutdown(dev); - free_msi_irqs(dev); + pci_free_msi_irqs(dev); msi_unlock_descs(&dev->dev); } EXPORT_SYMBOL(pci_disable_msix); diff --git a/drivers/pci/msi/msi.h b/drivers/pci/msi/msi.h index d8f62d911f08..634879277349 100644 --- a/drivers/pci/msi/msi.h +++ b/drivers/pci/msi/msi.h @@ -84,6 +84,10 @@ static inline __attribute_const__ u32 msi_multi_mask(struct msi_desc *desc) return (1 << (1 << desc->pci.msi_attrib.multi_cap)) - 1; } +/* MSI internal functions invoked from the public APIs */ +void pci_msi_shutdown(struct pci_dev *dev); +void pci_free_msi_irqs(struct pci_dev *dev); + /* Legacy (!IRQDOMAIN) fallbacks */ #ifdef CONFIG_PCI_MSI_ARCH_FALLBACKS int pci_msi_legacy_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); From bbda3407982211123caadbfdee23594647bd4516 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 11 Nov 2022 14:54:46 +0100 Subject: [PATCH 022/134] PCI/MSI: Move pci_enable_msi() API to api.c To disentangle the maze in msi.c all exported device-driver MSI APIs are now to be grouped in one file, api.c. Move pci_enable_msi() and make its kernel-doc comprehensive. Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122014.755178149@linutronix.de --- drivers/pci/msi/api.c | 23 +++++++++++++++++++++++ drivers/pci/msi/msi.c | 14 ++------------ drivers/pci/msi/msi.h | 1 + 3 files changed, 26 insertions(+), 12 deletions(-) diff --git a/drivers/pci/msi/api.c b/drivers/pci/msi/api.c index 7485942cbe5d..63d7f8f6a284 100644 --- a/drivers/pci/msi/api.c +++ b/drivers/pci/msi/api.c @@ -12,6 +12,29 @@ #include "msi.h" +/** + * pci_enable_msi() - Enable MSI interrupt mode on device + * @dev: the PCI device to operate on + * + * Legacy device driver API to enable MSI interrupts mode on device and + * allocate a single interrupt vector. On success, the allocated vector + * Linux IRQ will be saved at @dev->irq. The driver must invoke + * pci_disable_msi() on cleanup. + * + * NOTE: The newer pci_alloc_irq_vectors() / pci_free_irq_vectors() API + * pair should, in general, be used instead. + * + * Return: 0 on success, errno otherwise + */ +int pci_enable_msi(struct pci_dev *dev) +{ + int rc = __pci_enable_msi_range(dev, 1, 1, NULL); + if (rc < 0) + return rc; + return 0; +} +EXPORT_SYMBOL(pci_enable_msi); + /** * pci_disable_msi() - Disable MSI interrupt mode on device * @dev: the PCI device to operate on diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index 4a1300b74518..98f07ad9af62 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -790,8 +790,8 @@ void pci_disable_msix(struct pci_dev *dev) } EXPORT_SYMBOL(pci_disable_msix); -static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, - struct irq_affinity *affd) +int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, + struct irq_affinity *affd) { int nvec; int rc; @@ -844,16 +844,6 @@ static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, } } -/* deprecated, don't use */ -int pci_enable_msi(struct pci_dev *dev) -{ - int rc = __pci_enable_msi_range(dev, 1, 1, NULL); - if (rc < 0) - return rc; - return 0; -} -EXPORT_SYMBOL(pci_enable_msi); - static int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, int maxvec, struct irq_affinity *affd, diff --git a/drivers/pci/msi/msi.h b/drivers/pci/msi/msi.h index 634879277349..00bb98d5bb0e 100644 --- a/drivers/pci/msi/msi.h +++ b/drivers/pci/msi/msi.h @@ -87,6 +87,7 @@ static inline __attribute_const__ u32 msi_multi_mask(struct msi_desc *desc) /* MSI internal functions invoked from the public APIs */ void pci_msi_shutdown(struct pci_dev *dev); void pci_free_msi_irqs(struct pci_dev *dev); +int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, struct irq_affinity *affd); /* Legacy (!IRQDOMAIN) fallbacks */ #ifdef CONFIG_PCI_MSI_ARCH_FALLBACKS From be7496c1ef47e1ba8c4b389ee23178fcf066cc4e Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 11 Nov 2022 14:54:48 +0100 Subject: [PATCH 023/134] PCI/MSI: Move pci_enable_msix_range() to api.c To disentangle the maze in msi.c, all exported device-driver MSI APIs are now to be grouped in one file, api.c. Move pci_enable_msix_range() and make its kernel-doc comprehensive. Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122014.813792885@linutronix.de --- drivers/pci/msi/api.c | 32 ++++++++++++++++++++++++++++++++ drivers/pci/msi/msi.c | 30 ++++-------------------------- drivers/pci/msi/msi.h | 3 +++ 3 files changed, 39 insertions(+), 26 deletions(-) diff --git a/drivers/pci/msi/api.c b/drivers/pci/msi/api.c index 63d7f8f6a284..d48050555d55 100644 --- a/drivers/pci/msi/api.c +++ b/drivers/pci/msi/api.c @@ -58,3 +58,35 @@ void pci_disable_msi(struct pci_dev *dev) msi_unlock_descs(&dev->dev); } EXPORT_SYMBOL(pci_disable_msi); + +/** + * pci_enable_msix_range() - Enable MSI-X interrupt mode on device + * @dev: the PCI device to operate on + * @entries: input/output parameter, array of MSI-X configuration entries + * @minvec: minimum required number of MSI-X vectors + * @maxvec: maximum desired number of MSI-X vectors + * + * Legacy device driver API to enable MSI-X interrupt mode on device and + * configure its MSI-X capability structure as appropriate. The passed + * @entries array must have each of its members "entry" field set to a + * desired (valid) MSI-X vector number, where the range of valid MSI-X + * vector numbers can be queried through pci_msix_vec_count(). If + * successful, the driver must invoke pci_disable_msix() on cleanup. + * + * NOTE: The newer pci_alloc_irq_vectors() / pci_free_irq_vectors() API + * pair should, in general, be used instead. + * + * Return: number of MSI-X vectors allocated (which might be smaller + * than @maxvecs), where Linux IRQ numbers for such allocated vectors + * are saved back in the @entries array elements' "vector" field. Return + * -ENOSPC if less than @minvecs interrupt vectors are available. + * Return -EINVAL if one of the passed @entries members "entry" field + * was invalid or a duplicate, or if plain MSI interrupts mode was + * earlier enabled on device. Return other errnos otherwise. + */ +int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, + int minvec, int maxvec) +{ + return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0); +} +EXPORT_SYMBOL(pci_enable_msix_range); diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index 98f07ad9af62..6700ef1c734e 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -844,10 +844,10 @@ int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, } } -static int __pci_enable_msix_range(struct pci_dev *dev, - struct msix_entry *entries, int minvec, - int maxvec, struct irq_affinity *affd, - int flags) +int __pci_enable_msix_range(struct pci_dev *dev, + struct msix_entry *entries, int minvec, + int maxvec, struct irq_affinity *affd, + int flags) { int rc, nvec = maxvec; @@ -886,28 +886,6 @@ static int __pci_enable_msix_range(struct pci_dev *dev, } } -/** - * pci_enable_msix_range - configure device's MSI-X capability structure - * @dev: pointer to the pci_dev data structure of MSI-X device function - * @entries: pointer to an array of MSI-X entries - * @minvec: minimum number of MSI-X IRQs requested - * @maxvec: maximum number of MSI-X IRQs requested - * - * Setup the MSI-X capability structure of device function with a maximum - * possible number of interrupts in the range between @minvec and @maxvec - * upon its software driver call to request for MSI-X mode enabled on its - * hardware device function. It returns a negative errno if an error occurs. - * If it succeeds, it returns the actual number of interrupts allocated and - * indicates the successful configuration of MSI-X capability structure - * with new allocated MSI-X interrupts. - **/ -int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, - int minvec, int maxvec) -{ - return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0); -} -EXPORT_SYMBOL(pci_enable_msix_range); - /** * pci_alloc_irq_vectors_affinity - allocate multiple IRQs for a device * @dev: PCI device to operate on diff --git a/drivers/pci/msi/msi.h b/drivers/pci/msi/msi.h index 00bb98d5bb0e..8c4a5289432d 100644 --- a/drivers/pci/msi/msi.h +++ b/drivers/pci/msi/msi.h @@ -88,8 +88,11 @@ static inline __attribute_const__ u32 msi_multi_mask(struct msi_desc *desc) void pci_msi_shutdown(struct pci_dev *dev); void pci_free_msi_irqs(struct pci_dev *dev); int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, struct irq_affinity *affd); +int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, + int maxvec, struct irq_affinity *affd, int flags); /* Legacy (!IRQDOMAIN) fallbacks */ + #ifdef CONFIG_PCI_MSI_ARCH_FALLBACKS int pci_msi_legacy_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); void pci_msi_legacy_teardown_msi_irqs(struct pci_dev *dev); From 5c0997dc33ac24b7dc0124c2fc1caa37ae39461a Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 11 Nov 2022 14:54:50 +0100 Subject: [PATCH 024/134] PCI/MSI: Move pci_alloc_irq_vectors() to api.c To disentangle the maze in msi.c, all exported device-driver MSI APIs are now to be grouped in one file, api.c. Make pci_alloc_irq_vectors() a real function instead of wrapper and add proper kernel doc to it. Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122014.870888193@linutronix.de --- drivers/pci/msi/api.c | 33 +++++++++++++++++++++++++++++++++ include/linux/pci.h | 15 +++++++++++---- 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/drivers/pci/msi/api.c b/drivers/pci/msi/api.c index d48050555d55..1714905943fb 100644 --- a/drivers/pci/msi/api.c +++ b/drivers/pci/msi/api.c @@ -90,3 +90,36 @@ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0); } EXPORT_SYMBOL(pci_enable_msix_range); + +/** + * pci_alloc_irq_vectors() - Allocate multiple device interrupt vectors + * @dev: the PCI device to operate on + * @min_vecs: minimum required number of vectors (must be >= 1) + * @max_vecs: maximum desired number of vectors + * @flags: One or more of: + * %PCI_IRQ_MSIX Allow trying MSI-X vector allocations + * %PCI_IRQ_MSI Allow trying MSI vector allocations + * %PCI_IRQ_LEGACY Allow trying legacy INTx interrupts, if + * and only if @min_vecs == 1 + * %PCI_IRQ_AFFINITY Auto-manage IRQs affinity by spreading + * the vectors around available CPUs + * + * Allocate up to @max_vecs interrupt vectors on device. MSI-X irq + * vector allocation has a higher precedence over plain MSI, which has a + * higher precedence over legacy INTx emulation. + * + * Upon a successful allocation, the caller should use pci_irq_vector() + * to get the Linux IRQ number to be passed to request_threaded_irq(). + * The driver must call pci_free_irq_vectors() on cleanup. + * + * Return: number of allocated vectors (which might be smaller than + * @max_vecs), -ENOSPC if less than @min_vecs interrupt vectors are + * available, other errnos otherwise. + */ +int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, + unsigned int max_vecs, unsigned int flags) +{ + return pci_alloc_irq_vectors_affinity(dev, min_vecs, max_vecs, + flags, NULL); +} +EXPORT_SYMBOL(pci_alloc_irq_vectors); diff --git a/include/linux/pci.h b/include/linux/pci.h index 2bda4a4e47e8..243e48f79e82 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1553,6 +1553,8 @@ static inline int pci_enable_msix_exact(struct pci_dev *dev, return rc; return 0; } +int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, + unsigned int max_vecs, unsigned int flags); int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, unsigned int max_vecs, unsigned int flags, struct irq_affinity *affd); @@ -1586,6 +1588,13 @@ pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, return 1; return -ENOSPC; } +static inline int +pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, + unsigned int max_vecs, unsigned int flags) +{ + return pci_alloc_irq_vectors_affinity(dev, min_vecs, max_vecs, + flags, NULL); +} static inline void pci_free_irq_vectors(struct pci_dev *dev) { @@ -1898,15 +1907,13 @@ pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, { return -ENOSPC; } -#endif /* CONFIG_PCI */ - static inline int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, unsigned int max_vecs, unsigned int flags) { - return pci_alloc_irq_vectors_affinity(dev, min_vecs, max_vecs, flags, - NULL); + return -ENOSPC; } +#endif /* CONFIG_PCI */ /* Include architecture-dependent settings and functions */ From beddb5efb43ee5b1c048e49225f75b03f8d36aac Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 11 Nov 2022 14:54:51 +0100 Subject: [PATCH 025/134] PCI/MSI: Move pci_alloc_irq_vectors_affinity() to api.c To disentangle the maze in msi.c, all exported device-driver MSI APIs are now to be grouped in one file, api.c. Move pci_alloc_irq_vectors_affinity() and let its kernel-doc reference pci_alloc_irq_vectors() documentation added in parent commit. Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122014.927531290@linutronix.de --- drivers/pci/msi/api.c | 59 +++++++++++++++++++++++++++++++++++++++ drivers/pci/msi/msi.c | 65 ------------------------------------------- 2 files changed, 59 insertions(+), 65 deletions(-) diff --git a/drivers/pci/msi/api.c b/drivers/pci/msi/api.c index 1714905943fb..8546749afa6e 100644 --- a/drivers/pci/msi/api.c +++ b/drivers/pci/msi/api.c @@ -123,3 +123,62 @@ int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, flags, NULL); } EXPORT_SYMBOL(pci_alloc_irq_vectors); + +/** + * pci_alloc_irq_vectors_affinity() - Allocate multiple device interrupt + * vectors with affinity requirements + * @dev: the PCI device to operate on + * @min_vecs: minimum required number of vectors (must be >= 1) + * @max_vecs: maximum desired number of vectors + * @flags: allocation flags, as in pci_alloc_irq_vectors() + * @affd: affinity requirements (can be %NULL). + * + * Same as pci_alloc_irq_vectors(), but with the extra @affd parameter. + * Check that function docs, and &struct irq_affinity, for more details. + */ +int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, + unsigned int max_vecs, unsigned int flags, + struct irq_affinity *affd) +{ + struct irq_affinity msi_default_affd = {0}; + int nvecs = -ENOSPC; + + if (flags & PCI_IRQ_AFFINITY) { + if (!affd) + affd = &msi_default_affd; + } else { + if (WARN_ON(affd)) + affd = NULL; + } + + if (flags & PCI_IRQ_MSIX) { + nvecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs, + affd, flags); + if (nvecs > 0) + return nvecs; + } + + if (flags & PCI_IRQ_MSI) { + nvecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, affd); + if (nvecs > 0) + return nvecs; + } + + /* use legacy IRQ if allowed */ + if (flags & PCI_IRQ_LEGACY) { + if (min_vecs == 1 && dev->irq) { + /* + * Invoke the affinity spreading logic to ensure that + * the device driver can adjust queue configuration + * for the single interrupt case. + */ + if (affd) + irq_create_affinity_masks(1, affd); + pci_intx(dev, 1); + return 1; + } + } + + return nvecs; +} +EXPORT_SYMBOL(pci_alloc_irq_vectors_affinity); diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index 6700ef1c734e..a028774f438d 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -886,71 +886,6 @@ int __pci_enable_msix_range(struct pci_dev *dev, } } -/** - * pci_alloc_irq_vectors_affinity - allocate multiple IRQs for a device - * @dev: PCI device to operate on - * @min_vecs: minimum number of vectors required (must be >= 1) - * @max_vecs: maximum (desired) number of vectors - * @flags: flags or quirks for the allocation - * @affd: optional description of the affinity requirements - * - * Allocate up to @max_vecs interrupt vectors for @dev, using MSI-X or MSI - * vectors if available, and fall back to a single legacy vector - * if neither is available. Return the number of vectors allocated, - * (which might be smaller than @max_vecs) if successful, or a negative - * error code on error. If less than @min_vecs interrupt vectors are - * available for @dev the function will fail with -ENOSPC. - * - * To get the Linux IRQ number used for a vector that can be passed to - * request_irq() use the pci_irq_vector() helper. - */ -int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, - unsigned int max_vecs, unsigned int flags, - struct irq_affinity *affd) -{ - struct irq_affinity msi_default_affd = {0}; - int nvecs = -ENOSPC; - - if (flags & PCI_IRQ_AFFINITY) { - if (!affd) - affd = &msi_default_affd; - } else { - if (WARN_ON(affd)) - affd = NULL; - } - - if (flags & PCI_IRQ_MSIX) { - nvecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs, - affd, flags); - if (nvecs > 0) - return nvecs; - } - - if (flags & PCI_IRQ_MSI) { - nvecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, affd); - if (nvecs > 0) - return nvecs; - } - - /* use legacy IRQ if allowed */ - if (flags & PCI_IRQ_LEGACY) { - if (min_vecs == 1 && dev->irq) { - /* - * Invoke the affinity spreading logic to ensure that - * the device driver can adjust queue configuration - * for the single interrupt case. - */ - if (affd) - irq_create_affinity_masks(1, affd); - pci_intx(dev, 1); - return 1; - } - } - - return nvecs; -} -EXPORT_SYMBOL(pci_alloc_irq_vectors_affinity); - /** * pci_free_irq_vectors - free previously allocated IRQs for a device * @dev: PCI device to operate on From 017239c8db209307d2acfc0f9a3b104c39f911b3 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 11 Nov 2022 14:54:53 +0100 Subject: [PATCH 026/134] PCI/MSI: Move pci_irq_vector() to api.c To disentangle the maze in msi.c, all exported device-driver MSI APIs are now to be grouped in one file, api.c. Move pci_irq_vector() and let its kernel-doc match the rest of the file. Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122014.984490384@linutronix.de --- drivers/pci/msi/api.c | 23 +++++++++++++++++++++++ drivers/pci/msi/msi.c | 24 ------------------------ 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/drivers/pci/msi/api.c b/drivers/pci/msi/api.c index 8546749afa6e..0f1ec87e3f9f 100644 --- a/drivers/pci/msi/api.c +++ b/drivers/pci/msi/api.c @@ -182,3 +182,26 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, return nvecs; } EXPORT_SYMBOL(pci_alloc_irq_vectors_affinity); + +/** + * pci_irq_vector() - Get Linux IRQ number of a device interrupt vector + * @dev: the PCI device to operate on + * @nr: device-relative interrupt vector index (0-based); has different + * meanings, depending on interrupt mode + * MSI-X the index in the MSI-X vector table + * MSI the index of the enabled MSI vectors + * INTx must be 0 + * + * Return: the Linux IRQ number, or -EINVAL if @nr is out of range + */ +int pci_irq_vector(struct pci_dev *dev, unsigned int nr) +{ + unsigned int irq; + + if (!dev->msi_enabled && !dev->msix_enabled) + return !nr ? dev->irq : -EINVAL; + + irq = msi_get_virq(&dev->dev, nr); + return irq ? irq : -EINVAL; +} +EXPORT_SYMBOL(pci_irq_vector); diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index a028774f438d..38ad2fe4b85c 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -899,30 +899,6 @@ void pci_free_irq_vectors(struct pci_dev *dev) } EXPORT_SYMBOL(pci_free_irq_vectors); -/** - * pci_irq_vector - return Linux IRQ number of a device vector - * @dev: PCI device to operate on - * @nr: Interrupt vector index (0-based) - * - * @nr has the following meanings depending on the interrupt mode: - * MSI-X: The index in the MSI-X vector table - * MSI: The index of the enabled MSI vectors - * INTx: Must be 0 - * - * Return: The Linux interrupt number or -EINVAl if @nr is out of range. - */ -int pci_irq_vector(struct pci_dev *dev, unsigned int nr) -{ - unsigned int irq; - - if (!dev->msi_enabled && !dev->msix_enabled) - return !nr ? dev->irq : -EINVAL; - - irq = msi_get_virq(&dev->dev, nr); - return irq ? irq : -EINVAL; -} -EXPORT_SYMBOL(pci_irq_vector); - /** * pci_irq_get_affinity - return the affinity of a particular MSI vector * @dev: PCI device to operate on From 059f778d66ba88e1acad89a46cfb35eb8703feef Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 11 Nov 2022 14:54:54 +0100 Subject: [PATCH 027/134] PCI/MSI: Move pci_free_irq_vectors() to api.c To disentangle the maze in msi.c, all exported device-driver MSI APIs are now to be grouped in one file, api.c. Move pci_free_irq_vectors() and make its kernel-doc comprehensive. Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122015.042870570@linutronix.de --- drivers/pci/msi/api.c | 15 +++++++++++++++ drivers/pci/msi/msi.c | 13 ------------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/pci/msi/api.c b/drivers/pci/msi/api.c index 0f1ec87e3f9f..2ff2a9ccfc47 100644 --- a/drivers/pci/msi/api.c +++ b/drivers/pci/msi/api.c @@ -205,3 +205,18 @@ int pci_irq_vector(struct pci_dev *dev, unsigned int nr) return irq ? irq : -EINVAL; } EXPORT_SYMBOL(pci_irq_vector); + +/** + * pci_free_irq_vectors() - Free previously allocated IRQs for a device + * @dev: the PCI device to operate on + * + * Undo the interrupt vector allocations and possible device MSI/MSI-X + * enablement earlier done through pci_alloc_irq_vectors_affinity() or + * pci_alloc_irq_vectors(). + */ +void pci_free_irq_vectors(struct pci_dev *dev) +{ + pci_disable_msix(dev); + pci_disable_msi(dev); +} +EXPORT_SYMBOL(pci_free_irq_vectors); diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index 38ad2fe4b85c..ed8caf5ac99f 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -886,19 +886,6 @@ int __pci_enable_msix_range(struct pci_dev *dev, } } -/** - * pci_free_irq_vectors - free previously allocated IRQs for a device - * @dev: PCI device to operate on - * - * Undoes the allocations and enabling in pci_alloc_irq_vectors(). - */ -void pci_free_irq_vectors(struct pci_dev *dev) -{ - pci_disable_msix(dev); - pci_disable_msi(dev); -} -EXPORT_SYMBOL(pci_free_irq_vectors); - /** * pci_irq_get_affinity - return the affinity of a particular MSI vector * @dev: PCI device to operate on From 7b50f62776672e7b328455eddc90ceb01b64ac3e Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 11 Nov 2022 14:54:56 +0100 Subject: [PATCH 028/134] PCI/MSI: Move pci_msix_vec_count() to api.c To disentangle the maze in msi.c, all exported device-driver MSI APIs are now to be grouped in one file, api.c. Move pci_msix_vec_count() and make its kernel-doc comprehensive. Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122015.099461602@linutronix.de --- drivers/pci/msi/api.c | 20 ++++++++++++++++++++ drivers/pci/msi/msi.c | 20 -------------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/pci/msi/api.c b/drivers/pci/msi/api.c index 2ff2a9ccfc47..83ea38ffa116 100644 --- a/drivers/pci/msi/api.c +++ b/drivers/pci/msi/api.c @@ -59,6 +59,26 @@ void pci_disable_msi(struct pci_dev *dev) } EXPORT_SYMBOL(pci_disable_msi); +/** + * pci_msix_vec_count() - Get number of MSI-X interrupt vectors on device + * @dev: the PCI device to operate on + * + * Return: number of MSI-X interrupt vectors available on this device + * (i.e., the device's MSI-X capability structure "table size"), -EINVAL + * if the device is not MSI-X capable, other errnos otherwise. + */ +int pci_msix_vec_count(struct pci_dev *dev) +{ + u16 control; + + if (!dev->msix_cap) + return -EINVAL; + + pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control); + return msix_table_size(control); +} +EXPORT_SYMBOL(pci_msix_vec_count); + /** * pci_enable_msix_range() - Enable MSI-X interrupt mode on device * @dev: the PCI device to operate on diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index ed8caf5ac99f..1226d66da992 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -701,26 +701,6 @@ void pci_msi_shutdown(struct pci_dev *dev) pcibios_alloc_irq(dev); } -/** - * pci_msix_vec_count - return the number of device's MSI-X table entries - * @dev: pointer to the pci_dev data structure of MSI-X device function - * This function returns the number of device's MSI-X table entries and - * therefore the number of MSI-X vectors device is capable of sending. - * It returns a negative errno if the device is not capable of sending MSI-X - * interrupts. - **/ -int pci_msix_vec_count(struct pci_dev *dev) -{ - u16 control; - - if (!dev->msix_cap) - return -EINVAL; - - pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control); - return msix_table_size(control); -} -EXPORT_SYMBOL(pci_msix_vec_count); - static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec, struct irq_affinity *affd, int flags) { From 18e1926b8c8b7c2249c24057d5f836c578e29f08 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 11 Nov 2022 14:54:58 +0100 Subject: [PATCH 029/134] PCI/MSI: Move pci_disable_msix() to api.c To disentangle the maze in msi.c, all exported device-driver MSI APIs are now to be grouped in one file, api.c. Move pci_disable_msix() and make its kernel-doc comprehensive. Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122015.156785224@linutronix.de --- drivers/pci/msi/api.c | 24 ++++++++++++++++++++++++ drivers/pci/msi/msi.c | 14 +------------- drivers/pci/msi/msi.h | 1 + 3 files changed, 26 insertions(+), 13 deletions(-) diff --git a/drivers/pci/msi/api.c b/drivers/pci/msi/api.c index 83ea38ffa116..20a580bea48f 100644 --- a/drivers/pci/msi/api.c +++ b/drivers/pci/msi/api.c @@ -111,6 +111,30 @@ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, } EXPORT_SYMBOL(pci_enable_msix_range); +/** + * pci_disable_msix() - Disable MSI-X interrupt mode on device + * @dev: the PCI device to operate on + * + * Legacy device driver API to disable MSI-X interrupt mode on device, + * free earlier-allocated interrupt vectors, and restore INTx. + * The PCI device Linux IRQ (@dev->irq) is restored to its default pin + * assertion IRQ. This is the cleanup pair of pci_enable_msix_range(). + * + * NOTE: The newer pci_alloc_irq_vectors() / pci_free_irq_vectors() API + * pair should, in general, be used instead. + */ +void pci_disable_msix(struct pci_dev *dev) +{ + if (!pci_msi_enabled() || !dev || !dev->msix_enabled) + return; + + msi_lock_descs(&dev->dev); + pci_msix_shutdown(dev); + pci_free_msi_irqs(dev); + msi_unlock_descs(&dev->dev); +} +EXPORT_SYMBOL(pci_disable_msix); + /** * pci_alloc_irq_vectors() - Allocate multiple device interrupt vectors * @dev: the PCI device to operate on diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index 1226d66da992..6fa90d07d2e4 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -736,7 +736,7 @@ static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, return msix_capability_init(dev, entries, nvec, affd); } -static void pci_msix_shutdown(struct pci_dev *dev) +void pci_msix_shutdown(struct pci_dev *dev) { struct msi_desc *desc; @@ -758,18 +758,6 @@ static void pci_msix_shutdown(struct pci_dev *dev) pcibios_alloc_irq(dev); } -void pci_disable_msix(struct pci_dev *dev) -{ - if (!pci_msi_enable || !dev || !dev->msix_enabled) - return; - - msi_lock_descs(&dev->dev); - pci_msix_shutdown(dev); - pci_free_msi_irqs(dev); - msi_unlock_descs(&dev->dev); -} -EXPORT_SYMBOL(pci_disable_msix); - int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, struct irq_affinity *affd) { diff --git a/drivers/pci/msi/msi.h b/drivers/pci/msi/msi.h index 8c4a5289432d..77e2587f7e4f 100644 --- a/drivers/pci/msi/msi.h +++ b/drivers/pci/msi/msi.h @@ -86,6 +86,7 @@ static inline __attribute_const__ u32 msi_multi_mask(struct msi_desc *desc) /* MSI internal functions invoked from the public APIs */ void pci_msi_shutdown(struct pci_dev *dev); +void pci_msix_shutdown(struct pci_dev *dev); void pci_free_msi_irqs(struct pci_dev *dev); int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, struct irq_affinity *affd); int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, From be37b8428b7b7740bbbc29c17cfa2ee42b9e2d8b Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 11 Nov 2022 14:54:59 +0100 Subject: [PATCH 030/134] PCI/MSI: Move pci_irq_get_affinity() to api.c To disentangle the maze in msi.c, all exported device-driver MSI APIs are now to be grouped in one file, api.c. Move pci_irq_get_affinity() and let its kernel-doc match rest of the file. Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122015.214792769@linutronix.de --- drivers/pci/msi/api.c | 43 +++++++++++++++++++++++++++++++++++++++++++ drivers/pci/msi/msi.c | 38 -------------------------------------- 2 files changed, 43 insertions(+), 38 deletions(-) diff --git a/drivers/pci/msi/api.c b/drivers/pci/msi/api.c index 20a580bea48f..93ddc5570693 100644 --- a/drivers/pci/msi/api.c +++ b/drivers/pci/msi/api.c @@ -9,6 +9,7 @@ */ #include +#include #include "msi.h" @@ -250,6 +251,48 @@ int pci_irq_vector(struct pci_dev *dev, unsigned int nr) } EXPORT_SYMBOL(pci_irq_vector); +/** + * pci_irq_get_affinity() - Get a device interrupt vector affinity + * @dev: the PCI device to operate on + * @nr: device-relative interrupt vector index (0-based); has different + * meanings, depending on interrupt mode + * MSI-X the index in the MSI-X vector table + * MSI the index of the enabled MSI vectors + * INTx must be 0 + * + * Return: MSI/MSI-X vector affinity, NULL if @nr is out of range or if + * the MSI(-X) vector was allocated without explicit affinity + * requirements (e.g., by pci_enable_msi(), pci_enable_msix_range(), or + * pci_alloc_irq_vectors() without the %PCI_IRQ_AFFINITY flag). Return a + * generic set of CPU IDs representing all possible CPUs available + * during system boot if the device is in legacy INTx mode. + */ +const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr) +{ + int idx, irq = pci_irq_vector(dev, nr); + struct msi_desc *desc; + + if (WARN_ON_ONCE(irq <= 0)) + return NULL; + + desc = irq_get_msi_desc(irq); + /* Non-MSI does not have the information handy */ + if (!desc) + return cpu_possible_mask; + + /* MSI[X] interrupts can be allocated without affinity descriptor */ + if (!desc->affinity) + return NULL; + + /* + * MSI has a mask array in the descriptor. + * MSI-X has a single mask. + */ + idx = dev->msi_enabled ? nr : 0; + return &desc->affinity[idx].mask; +} +EXPORT_SYMBOL(pci_irq_get_affinity); + /** * pci_free_irq_vectors() - Free previously allocated IRQs for a device * @dev: the PCI device to operate on diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index 6fa90d07d2e4..d78646d1c116 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -854,44 +854,6 @@ int __pci_enable_msix_range(struct pci_dev *dev, } } -/** - * pci_irq_get_affinity - return the affinity of a particular MSI vector - * @dev: PCI device to operate on - * @nr: device-relative interrupt vector index (0-based). - * - * @nr has the following meanings depending on the interrupt mode: - * MSI-X: The index in the MSI-X vector table - * MSI: The index of the enabled MSI vectors - * INTx: Must be 0 - * - * Return: A cpumask pointer or NULL if @nr is out of range - */ -const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr) -{ - int idx, irq = pci_irq_vector(dev, nr); - struct msi_desc *desc; - - if (WARN_ON_ONCE(irq <= 0)) - return NULL; - - desc = irq_get_msi_desc(irq); - /* Non-MSI does not have the information handy */ - if (!desc) - return cpu_possible_mask; - - /* MSI[X] interrupts can be allocated without affinity descriptor */ - if (!desc->affinity) - return NULL; - - /* - * MSI has a mask array in the descriptor. - * MSI-X has a single mask. - */ - idx = dev->msi_enabled ? nr : 0; - return &desc->affinity[idx].mask; -} -EXPORT_SYMBOL(pci_irq_get_affinity); - struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc) { return to_pci_dev(desc->dev); From 897a0b6aa8c7ee0015e8d1f781e8e61069aafe16 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 11 Nov 2022 14:55:01 +0100 Subject: [PATCH 031/134] PCI/MSI: Move pci_msi_enabled() to api.c To disentangle the maze in msi.c, all exported device-driver MSI APIs are now to be grouped in one file, api.c. Move pci_msi_enabled() and make its kernel-doc comprehensive. Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122015.271447896@linutronix.de --- drivers/pci/msi/api.c | 12 ++++++++++++ drivers/pci/msi/msi.c | 14 +------------- drivers/pci/msi/msi.h | 3 +++ 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/drivers/pci/msi/api.c b/drivers/pci/msi/api.c index 93ddc5570693..49ae3a3f8282 100644 --- a/drivers/pci/msi/api.c +++ b/drivers/pci/msi/api.c @@ -307,3 +307,15 @@ void pci_free_irq_vectors(struct pci_dev *dev) pci_disable_msi(dev); } EXPORT_SYMBOL(pci_free_irq_vectors); + +/** + * pci_msi_enabled() - Are MSI(-X) interrupts enabled system-wide? + * + * Return: true if MSI has not been globally disabled through ACPI FADT, + * PCI bridge quirks, or the "pci=nomsi" kernel command-line option. + */ +int pci_msi_enabled(void) +{ + return pci_msi_enable; +} +EXPORT_SYMBOL(pci_msi_enabled); diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index d78646d1c116..59c33bc7fe81 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -13,7 +13,7 @@ #include "../pci.h" #include "msi.h" -static int pci_msi_enable = 1; +int pci_msi_enable = 1; int pci_msi_ignore_mask; void pci_msi_update_mask(struct msi_desc *desc, u32 clear, u32 set) @@ -864,15 +864,3 @@ void pci_no_msi(void) { pci_msi_enable = 0; } - -/** - * pci_msi_enabled - is MSI enabled? - * - * Returns true if MSI has not been disabled by the command-line option - * pci=nomsi. - **/ -int pci_msi_enabled(void) -{ - return pci_msi_enable; -} -EXPORT_SYMBOL(pci_msi_enabled); diff --git a/drivers/pci/msi/msi.h b/drivers/pci/msi/msi.h index 77e2587f7e4f..f3f4ede53171 100644 --- a/drivers/pci/msi/msi.h +++ b/drivers/pci/msi/msi.h @@ -84,6 +84,9 @@ static inline __attribute_const__ u32 msi_multi_mask(struct msi_desc *desc) return (1 << (1 << desc->pci.msi_attrib.multi_cap)) - 1; } +/* Subsystem variables */ +extern int pci_msi_enable; + /* MSI internal functions invoked from the public APIs */ void pci_msi_shutdown(struct pci_dev *dev); void pci_msix_shutdown(struct pci_dev *dev); From 57127da98bc87688324cc2d29927b340d7754701 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 11 Nov 2022 14:55:03 +0100 Subject: [PATCH 032/134] PCI/MSI: Move pci_msi_restore_state() to api.c To disentangle the maze in msi.c, all exported device-driver MSI APIs are now to be grouped in one file, api.c. Move pci_msi_enabled() and add kernel-doc for the function. Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122015.331584998@linutronix.de --- drivers/pci/msi/api.c | 15 +++++++++++++++ drivers/pci/msi/msi.c | 11 ++--------- drivers/pci/msi/msi.h | 2 ++ 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/pci/msi/api.c b/drivers/pci/msi/api.c index 49ae3a3f8282..6c3ad4863850 100644 --- a/drivers/pci/msi/api.c +++ b/drivers/pci/msi/api.c @@ -308,6 +308,21 @@ void pci_free_irq_vectors(struct pci_dev *dev) } EXPORT_SYMBOL(pci_free_irq_vectors); +/** + * pci_restore_msi_state() - Restore cached MSI(-X) state on device + * @dev: the PCI device to operate on + * + * Write the Linux-cached MSI(-X) state back on device. This is + * typically useful upon system resume, or after an error-recovery PCI + * adapter reset. + */ +void pci_restore_msi_state(struct pci_dev *dev) +{ + __pci_restore_msi_state(dev); + __pci_restore_msix_state(dev); +} +EXPORT_SYMBOL_GPL(pci_restore_msi_state); + /** * pci_msi_enabled() - Are MSI(-X) interrupts enabled system-wide? * diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index 59c33bc7fe81..a5d168c823ff 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -199,7 +199,7 @@ bool __weak arch_restore_msi_irqs(struct pci_dev *dev) return true; } -static void __pci_restore_msi_state(struct pci_dev *dev) +void __pci_restore_msi_state(struct pci_dev *dev) { struct msi_desc *entry; u16 control; @@ -231,7 +231,7 @@ static void pci_msix_clear_and_set_ctrl(struct pci_dev *dev, u16 clear, u16 set) pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, ctrl); } -static void __pci_restore_msix_state(struct pci_dev *dev) +void __pci_restore_msix_state(struct pci_dev *dev) { struct msi_desc *entry; bool write_msg; @@ -257,13 +257,6 @@ static void __pci_restore_msix_state(struct pci_dev *dev) pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0); } -void pci_restore_msi_state(struct pci_dev *dev) -{ - __pci_restore_msi_state(dev); - __pci_restore_msix_state(dev); -} -EXPORT_SYMBOL_GPL(pci_restore_msi_state); - static void pcim_msi_release(void *pcidev) { struct pci_dev *dev = pcidev; diff --git a/drivers/pci/msi/msi.h b/drivers/pci/msi/msi.h index f3f4ede53171..8170ef2c5ad0 100644 --- a/drivers/pci/msi/msi.h +++ b/drivers/pci/msi/msi.h @@ -94,6 +94,8 @@ void pci_free_msi_irqs(struct pci_dev *dev); int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, struct irq_affinity *affd); int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, int maxvec, struct irq_affinity *affd, int flags); +void __pci_restore_msi_state(struct pci_dev *dev); +void __pci_restore_msix_state(struct pci_dev *dev); /* Legacy (!IRQDOMAIN) fallbacks */ From 88614075a952b1af50f5fb10c958e311f6b4f68a Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 11 Nov 2022 14:55:04 +0100 Subject: [PATCH 033/134] Documentation: PCI: Add reference to PCI/MSI device driver APIs All exported device-driver MSI APIs are now grouped in one place at drivers/pci/msi/api.c with comprehensive kernel-docs added. Reference these kernel-docs in the official PCI/MSI howto. Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122015.397739421@linutronix.de --- Documentation/PCI/msi-howto.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/Documentation/PCI/msi-howto.rst b/Documentation/PCI/msi-howto.rst index aa2046af69f7..8ae461e97c54 100644 --- a/Documentation/PCI/msi-howto.rst +++ b/Documentation/PCI/msi-howto.rst @@ -285,3 +285,13 @@ to bridges between the PCI root and the device, MSIs are disabled. It is also worth checking the device driver to see whether it supports MSIs. For example, it may contain calls to pci_alloc_irq_vectors() with the PCI_IRQ_MSI or PCI_IRQ_MSIX flags. + + +List of device drivers MSI(-X) APIs +=================================== + +The PCI/MSI subystem has a dedicated C file for its exported device driver +APIs — `drivers/pci/msi/api.c`. The following functions are exported: + +.. kernel-doc:: drivers/pci/msi/api.c + :export: From 12910ffd189e23d8996e0d19b723518accf57b76 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 11 Nov 2022 14:55:06 +0100 Subject: [PATCH 034/134] PCI/MSI: Reorder functions in msi.c There is no way to navigate msi.c without banging the head against the wall every now and then because MSI and MSI-X specific functions are intermingled and the code flow is completely non-obvious. Reorder everthing so common helpers, MSI and MSI-X specific functions are grouped together. Suggested-by: Thomas Gleixner Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122015.459089736@linutronix.de --- drivers/pci/msi/msi.c | 581 +++++++++++++++++++++--------------------- 1 file changed, 297 insertions(+), 284 deletions(-) diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index a5d168c823ff..380e65155b93 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -16,6 +16,97 @@ int pci_msi_enable = 1; int pci_msi_ignore_mask; +/** + * pci_msi_supported - check whether MSI may be enabled on a device + * @dev: pointer to the pci_dev data structure of MSI device function + * @nvec: how many MSIs have been requested? + * + * Look at global flags, the device itself, and its parent buses + * to determine if MSI/-X are supported for the device. If MSI/-X is + * supported return 1, else return 0. + **/ +static int pci_msi_supported(struct pci_dev *dev, int nvec) +{ + struct pci_bus *bus; + + /* MSI must be globally enabled and supported by the device */ + if (!pci_msi_enable) + return 0; + + if (!dev || dev->no_msi) + return 0; + + /* + * You can't ask to have 0 or less MSIs configured. + * a) it's stupid .. + * b) the list manipulation code assumes nvec >= 1. + */ + if (nvec < 1) + return 0; + + /* + * Any bridge which does NOT route MSI transactions from its + * secondary bus to its primary bus must set NO_MSI flag on + * the secondary pci_bus. + * + * The NO_MSI flag can either be set directly by: + * - arch-specific PCI host bus controller drivers (deprecated) + * - quirks for specific PCI bridges + * + * or indirectly by platform-specific PCI host bridge drivers by + * advertising the 'msi_domain' property, which results in + * the NO_MSI flag when no MSI domain is found for this bridge + * at probe time. + */ + for (bus = dev->bus; bus; bus = bus->parent) + if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI) + return 0; + + return 1; +} + +static void pcim_msi_release(void *pcidev) +{ + struct pci_dev *dev = pcidev; + + dev->is_msi_managed = false; + pci_free_irq_vectors(dev); +} + +/* + * Needs to be separate from pcim_release to prevent an ordering problem + * vs. msi_device_data_release() in the MSI core code. + */ +static int pcim_setup_msi_release(struct pci_dev *dev) +{ + int ret; + + if (!pci_is_managed(dev) || dev->is_msi_managed) + return 0; + + ret = devm_add_action(&dev->dev, pcim_msi_release, dev); + if (!ret) + dev->is_msi_managed = true; + return ret; +} + +/* + * Ordering vs. devres: msi device data has to be installed first so that + * pcim_msi_release() is invoked before it on device release. + */ +static int pci_setup_msi_context(struct pci_dev *dev) +{ + int ret = msi_setup_device_data(&dev->dev); + + if (!ret) + ret = pcim_setup_msi_release(dev); + return ret; +} + +/* + * Helper functions for mask/unmask and MSI message handling + */ + void pci_msi_update_mask(struct msi_desc *desc, u32 clear, u32 set) { raw_spinlock_t *lock = &to_pci_dev(desc->dev)->msi_lock; @@ -163,15 +254,8 @@ void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) } EXPORT_SYMBOL_GPL(pci_write_msi_msg); -void pci_free_msi_irqs(struct pci_dev *dev) -{ - pci_msi_teardown_msi_irqs(dev); - if (dev->msix_base) { - iounmap(dev->msix_base); - dev->msix_base = NULL; - } -} +/* PCI/MSI specific functionality */ static void pci_intx_for_msi(struct pci_dev *dev, int enable) { @@ -190,111 +274,6 @@ static void pci_msi_set_enable(struct pci_dev *dev, int enable) pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control); } -/* - * Architecture override returns true when the PCI MSI message should be - * written by the generic restore function. - */ -bool __weak arch_restore_msi_irqs(struct pci_dev *dev) -{ - return true; -} - -void __pci_restore_msi_state(struct pci_dev *dev) -{ - struct msi_desc *entry; - u16 control; - - if (!dev->msi_enabled) - return; - - entry = irq_get_msi_desc(dev->irq); - - pci_intx_for_msi(dev, 0); - pci_msi_set_enable(dev, 0); - if (arch_restore_msi_irqs(dev)) - __pci_write_msi_msg(entry, &entry->msg); - - pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control); - pci_msi_update_mask(entry, 0, 0); - control &= ~PCI_MSI_FLAGS_QSIZE; - control |= (entry->pci.msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE; - pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control); -} - -static void pci_msix_clear_and_set_ctrl(struct pci_dev *dev, u16 clear, u16 set) -{ - u16 ctrl; - - pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl); - ctrl &= ~clear; - ctrl |= set; - pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, ctrl); -} - -void __pci_restore_msix_state(struct pci_dev *dev) -{ - struct msi_desc *entry; - bool write_msg; - - if (!dev->msix_enabled) - return; - - /* route the table */ - pci_intx_for_msi(dev, 0); - pci_msix_clear_and_set_ctrl(dev, 0, - PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL); - - write_msg = arch_restore_msi_irqs(dev); - - msi_lock_descs(&dev->dev); - msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) { - if (write_msg) - __pci_write_msi_msg(entry, &entry->msg); - pci_msix_write_vector_ctrl(entry, entry->pci.msix_ctrl); - } - msi_unlock_descs(&dev->dev); - - pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0); -} - -static void pcim_msi_release(void *pcidev) -{ - struct pci_dev *dev = pcidev; - - dev->is_msi_managed = false; - pci_free_irq_vectors(dev); -} - -/* - * Needs to be separate from pcim_release to prevent an ordering problem - * vs. msi_device_data_release() in the MSI core code. - */ -static int pcim_setup_msi_release(struct pci_dev *dev) -{ - int ret; - - if (!pci_is_managed(dev) || dev->is_msi_managed) - return 0; - - ret = devm_add_action(&dev->dev, pcim_msi_release, dev); - if (!ret) - dev->is_msi_managed = true; - return ret; -} - -/* - * Ordering vs. devres: msi device data has to be installed first so that - * pcim_msi_release() is invoked before it on device release. - */ -static int pci_setup_msi_context(struct pci_dev *dev) -{ - int ret = msi_setup_device_data(&dev->dev); - - if (!ret) - ret = pcim_setup_msi_release(dev); - return ret; -} - static int msi_setup_msi_desc(struct pci_dev *dev, int nvec, struct irq_affinity_desc *masks) { @@ -415,6 +394,149 @@ unlock: return ret; } +int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, + struct irq_affinity *affd) +{ + int nvec; + int rc; + + if (!pci_msi_supported(dev, minvec) || dev->current_state != PCI_D0) + return -EINVAL; + + /* Check whether driver already requested MSI-X IRQs */ + if (dev->msix_enabled) { + pci_info(dev, "can't enable MSI (MSI-X already enabled)\n"); + return -EINVAL; + } + + if (maxvec < minvec) + return -ERANGE; + + if (WARN_ON_ONCE(dev->msi_enabled)) + return -EINVAL; + + nvec = pci_msi_vec_count(dev); + if (nvec < 0) + return nvec; + if (nvec < minvec) + return -ENOSPC; + + if (nvec > maxvec) + nvec = maxvec; + + rc = pci_setup_msi_context(dev); + if (rc) + return rc; + + for (;;) { + if (affd) { + nvec = irq_calc_affinity_vectors(minvec, nvec, affd); + if (nvec < minvec) + return -ENOSPC; + } + + rc = msi_capability_init(dev, nvec, affd); + if (rc == 0) + return nvec; + + if (rc < 0) + return rc; + if (rc < minvec) + return -ENOSPC; + + nvec = rc; + } +} + +/** + * pci_msi_vec_count - Return the number of MSI vectors a device can send + * @dev: device to report about + * + * This function returns the number of MSI vectors a device requested via + * Multiple Message Capable register. It returns a negative errno if the + * device is not capable sending MSI interrupts. Otherwise, the call succeeds + * and returns a power of two, up to a maximum of 2^5 (32), according to the + * MSI specification. + **/ +int pci_msi_vec_count(struct pci_dev *dev) +{ + int ret; + u16 msgctl; + + if (!dev->msi_cap) + return -EINVAL; + + pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl); + ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1); + + return ret; +} +EXPORT_SYMBOL(pci_msi_vec_count); + +/* + * Architecture override returns true when the PCI MSI message should be + * written by the generic restore function. + */ +bool __weak arch_restore_msi_irqs(struct pci_dev *dev) +{ + return true; +} + +void __pci_restore_msi_state(struct pci_dev *dev) +{ + struct msi_desc *entry; + u16 control; + + if (!dev->msi_enabled) + return; + + entry = irq_get_msi_desc(dev->irq); + + pci_intx_for_msi(dev, 0); + pci_msi_set_enable(dev, 0); + if (arch_restore_msi_irqs(dev)) + __pci_write_msi_msg(entry, &entry->msg); + + pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &control); + pci_msi_update_mask(entry, 0, 0); + control &= ~PCI_MSI_FLAGS_QSIZE; + control |= (entry->pci.msi_attrib.multiple << 4) | PCI_MSI_FLAGS_ENABLE; + pci_write_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, control); +} + +void pci_msi_shutdown(struct pci_dev *dev) +{ + struct msi_desc *desc; + + if (!pci_msi_enable || !dev || !dev->msi_enabled) + return; + + pci_msi_set_enable(dev, 0); + pci_intx_for_msi(dev, 1); + dev->msi_enabled = 0; + + /* Return the device with MSI unmasked as initial states */ + desc = msi_first_desc(&dev->dev, MSI_DESC_ALL); + if (!WARN_ON_ONCE(!desc)) + pci_msi_unmask(desc, msi_multi_mask(desc)); + + /* Restore dev->irq to its default pin-assertion IRQ */ + dev->irq = desc->pci.msi_attrib.default_irq; + pcibios_alloc_irq(dev); +} + +/* PCI/MSI-X specific functionality */ + +static void pci_msix_clear_and_set_ctrl(struct pci_dev *dev, u16 clear, u16 set) +{ + u16 ctrl; + + pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &ctrl); + ctrl &= ~clear; + ctrl |= set; + pci_write_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, ctrl); +} + static void __iomem *msix_map_region(struct pci_dev *dev, unsigned int nr_entries) { @@ -599,101 +721,6 @@ out_disable: return ret; } -/** - * pci_msi_supported - check whether MSI may be enabled on a device - * @dev: pointer to the pci_dev data structure of MSI device function - * @nvec: how many MSIs have been requested? - * - * Look at global flags, the device itself, and its parent buses - * to determine if MSI/-X are supported for the device. If MSI/-X is - * supported return 1, else return 0. - **/ -static int pci_msi_supported(struct pci_dev *dev, int nvec) -{ - struct pci_bus *bus; - - /* MSI must be globally enabled and supported by the device */ - if (!pci_msi_enable) - return 0; - - if (!dev || dev->no_msi) - return 0; - - /* - * You can't ask to have 0 or less MSIs configured. - * a) it's stupid .. - * b) the list manipulation code assumes nvec >= 1. - */ - if (nvec < 1) - return 0; - - /* - * Any bridge which does NOT route MSI transactions from its - * secondary bus to its primary bus must set NO_MSI flag on - * the secondary pci_bus. - * - * The NO_MSI flag can either be set directly by: - * - arch-specific PCI host bus controller drivers (deprecated) - * - quirks for specific PCI bridges - * - * or indirectly by platform-specific PCI host bridge drivers by - * advertising the 'msi_domain' property, which results in - * the NO_MSI flag when no MSI domain is found for this bridge - * at probe time. - */ - for (bus = dev->bus; bus; bus = bus->parent) - if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI) - return 0; - - return 1; -} - -/** - * pci_msi_vec_count - Return the number of MSI vectors a device can send - * @dev: device to report about - * - * This function returns the number of MSI vectors a device requested via - * Multiple Message Capable register. It returns a negative errno if the - * device is not capable sending MSI interrupts. Otherwise, the call succeeds - * and returns a power of two, up to a maximum of 2^5 (32), according to the - * MSI specification. - **/ -int pci_msi_vec_count(struct pci_dev *dev) -{ - int ret; - u16 msgctl; - - if (!dev->msi_cap) - return -EINVAL; - - pci_read_config_word(dev, dev->msi_cap + PCI_MSI_FLAGS, &msgctl); - ret = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1); - - return ret; -} -EXPORT_SYMBOL(pci_msi_vec_count); - -void pci_msi_shutdown(struct pci_dev *dev) -{ - struct msi_desc *desc; - - if (!pci_msi_enable || !dev || !dev->msi_enabled) - return; - - pci_msi_set_enable(dev, 0); - pci_intx_for_msi(dev, 1); - dev->msi_enabled = 0; - - /* Return the device with MSI unmasked as initial states */ - desc = msi_first_desc(&dev->dev, MSI_DESC_ALL); - if (!WARN_ON_ONCE(!desc)) - pci_msi_unmask(desc, msi_multi_mask(desc)); - - /* Restore dev->irq to its default pin-assertion IRQ */ - dev->irq = desc->pci.msi_attrib.default_irq; - pcibios_alloc_irq(dev); -} - static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec, struct irq_affinity *affd, int flags) { @@ -729,82 +756,6 @@ static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, return msix_capability_init(dev, entries, nvec, affd); } -void pci_msix_shutdown(struct pci_dev *dev) -{ - struct msi_desc *desc; - - if (!pci_msi_enable || !dev || !dev->msix_enabled) - return; - - if (pci_dev_is_disconnected(dev)) { - dev->msix_enabled = 0; - return; - } - - /* Return the device with MSI-X masked as initial states */ - msi_for_each_desc(desc, &dev->dev, MSI_DESC_ALL) - pci_msix_mask(desc); - - pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0); - pci_intx_for_msi(dev, 1); - dev->msix_enabled = 0; - pcibios_alloc_irq(dev); -} - -int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, - struct irq_affinity *affd) -{ - int nvec; - int rc; - - if (!pci_msi_supported(dev, minvec) || dev->current_state != PCI_D0) - return -EINVAL; - - /* Check whether driver already requested MSI-X IRQs */ - if (dev->msix_enabled) { - pci_info(dev, "can't enable MSI (MSI-X already enabled)\n"); - return -EINVAL; - } - - if (maxvec < minvec) - return -ERANGE; - - if (WARN_ON_ONCE(dev->msi_enabled)) - return -EINVAL; - - nvec = pci_msi_vec_count(dev); - if (nvec < 0) - return nvec; - if (nvec < minvec) - return -ENOSPC; - - if (nvec > maxvec) - nvec = maxvec; - - rc = pci_setup_msi_context(dev); - if (rc) - return rc; - - for (;;) { - if (affd) { - nvec = irq_calc_affinity_vectors(minvec, nvec, affd); - if (nvec < minvec) - return -ENOSPC; - } - - rc = msi_capability_init(dev, nvec, affd); - if (rc == 0) - return nvec; - - if (rc < 0) - return rc; - if (rc < minvec) - return -ENOSPC; - - nvec = rc; - } -} - int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, int maxvec, struct irq_affinity *affd, @@ -847,6 +798,68 @@ int __pci_enable_msix_range(struct pci_dev *dev, } } +void __pci_restore_msix_state(struct pci_dev *dev) +{ + struct msi_desc *entry; + bool write_msg; + + if (!dev->msix_enabled) + return; + + /* route the table */ + pci_intx_for_msi(dev, 0); + pci_msix_clear_and_set_ctrl(dev, 0, + PCI_MSIX_FLAGS_ENABLE | PCI_MSIX_FLAGS_MASKALL); + + write_msg = arch_restore_msi_irqs(dev); + + msi_lock_descs(&dev->dev); + msi_for_each_desc(entry, &dev->dev, MSI_DESC_ALL) { + if (write_msg) + __pci_write_msi_msg(entry, &entry->msg); + pci_msix_write_vector_ctrl(entry, entry->pci.msix_ctrl); + } + msi_unlock_descs(&dev->dev); + + pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0); +} + +void pci_msix_shutdown(struct pci_dev *dev) +{ + struct msi_desc *desc; + + if (!pci_msi_enable || !dev || !dev->msix_enabled) + return; + + if (pci_dev_is_disconnected(dev)) { + dev->msix_enabled = 0; + return; + } + + /* Return the device with MSI-X masked as initial states */ + msi_for_each_desc(desc, &dev->dev, MSI_DESC_ALL) + pci_msix_mask(desc); + + pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0); + pci_intx_for_msi(dev, 1); + dev->msix_enabled = 0; + pcibios_alloc_irq(dev); +} + +/* Common interfaces */ + +void pci_free_msi_irqs(struct pci_dev *dev) +{ + pci_msi_teardown_msi_irqs(dev); + + if (dev->msix_base) { + iounmap(dev->msix_base); + dev->msix_base = NULL; + } +} + +/* Misc. infrastructure */ + struct pci_dev *msi_desc_to_pci_dev(struct msi_desc *desc) { return to_pci_dev(desc->dev); From bab65e48cb064d67b488efb6888c06fd977a8245 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Nov 2022 14:55:07 +0100 Subject: [PATCH 035/134] PCI/MSI: Sanitize MSI-X checks There is no point in doing the same sanity checks over and over in a loop during MSI-X enablement. Put them in front of the loop and return early when they fail. Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122015.516946468@linutronix.de --- drivers/pci/msi/msi.c | 65 ++++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index 380e65155b93..794ec97c5b70 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -721,47 +721,31 @@ out_disable: return ret; } -static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, - int nvec, struct irq_affinity *affd, int flags) +static bool pci_msix_validate_entries(struct msix_entry *entries, int nvec, int hwsize) { - int nr_entries; int i, j; - if (!pci_msi_supported(dev, nvec) || dev->current_state != PCI_D0) - return -EINVAL; + if (!entries) + return true; - nr_entries = pci_msix_vec_count(dev); - if (nr_entries < 0) - return nr_entries; - if (nvec > nr_entries && !(flags & PCI_IRQ_VIRTUAL)) - return nr_entries; + for (i = 0; i < nvec; i++) { + /* Entry within hardware limit? */ + if (entries[i].entry >= hwsize) + return false; - if (entries) { - /* Check for any invalid entries */ - for (i = 0; i < nvec; i++) { - if (entries[i].entry >= nr_entries) - return -EINVAL; /* invalid entry */ - for (j = i + 1; j < nvec; j++) { - if (entries[i].entry == entries[j].entry) - return -EINVAL; /* duplicate entry */ - } + /* Check for duplicate entries */ + for (j = i + 1; j < nvec; j++) { + if (entries[i].entry == entries[j].entry) + return false; } } - - /* Check whether driver already requested for MSI IRQ */ - if (dev->msi_enabled) { - pci_info(dev, "can't enable MSI-X (MSI IRQ already assigned)\n"); - return -EINVAL; - } - return msix_capability_init(dev, entries, nvec, affd); + return true; } -int __pci_enable_msix_range(struct pci_dev *dev, - struct msix_entry *entries, int minvec, - int maxvec, struct irq_affinity *affd, - int flags) +int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, + int maxvec, struct irq_affinity *affd, int flags) { - int rc, nvec = maxvec; + int hwsize, rc, nvec = maxvec; if (maxvec < minvec) return -ERANGE; @@ -774,6 +758,23 @@ int __pci_enable_msix_range(struct pci_dev *dev, if (WARN_ON_ONCE(dev->msix_enabled)) return -EINVAL; + if (!pci_msi_supported(dev, nvec) || dev->current_state != PCI_D0) + return -EINVAL; + + hwsize = pci_msix_vec_count(dev); + if (hwsize < 0) + return hwsize; + + if (!pci_msix_validate_entries(entries, nvec, hwsize)) + return -EINVAL; + + /* PCI_IRQ_VIRTUAL is a horrible hack! */ + if (nvec > hwsize && !(flags & PCI_IRQ_VIRTUAL)) + nvec = hwsize; + + if (nvec < minvec) + return -ENOSPC; + rc = pci_setup_msi_context(dev); if (rc) return rc; @@ -785,7 +786,7 @@ int __pci_enable_msix_range(struct pci_dev *dev, return -ENOSPC; } - rc = __pci_enable_msix(dev, entries, nvec, affd, flags); + rc = msix_capability_init(dev, entries, nvec, affd); if (rc == 0) return nvec; From d2a463b297415ca6dd4d60bb1c867dd7c931587b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Nov 2022 14:55:09 +0100 Subject: [PATCH 036/134] PCI/MSI: Reject multi-MSI early When hierarchical MSI interrupt domains are enabled then there is no point to do tons of work and detect the missing support for multi-MSI late in the allocation path. Just query the domain feature flags right away. The query function is going to be used for other purposes later and has a mode argument which influences the result: ALLOW_LEGACY returns true when: - there is no irq domain attached (legacy support) - there is a irq domain attached which has the feature flag set DENY_LEGACY returns only true when: - there is a irq domain attached which has the feature flag set This allows to use the function universally without ifdeffery in the calling code. Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122015.574339988@linutronix.de --- drivers/pci/msi/irqdomain.c | 22 ++++++++++++++++++++++ drivers/pci/msi/msi.c | 4 ++++ drivers/pci/msi/msi.h | 9 +++++++++ 3 files changed, 35 insertions(+) diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index edd0cc204e06..666ed21f2924 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -187,6 +187,28 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, } EXPORT_SYMBOL_GPL(pci_msi_create_irq_domain); +/** + * pci_msi_domain_supports - Check for support of a particular feature flag + * @pdev: The PCI device to operate on + * @feature_mask: The feature mask to check for (full match) + * @mode: If ALLOW_LEGACY this grants the feature when there is no irq domain + * associated to the device. If DENY_LEGACY the lack of an irq domain + * makes the feature unsupported + */ +bool pci_msi_domain_supports(struct pci_dev *pdev, unsigned int feature_mask, + enum support_mode mode) +{ + struct msi_domain_info *info; + struct irq_domain *domain; + + domain = dev_get_msi_domain(&pdev->dev); + + if (!domain || !irq_domain_is_hierarchy(domain)) + return mode == ALLOW_LEGACY; + info = domain->host_data; + return (info->flags & feature_mask) == feature_mask; +} + /* * Users of the generic MSI infrastructure expect a device to have a single ID, * so with DMA aliases we have to pick the least-worst compromise. Devices with diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index 794ec97c5b70..bc84647c6dbd 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -347,6 +347,10 @@ static int msi_capability_init(struct pci_dev *dev, int nvec, struct msi_desc *entry; int ret; + /* Reject multi-MSI early on irq domain enabled architectures */ + if (nvec > 1 && !pci_msi_domain_supports(dev, MSI_FLAG_MULTI_PCI_MSI, ALLOW_LEGACY)) + return 1; + /* * Disable MSI during setup in the hardware, but mark it enabled * so that setup code can evaluate it. diff --git a/drivers/pci/msi/msi.h b/drivers/pci/msi/msi.h index 8170ef2c5ad0..9d75b6fb3e0c 100644 --- a/drivers/pci/msi/msi.h +++ b/drivers/pci/msi/msi.h @@ -97,6 +97,15 @@ int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int void __pci_restore_msi_state(struct pci_dev *dev); void __pci_restore_msix_state(struct pci_dev *dev); +/* irq_domain related functionality */ + +enum support_mode { + ALLOW_LEGACY, + DENY_LEGACY, +}; + +bool pci_msi_domain_supports(struct pci_dev *dev, unsigned int feature_mask, enum support_mode mode); + /* Legacy (!IRQDOMAIN) fallbacks */ #ifdef CONFIG_PCI_MSI_ARCH_FALLBACKS From 99f3d279765725920aa5924fa445537a20129a6f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Nov 2022 14:55:11 +0100 Subject: [PATCH 037/134] PCI/MSI: Reject MSI-X early Similar to PCI multi-MSI reject MSI-X enablement when a irq domain is attached to the device which does not support MSI-X. Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122015.631728309@linutronix.de --- drivers/pci/msi/msi.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index bc84647c6dbd..0740acd10bde 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -762,6 +762,10 @@ int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int if (WARN_ON_ONCE(dev->msix_enabled)) return -EINVAL; + /* Check MSI-X early on irq domain enabled architectures */ + if (!pci_msi_domain_supports(dev, MSI_FLAG_PCI_MSIX, ALLOW_LEGACY)) + return -ENOTSUPP; + if (!pci_msi_supported(dev, nvec) || dev->current_state != PCI_D0) return -EINVAL; From 4644d22eb673f173252610a93aaaba4c2bff7b41 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Nov 2022 14:55:12 +0100 Subject: [PATCH 038/134] PCI/MSI: Validate MSI-X contiguous restriction early With interrupt domains the sanity check for MSI-X vector validation can be done _before_ any allocation happens. The sanity check only applies to the allocation functions which have an 'entries' array argument. The entries array is filled by the caller with the requested MSI-X indices. Some drivers have gaps in the index space which is not supported on all architectures. The PCI/MSI irq domain has a 'feature' bit to enforce this validation late during the allocation phase. Just do it right away before doing any other work along with the other sanity checks on that array. Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122015.691357406@linutronix.de --- drivers/pci/msi/msi.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index 0740acd10bde..b94f6da3f32f 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -725,13 +725,17 @@ out_disable: return ret; } -static bool pci_msix_validate_entries(struct msix_entry *entries, int nvec, int hwsize) +static bool pci_msix_validate_entries(struct pci_dev *dev, struct msix_entry *entries, + int nvec, int hwsize) { + bool nogap; int i, j; if (!entries) return true; + nogap = pci_msi_domain_supports(dev, MSI_FLAG_MSIX_CONTIGUOUS, DENY_LEGACY); + for (i = 0; i < nvec; i++) { /* Entry within hardware limit? */ if (entries[i].entry >= hwsize) @@ -742,6 +746,9 @@ static bool pci_msix_validate_entries(struct msix_entry *entries, int nvec, int if (entries[i].entry == entries[j].entry) return false; } + /* Check for unsupported gaps */ + if (nogap && entries[i].entry != i) + return false; } return true; } @@ -773,7 +780,7 @@ int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int if (hwsize < 0) return hwsize; - if (!pci_msix_validate_entries(entries, nvec, hwsize)) + if (!pci_msix_validate_entries(dev, entries, nvec, hwsize)) return -EINVAL; /* PCI_IRQ_VIRTUAL is a horrible hack! */ From 9c03b2589da2b2b259a960f2a325731f6b38d115 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Nov 2022 14:55:14 +0100 Subject: [PATCH 039/134] PCI/MSI: Remove redundant msi_check() callback All these sanity checks are now done _before_ any allocation work happens. No point in doing it twice. Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Acked-by: Bjorn Helgaas Link: https://lore.kernel.org/r/20221111122015.749446904@linutronix.de --- drivers/pci/msi/irqdomain.c | 48 ------------------------------------- 1 file changed, 48 deletions(-) diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index 666ed21f2924..dea2e8c4dfa4 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -64,51 +64,6 @@ static irq_hw_number_t pci_msi_domain_calc_hwirq(struct msi_desc *desc) (pci_domain_nr(dev->bus) & 0xFFFFFFFF) << 27; } -static inline bool pci_msi_desc_is_multi_msi(struct msi_desc *desc) -{ - return !desc->pci.msi_attrib.is_msix && desc->nvec_used > 1; -} - -/** - * pci_msi_domain_check_cap - Verify that @domain supports the capabilities - * for @dev - * @domain: The interrupt domain to check - * @info: The domain info for verification - * @dev: The device to check - * - * Returns: - * 0 if the functionality is supported - * 1 if Multi MSI is requested, but the domain does not support it - * -ENOTSUPP otherwise - */ -static int pci_msi_domain_check_cap(struct irq_domain *domain, - struct msi_domain_info *info, - struct device *dev) -{ - struct msi_desc *desc = msi_first_desc(dev, MSI_DESC_ALL); - - /* Special handling to support __pci_enable_msi_range() */ - if (pci_msi_desc_is_multi_msi(desc) && - !(info->flags & MSI_FLAG_MULTI_PCI_MSI)) - return 1; - - if (desc->pci.msi_attrib.is_msix) { - if (!(info->flags & MSI_FLAG_PCI_MSIX)) - return -ENOTSUPP; - - if (info->flags & MSI_FLAG_MSIX_CONTIGUOUS) { - unsigned int idx = 0; - - /* Check for gaps in the entry indices */ - msi_for_each_desc(desc, dev, MSI_DESC_ALL) { - if (desc->msi_index != idx++) - return -ENOTSUPP; - } - } - } - return 0; -} - static void pci_msi_domain_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) { @@ -118,7 +73,6 @@ static void pci_msi_domain_set_desc(msi_alloc_info_t *arg, static struct msi_domain_ops pci_msi_domain_ops_default = { .set_desc = pci_msi_domain_set_desc, - .msi_check = pci_msi_domain_check_cap, }; static void pci_msi_domain_update_dom_ops(struct msi_domain_info *info) @@ -130,8 +84,6 @@ static void pci_msi_domain_update_dom_ops(struct msi_domain_info *info) } else { if (ops->set_desc == NULL) ops->set_desc = pci_msi_domain_set_desc; - if (ops->msi_check == NULL) - ops->msi_check = pci_msi_domain_check_cap; } } From 2569f62ca473584a8ba389f455fc00805389f7da Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Nov 2022 14:55:15 +0100 Subject: [PATCH 040/134] genirq/msi: Remove msi_domain_ops:: Msi_check() No more users. Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20221111122015.807616900@linutronix.de --- include/linux/msi.h | 4 ---- kernel/irq/msi.c | 17 +---------------- 2 files changed, 1 insertion(+), 20 deletions(-) diff --git a/include/linux/msi.h b/include/linux/msi.h index 8b287148e512..1ce9d5e0bfa3 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -292,7 +292,6 @@ struct msi_domain_info; * @get_hwirq: Retrieve the resulting hw irq number * @msi_init: Domain specific init function for MSI interrupts * @msi_free: Domain specific function to free a MSI interrupts - * @msi_check: Callback for verification of the domain/info/dev data * @msi_prepare: Prepare the allocation of the interrupts in the domain * @set_desc: Set the msi descriptor for an interrupt * @domain_alloc_irqs: Optional function to override the default allocation @@ -330,9 +329,6 @@ struct msi_domain_ops { void (*msi_free)(struct irq_domain *domain, struct msi_domain_info *info, unsigned int virq); - int (*msi_check)(struct irq_domain *domain, - struct msi_domain_info *info, - struct device *dev); int (*msi_prepare)(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *arg); diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 4fde91703c9b..4b99f37355d5 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -617,17 +617,9 @@ static int msi_domain_ops_init(struct irq_domain *domain, return 0; } -static int msi_domain_ops_check(struct irq_domain *domain, - struct msi_domain_info *info, - struct device *dev) -{ - return 0; -} - static struct msi_domain_ops msi_domain_ops_default = { .get_hwirq = msi_domain_ops_get_hwirq, .msi_init = msi_domain_ops_init, - .msi_check = msi_domain_ops_check, .msi_prepare = msi_domain_ops_prepare, .set_desc = msi_domain_ops_set_desc, .domain_alloc_irqs = __msi_domain_alloc_irqs, @@ -655,8 +647,6 @@ static void msi_domain_update_dom_ops(struct msi_domain_info *info) ops->get_hwirq = msi_domain_ops_default.get_hwirq; if (ops->msi_init == NULL) ops->msi_init = msi_domain_ops_default.msi_init; - if (ops->msi_check == NULL) - ops->msi_check = msi_domain_ops_default.msi_check; if (ops->msi_prepare == NULL) ops->msi_prepare = msi_domain_ops_default.msi_prepare; if (ops->set_desc == NULL) @@ -707,13 +697,8 @@ int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, { struct msi_domain_info *info = domain->host_data; struct msi_domain_ops *ops = info->ops; - int ret; - ret = ops->msi_check(domain, info, dev); - if (ret == 0) - ret = ops->msi_prepare(domain, dev, nvec, arg); - - return ret; + return ops->msi_prepare(domain, dev, nvec, arg); } int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, From d474d92d70250d43e7ce0c7cb8623f31ee7c40f6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 11 Nov 2022 14:55:17 +0100 Subject: [PATCH 041/134] x86/apic: Remove X86_IRQ_ALLOC_CONTIGUOUS_VECTORS Now that the PCI/MSI core code does early checking for multi-MSI support X86_IRQ_ALLOC_CONTIGUOUS_VECTORS is not required anymore. Remove the flag and rely on MSI_FLAG_MULTI_PCI_MSI. Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Link: https://lore.kernel.org/r/20221111122015.865042356@linutronix.de --- arch/x86/include/asm/irqdomain.h | 4 +--- arch/x86/kernel/apic/msi.c | 6 ++---- arch/x86/kernel/apic/vector.c | 4 ---- drivers/iommu/amd/iommu.c | 7 ------- drivers/iommu/intel/irq_remapping.c | 7 ------- drivers/pci/controller/pci-hyperv.c | 15 +-------------- 6 files changed, 4 insertions(+), 39 deletions(-) diff --git a/arch/x86/include/asm/irqdomain.h b/arch/x86/include/asm/irqdomain.h index 125c23b7bad3..30c325c235c0 100644 --- a/arch/x86/include/asm/irqdomain.h +++ b/arch/x86/include/asm/irqdomain.h @@ -7,9 +7,7 @@ #ifdef CONFIG_X86_LOCAL_APIC enum { - /* Allocate contiguous CPU vectors */ - X86_IRQ_ALLOC_CONTIGUOUS_VECTORS = 0x1, - X86_IRQ_ALLOC_LEGACY = 0x2, + X86_IRQ_ALLOC_LEGACY = 0x1, }; extern int x86_fwspec_is_ioapic(struct irq_fwspec *fwspec); diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 7517eb05bdc1..248a6a5c0ad8 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -161,12 +161,10 @@ int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *arg) { init_irq_alloc_info(arg, NULL); - if (to_pci_dev(dev)->msix_enabled) { + if (to_pci_dev(dev)->msix_enabled) arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX; - } else { + else arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSI; - arg->flags |= X86_IRQ_ALLOC_CONTIGUOUS_VECTORS; - } return 0; } diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 3e6f6b448f6a..c1efebd27e6c 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -539,10 +539,6 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, if (disable_apic) return -ENXIO; - /* Currently vector allocator can't guarantee contiguous allocations */ - if ((info->flags & X86_IRQ_ALLOC_CONTIGUOUS_VECTORS) && nr_irqs > 1) - return -ENOSYS; - /* * Catch any attempt to touch the cascade interrupt on a PIC * equipped system. diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 8ece86484b27..72dfe57f5802 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -3297,13 +3297,6 @@ static int irq_remapping_alloc(struct irq_domain *domain, unsigned int virq, if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_PCI_MSI) return -EINVAL; - /* - * With IRQ remapping enabled, don't need contiguous CPU vectors - * to support multiple MSI interrupts. - */ - if (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI) - info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS; - sbdf = get_devid(info); if (sbdf < 0) return -EINVAL; diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 0b80a272e9a7..a914eba787ac 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -1337,13 +1337,6 @@ static int intel_irq_remapping_alloc(struct irq_domain *domain, if (nr_irqs > 1 && info->type != X86_IRQ_ALLOC_TYPE_PCI_MSI) return -EINVAL; - /* - * With IRQ remapping enabled, don't need contiguous CPU vectors - * to support multiple MSI interrupts. - */ - if (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI) - info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS; - ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); if (ret < 0) return ret; diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index ba64284eaf9f..1dee55d16a4d 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -611,20 +611,7 @@ static unsigned int hv_msi_get_int_vector(struct irq_data *data) return cfg->vector; } -static int hv_msi_prepare(struct irq_domain *domain, struct device *dev, - int nvec, msi_alloc_info_t *info) -{ - int ret = pci_msi_prepare(domain, dev, nvec, info); - - /* - * By using the interrupt remapper in the hypervisor IOMMU, contiguous - * CPU vectors is not needed for multi-MSI - */ - if (info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI) - info->flags &= ~X86_IRQ_ALLOC_CONTIGUOUS_VECTORS; - - return ret; -} +#define hv_msi_prepare pci_msi_prepare /** * hv_arch_irq_unmask() - "Unmask" the IRQ by setting its current From 9d4c8175b8298a361d0ea91f435b1b4ed2ea06e3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 13 Nov 2022 21:34:00 +0100 Subject: [PATCH 042/134] bus: fsl-mc: Remove linux/msi.h includes Neither dprc-driver.c nor fsl-mc-bus.c need anything from linux/msi.h. Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20221113202428.511591041@linutronix.de --- drivers/bus/fsl-mc/dprc-driver.c | 1 - drivers/bus/fsl-mc/fsl-mc-bus.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/bus/fsl-mc/dprc-driver.c b/drivers/bus/fsl-mc/dprc-driver.c index 5e70f9775a0e..4c84be378bf2 100644 --- a/drivers/bus/fsl-mc/dprc-driver.c +++ b/drivers/bus/fsl-mc/dprc-driver.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include "fsl-mc-private.h" diff --git a/drivers/bus/fsl-mc/fsl-mc-bus.c b/drivers/bus/fsl-mc/fsl-mc-bus.c index 6143dbf31f31..774f307844b4 100644 --- a/drivers/bus/fsl-mc/fsl-mc-bus.c +++ b/drivers/bus/fsl-mc/fsl-mc-bus.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include From 20e2e09c0998ef0c325edeb00560a8ff67b35913 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 13 Nov 2022 21:34:07 +0100 Subject: [PATCH 043/134] soc: fsl: dpio: Remove linux/msi.h include Nothing in this file needs anything from linux/msi.h Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20221113202428.760225831@linutronix.de --- drivers/soc/fsl/dpio/dpio-driver.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/soc/fsl/dpio/dpio-driver.c b/drivers/soc/fsl/dpio/dpio-driver.c index 5a2edc48dd79..74eace3109a1 100644 --- a/drivers/soc/fsl/dpio/dpio-driver.c +++ b/drivers/soc/fsl/dpio/dpio-driver.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include From 616eb7bf325fbf25df2b5046536b50fe896d4ab6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 13 Nov 2022 21:34:08 +0100 Subject: [PATCH 044/134] vfio/fsl-mc: Remove linux/msi.h include Nothing in this file needs anything from linux/msi.h Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Acked-by: Alex Williamson Link: https://lore.kernel.org/r/20221113202428.826924043@linutronix.de --- drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c index 7b428eac3d3e..64d01f3fb13d 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc_intr.c @@ -8,7 +8,6 @@ #include #include #include -#include #include "linux/fsl/mc.h" #include "vfio_fsl_mc_private.h" From d9dcb63677fcdf0a5a8cd3c138ab174cf1e9b871 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 13 Nov 2022 21:34:10 +0100 Subject: [PATCH 045/134] iommu/of: Remove linux/msi.h include Nothing in this file needs anything from linux/msi.h Signed-off-by: Thomas Gleixner Acked-by: Robin Murphy Link: https://lore.kernel.org/r/20221113202428.889624434@linutronix.de --- drivers/iommu/of_iommu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 5696314ae69e..00d98f08732f 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include From f2756526450d19bca28714565062a8f286c05049 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 23 Nov 2022 22:30:13 -0800 Subject: [PATCH 046/134] genirq/irqreturn: Fix kernel-doc warnings irqreturn.h:6: warning: missing initial short description on line: * enum irqreturn irqreturn.h:15: warning: Enum value 'IRQ_NONE' not described in enum 'irqreturn' irqreturn.h:15: warning: Enum value 'IRQ_HANDLED' not described in enum 'irqreturn' irqreturn.h:15: warning: Enum value 'IRQ_WAKE_THREAD' not described in enum 'irqreturn' Signed-off-by: Randy Dunlap Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20221124063013.28479-1-rdunlap@infradead.org --- include/linux/irqreturn.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/include/linux/irqreturn.h b/include/linux/irqreturn.h index bd4c066ad39b..d426c7ad92bf 100644 --- a/include/linux/irqreturn.h +++ b/include/linux/irqreturn.h @@ -3,10 +3,10 @@ #define _LINUX_IRQRETURN_H /** - * enum irqreturn - * @IRQ_NONE interrupt was not from this device or was not handled - * @IRQ_HANDLED interrupt was handled by this device - * @IRQ_WAKE_THREAD handler requests to wake the handler thread + * enum irqreturn - irqreturn type values + * @IRQ_NONE: interrupt was not from this device or was not handled + * @IRQ_HANDLED: interrupt was handled by this device + * @IRQ_WAKE_THREAD: handler requests to wake the handler thread */ enum irqreturn { IRQ_NONE = (0 << 0), From 855d4ca4bdb366aab3d43408b74e02ab629d1d55 Mon Sep 17 00:00:00 2001 From: Liu Peibao Date: Mon, 14 Nov 2022 19:38:23 +0800 Subject: [PATCH 047/134] irqchip: loongarch-cpu: add DT support LoongArch is coming to support booting with FDT, so DT support of this driver is desired. Signed-off-by: Liu Peibao Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221114113824.1880-2-liupeibao@loongson.cn --- drivers/irqchip/irq-loongarch-cpu.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/irqchip/irq-loongarch-cpu.c b/drivers/irqchip/irq-loongarch-cpu.c index 741612ba6a52..a28b7c549654 100644 --- a/drivers/irqchip/irq-loongarch-cpu.c +++ b/drivers/irqchip/irq-loongarch-cpu.c @@ -92,6 +92,25 @@ static const struct irq_domain_ops loongarch_cpu_intc_irq_domain_ops = { .xlate = irq_domain_xlate_onecell, }; +#ifdef CONFIG_OF +int __init loongarch_cpu_irq_of_init(struct device_node *of_node, + struct device_node *parent) +{ + cpuintc_handle = of_node_to_fwnode(of_node); + + irq_domain = irq_domain_create_linear(cpuintc_handle, EXCCODE_INT_NUM, + &loongarch_cpu_intc_irq_domain_ops, NULL); + if (!irq_domain) + panic("Failed to add irqdomain for loongarch CPU"); + + set_handle_irq(&handle_cpu_irq); + + return 0; +} +IRQCHIP_DECLARE(cpu_intc, "loongson,cpu-interrupt-controller", + loongarch_cpu_irq_of_init); +#endif + static int __init liointc_parse_madt(union acpi_subtable_headers *header, const unsigned long end) From 6b2748ada244c7597e9b677a0bdda4e8781a8d8f Mon Sep 17 00:00:00 2001 From: Liu Peibao Date: Mon, 14 Nov 2022 19:38:24 +0800 Subject: [PATCH 048/134] dt-bindings: interrupt-controller: add yaml for LoongArch CPU interrupt controller Current LoongArch compatible CPUs support 14 CPU IRQs. We can describe how the 14 IRQs are wired to the platform's internal interrupt controller by devicetree. Signed-off-by: Liu Peibao Reviewed-by: Krzysztof Kozlowski Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221114113824.1880-3-liupeibao@loongson.cn --- .../loongarch,cpu-interrupt-controller.yaml | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 Documentation/devicetree/bindings/interrupt-controller/loongarch,cpu-interrupt-controller.yaml diff --git a/Documentation/devicetree/bindings/interrupt-controller/loongarch,cpu-interrupt-controller.yaml b/Documentation/devicetree/bindings/interrupt-controller/loongarch,cpu-interrupt-controller.yaml new file mode 100644 index 000000000000..2a1cf885c99d --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/loongarch,cpu-interrupt-controller.yaml @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interrupt-controller/loongarch,cpu-interrupt-controller.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: LoongArch CPU Interrupt Controller + +maintainers: + - Liu Peibao + +properties: + compatible: + const: loongarch,cpu-interrupt-controller + + '#interrupt-cells': + const: 1 + + interrupt-controller: true + +additionalProperties: false + +required: + - compatible + - '#interrupt-cells' + - interrupt-controller + +examples: + - | + interrupt-controller { + compatible = "loongarch,cpu-interrupt-controller"; + #interrupt-cells = <1>; + interrupt-controller; + }; From d0c50cc4b957b2cf6e43cec4998d212b5abe9220 Mon Sep 17 00:00:00 2001 From: Jianmin Lv Date: Sat, 22 Oct 2022 15:59:52 +0800 Subject: [PATCH 049/134] ACPI / PCI: fix LPIC IRQ model default PCI IRQ polarity On LoongArch based systems, the PCI devices (e.g. SATA controllers and PCI-to-PCI bridge controllers) in Loongson chipsets output high-level interrupt signal to the interrupt controller they are connected (see Loongson 7A1000 Bridge User Manual v2.00, sec 5.3, "For the bridge chip, AC97 DMA interrupts are edge triggered, gpio interrupts can be configured to be level triggered or edge triggered as needed, and the rest of the interrupts are level triggered and active high."), while the IRQs are active low from the perspective of PCI (see Conventional PCI spec r3.0, sec 2.2.6, "Interrupts on PCI are optional and defined as level sensitive, asserted low."), which means that the interrupt output of PCI devices plugged into PCI-to-PCI bridges of Loongson chipset will be also converted to high-level. So high level triggered type is required to be passed to acpi_register_gsi() when creating mappings for PCI devices. Signed-off-by: Jianmin Lv Reviewed-by: Huacai Chen Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221022075955.11726-2-lvjianmin@loongson.cn --- drivers/acpi/pci_irq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c index 08e15774fb9f..ff30ceca2203 100644 --- a/drivers/acpi/pci_irq.c +++ b/drivers/acpi/pci_irq.c @@ -387,13 +387,15 @@ int acpi_pci_irq_enable(struct pci_dev *dev) u8 pin; int triggering = ACPI_LEVEL_SENSITIVE; /* - * On ARM systems with the GIC interrupt model, level interrupts + * On ARM systems with the GIC interrupt model, or LoongArch + * systems with the LPIC interrupt model, level interrupts * are always polarity high by specification; PCI legacy * IRQs lines are inverted before reaching the interrupt * controller and must therefore be considered active high * as default. */ - int polarity = acpi_irq_model == ACPI_IRQ_MODEL_GIC ? + int polarity = acpi_irq_model == ACPI_IRQ_MODEL_GIC || + acpi_irq_model == ACPI_IRQ_MODEL_LPIC ? ACPI_ACTIVE_HIGH : ACPI_ACTIVE_LOW; char *link = NULL; char link_desc[16]; From c7c00138015975c8f0e268564249cc47d8de632c Mon Sep 17 00:00:00 2001 From: Jianmin Lv Date: Sat, 22 Oct 2022 15:59:53 +0800 Subject: [PATCH 050/134] irqchip/loongson-pch-pic: Fix translate callback for DT path In DT path of translate callback, if fwspec->param_count==1 and of_node is non-null, fwspec->param[1] will be accessed, which is introduced from previous commit bcdd75c596c8 (irqchip/loongson-pch-pic: Add ACPI init support). Before the patch, for non-null of_node, translate callback (use irq_domain_translate_twocell()) will return -EINVAL if fwspec->param_count < 2, so the check in the patch is added. Fixes: bcdd75c596c8 ("irqchip/loongson-pch-pic: Add ACPI init support") Signed-off-by: Jianmin Lv Reviewed-by: Huacai Chen Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221022075955.11726-3-lvjianmin@loongson.cn --- drivers/irqchip/irq-loongson-pch-pic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c index c01b9c257005..03493cda65a3 100644 --- a/drivers/irqchip/irq-loongson-pch-pic.c +++ b/drivers/irqchip/irq-loongson-pch-pic.c @@ -159,6 +159,9 @@ static int pch_pic_domain_translate(struct irq_domain *d, return -EINVAL; if (of_node) { + if (fwspec->param_count < 2) + return -EINVAL; + *hwirq = fwspec->param[0] + priv->ht_vec_base; *type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK; } else { From 25f3514aab3748bfef4a279ed599f836ac83e62a Mon Sep 17 00:00:00 2001 From: Jianmin Lv Date: Sat, 22 Oct 2022 15:59:54 +0800 Subject: [PATCH 051/134] irqchip/loongson-pch-pic: Support to set IRQ type for ACPI path For ACPI path, the translate callback used IRQ_TYPE_NONE and ignored the IRQ type in fwspec->param[1]. For supporting to set type for IRQs of the irqdomain, fwspec->param[1] should be used to get IRQ type. Signed-off-by: Jianmin Lv Reviewed-by: Huacai Chen Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221022075955.11726-4-lvjianmin@loongson.cn --- drivers/irqchip/irq-loongson-pch-pic.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c index 03493cda65a3..a26a3f59d4a5 100644 --- a/drivers/irqchip/irq-loongson-pch-pic.c +++ b/drivers/irqchip/irq-loongson-pch-pic.c @@ -155,9 +155,6 @@ static int pch_pic_domain_translate(struct irq_domain *d, struct pch_pic *priv = d->host_data; struct device_node *of_node = to_of_node(fwspec->fwnode); - if (fwspec->param_count < 1) - return -EINVAL; - if (of_node) { if (fwspec->param_count < 2) return -EINVAL; @@ -165,8 +162,14 @@ static int pch_pic_domain_translate(struct irq_domain *d, *hwirq = fwspec->param[0] + priv->ht_vec_base; *type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK; } else { + if (fwspec->param_count < 1) + return -EINVAL; + *hwirq = fwspec->param[0] - priv->gsi_base; - *type = IRQ_TYPE_NONE; + if (fwspec->param_count > 1) + *type = fwspec->param[1] & IRQ_TYPE_SENSE_MASK; + else + *type = IRQ_TYPE_NONE; } return 0; From 17343d0b4039196517ab5c40d8fce3e8d394c526 Mon Sep 17 00:00:00 2001 From: Jianmin Lv Date: Sat, 22 Oct 2022 15:59:55 +0800 Subject: [PATCH 052/134] irqchip/loongson-liointc: Support to set IRQ type for ACPI path For ACPI path, the xlate callback used IRQ_TYPE_NONE and ignored the IRQ type in intspec[1]. For supporting to set type for IRQs of the irqdomain, intspec[1] should be used to get IRQ type. Signed-off-by: Jianmin Lv Reviewed-by: Huacai Chen Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221022075955.11726-5-lvjianmin@loongson.cn --- drivers/irqchip/irq-loongson-liointc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-loongson-liointc.c b/drivers/irqchip/irq-loongson-liointc.c index 0da8716f8f24..838c8fa2d868 100644 --- a/drivers/irqchip/irq-loongson-liointc.c +++ b/drivers/irqchip/irq-loongson-liointc.c @@ -167,7 +167,12 @@ static int liointc_domain_xlate(struct irq_domain *d, struct device_node *ctrlr, if (WARN_ON(intsize < 1)) return -EINVAL; *out_hwirq = intspec[0] - GSI_MIN_CPU_IRQ; - *out_type = IRQ_TYPE_NONE; + + if (intsize > 1) + *out_type = intspec[1] & IRQ_TYPE_SENSE_MASK; + else + *out_type = IRQ_TYPE_NONE; + return 0; } From 70f7b6c008b37a0beb956e25a6c167edfd4b259e Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 20 Oct 2022 22:25:35 +0800 Subject: [PATCH 053/134] irqchip/loongson-htvec: Add ACPI init support HTVECINTC stands for "HyperTransport Interrupts" that described in Section 14.3 of "Loongson 3A5000 Processor Reference Manual". For more information please refer Documentation/loongarch/irq-chip-model.rst. Though the extended model is the recommended one, there are still some legacy model machines. So we add ACPI init support for HTVECINTC. Co-developed-by: Jianmin Lv Signed-off-by: Jianmin Lv Signed-off-by: Huacai Chen Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221020142535.1725573-1-chenhuacai@loongson.cn --- arch/loongarch/include/asm/irq.h | 2 +- drivers/irqchip/Kconfig | 1 + drivers/irqchip/irq-loongson-htvec.c | 159 +++++++++++++++++++------ drivers/irqchip/irq-loongson-liointc.c | 25 +++- 4 files changed, 146 insertions(+), 41 deletions(-) diff --git a/arch/loongarch/include/asm/irq.h b/arch/loongarch/include/asm/irq.h index d06d4542b634..9d3d36e41afe 100644 --- a/arch/loongarch/include/asm/irq.h +++ b/arch/loongarch/include/asm/irq.h @@ -93,7 +93,7 @@ int liointc_acpi_init(struct irq_domain *parent, int eiointc_acpi_init(struct irq_domain *parent, struct acpi_madt_eio_pic *acpi_eiointc); -struct irq_domain *htvec_acpi_init(struct irq_domain *parent, +int htvec_acpi_init(struct irq_domain *parent, struct acpi_madt_ht_pic *acpi_htvec); int pch_lpc_acpi_init(struct irq_domain *parent, struct acpi_madt_lpc_pic *acpi_pchlpc); diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 7ef9f5e696d3..17396e6e42fc 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -576,6 +576,7 @@ config IRQ_LOONGARCH_CPU select GENERIC_IRQ_CHIP select IRQ_DOMAIN select GENERIC_IRQ_EFFECTIVE_AFF_MASK + select LOONGSON_HTVEC select LOONGSON_LIOINTC select LOONGSON_EIOINTC select LOONGSON_PCH_PIC diff --git a/drivers/irqchip/irq-loongson-htvec.c b/drivers/irqchip/irq-loongson-htvec.c index 60a335d7e64e..8b06082616cb 100644 --- a/drivers/irqchip/irq-loongson-htvec.c +++ b/drivers/irqchip/irq-loongson-htvec.c @@ -20,7 +20,6 @@ /* Registers */ #define HTVEC_EN_OFF 0x20 #define HTVEC_MAX_PARENT_IRQ 8 - #define VEC_COUNT_PER_REG 32 #define VEC_REG_IDX(irq_id) ((irq_id) / VEC_COUNT_PER_REG) #define VEC_REG_BIT(irq_id) ((irq_id) % VEC_COUNT_PER_REG) @@ -32,6 +31,8 @@ struct htvec { raw_spinlock_t htvec_lock; }; +static struct htvec *htvec_priv; + static void htvec_irq_dispatch(struct irq_desc *desc) { int i; @@ -155,64 +156,144 @@ static void htvec_reset(struct htvec *priv) } } -static int htvec_of_init(struct device_node *node, - struct device_node *parent) +static int htvec_init(phys_addr_t addr, unsigned long size, + int num_parents, int parent_irq[], struct fwnode_handle *domain_handle) { + int i; struct htvec *priv; - int err, parent_irq[8], i; priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; + priv->num_parents = num_parents; + priv->base = ioremap(addr, size); raw_spin_lock_init(&priv->htvec_lock); - priv->base = of_iomap(node, 0); - if (!priv->base) { - err = -ENOMEM; - goto free_priv; + + /* Setup IRQ domain */ + priv->htvec_domain = irq_domain_create_linear(domain_handle, + (VEC_COUNT_PER_REG * priv->num_parents), + &htvec_domain_ops, priv); + if (!priv->htvec_domain) { + pr_err("loongson-htvec: cannot add IRQ domain\n"); + goto iounmap_base; } + htvec_reset(priv); + + for (i = 0; i < priv->num_parents; i++) { + irq_set_chained_handler_and_data(parent_irq[i], + htvec_irq_dispatch, priv); + } + + htvec_priv = priv; + + return 0; + +iounmap_base: + iounmap(priv->base); + kfree(priv); + + return -EINVAL; +} + +#ifdef CONFIG_OF + +static int htvec_of_init(struct device_node *node, + struct device_node *parent) +{ + int i, err; + int parent_irq[8]; + int num_parents = 0; + struct resource res; + + if (of_address_to_resource(node, 0, &res)) + return -EINVAL; + /* Interrupt may come from any of the 8 interrupt lines */ for (i = 0; i < HTVEC_MAX_PARENT_IRQ; i++) { parent_irq[i] = irq_of_parse_and_map(node, i); if (parent_irq[i] <= 0) break; - priv->num_parents++; + num_parents++; } - if (!priv->num_parents) { - pr_err("Failed to get parent irqs\n"); - err = -ENODEV; - goto iounmap_base; - } - - priv->htvec_domain = irq_domain_create_linear(of_node_to_fwnode(node), - (VEC_COUNT_PER_REG * priv->num_parents), - &htvec_domain_ops, priv); - if (!priv->htvec_domain) { - pr_err("Failed to create IRQ domain\n"); - err = -ENOMEM; - goto irq_dispose; - } - - htvec_reset(priv); - - for (i = 0; i < priv->num_parents; i++) - irq_set_chained_handler_and_data(parent_irq[i], - htvec_irq_dispatch, priv); + err = htvec_init(res.start, resource_size(&res), + num_parents, parent_irq, of_node_to_fwnode(node)); + if (err < 0) + return err; return 0; - -irq_dispose: - for (; i > 0; i--) - irq_dispose_mapping(parent_irq[i - 1]); -iounmap_base: - iounmap(priv->base); -free_priv: - kfree(priv); - - return err; } IRQCHIP_DECLARE(htvec, "loongson,htvec-1.0", htvec_of_init); + +#endif + +#ifdef CONFIG_ACPI +static int __init pch_pic_parse_madt(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_madt_bio_pic *pchpic_entry = (struct acpi_madt_bio_pic *)header; + + return pch_pic_acpi_init(htvec_priv->htvec_domain, pchpic_entry); +} + +static int __init pch_msi_parse_madt(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_madt_msi_pic *pchmsi_entry = (struct acpi_madt_msi_pic *)header; + + return pch_msi_acpi_init(htvec_priv->htvec_domain, pchmsi_entry); +} + +static int __init acpi_cascade_irqdomain_init(void) +{ + int r; + + r = acpi_table_parse_madt(ACPI_MADT_TYPE_BIO_PIC, pch_pic_parse_madt, 0); + if (r < 0) + return r; + + r = acpi_table_parse_madt(ACPI_MADT_TYPE_MSI_PIC, pch_msi_parse_madt, 0); + if (r < 0) + return r; + + return 0; +} + +int __init htvec_acpi_init(struct irq_domain *parent, + struct acpi_madt_ht_pic *acpi_htvec) +{ + int i, ret; + int num_parents, parent_irq[8]; + struct fwnode_handle *domain_handle; + + if (!acpi_htvec) + return -EINVAL; + + num_parents = HTVEC_MAX_PARENT_IRQ; + + domain_handle = irq_domain_alloc_fwnode(&acpi_htvec->address); + if (!domain_handle) { + pr_err("Unable to allocate domain handle\n"); + return -ENOMEM; + } + + /* Interrupt may come from any of the 8 interrupt lines */ + for (i = 0; i < HTVEC_MAX_PARENT_IRQ; i++) + parent_irq[i] = irq_create_mapping(parent, acpi_htvec->cascade[i]); + + ret = htvec_init(acpi_htvec->address, acpi_htvec->size, + num_parents, parent_irq, domain_handle); + + if (ret == 0) + ret = acpi_cascade_irqdomain_init(); + else + irq_domain_free_fwnode(domain_handle); + + return ret; +} + +#endif diff --git a/drivers/irqchip/irq-loongson-liointc.c b/drivers/irqchip/irq-loongson-liointc.c index 838c8fa2d868..a4a7cccd1181 100644 --- a/drivers/irqchip/irq-loongson-liointc.c +++ b/drivers/irqchip/irq-loongson-liointc.c @@ -354,6 +354,26 @@ IRQCHIP_DECLARE(loongson_liointc_2_0, "loongson,liointc-2.0", liointc_of_init); #endif #ifdef CONFIG_ACPI +static int __init htintc_parse_madt(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_madt_ht_pic *htintc_entry = (struct acpi_madt_ht_pic *)header; + struct irq_domain *parent = irq_find_matching_fwnode(liointc_handle, DOMAIN_BUS_ANY); + + return htvec_acpi_init(parent, htintc_entry); +} + +static int __init acpi_cascade_irqdomain_init(void) +{ + int r; + + r = acpi_table_parse_madt(ACPI_MADT_TYPE_HT_PIC, htintc_parse_madt, 0); + if (r < 0) + return r; + + return 0; +} + int __init liointc_acpi_init(struct irq_domain *parent, struct acpi_madt_lio_pic *acpi_liointc) { int ret; @@ -370,9 +390,12 @@ int __init liointc_acpi_init(struct irq_domain *parent, struct acpi_madt_lio_pic pr_err("Unable to allocate domain handle\n"); return -ENOMEM; } + ret = liointc_init(acpi_liointc->address, acpi_liointc->size, 1, domain_handle, NULL); - if (ret) + if (ret == 0) + ret = acpi_cascade_irqdomain_init(); + else irq_domain_free_fwnode(domain_handle); return ret; From 1be356c9326d68c9b0161ca004a41f203864d7ee Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 20 Oct 2022 15:35:24 +0800 Subject: [PATCH 054/134] irqchip/loongson-htvec: Add suspend/resume support Add suspend/resume support for HTVEC irqchip, which is needed for upcoming suspend/hibernation. Signed-off-by: Huacai Chen Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221020073527.541845-2-chenhuacai@loongson.cn --- drivers/irqchip/irq-loongson-htvec.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/irqchip/irq-loongson-htvec.c b/drivers/irqchip/irq-loongson-htvec.c index 8b06082616cb..fc8bf1f5d41b 100644 --- a/drivers/irqchip/irq-loongson-htvec.c +++ b/drivers/irqchip/irq-loongson-htvec.c @@ -16,6 +16,7 @@ #include #include #include +#include /* Registers */ #define HTVEC_EN_OFF 0x20 @@ -29,6 +30,7 @@ struct htvec { void __iomem *base; struct irq_domain *htvec_domain; raw_spinlock_t htvec_lock; + u32 saved_vec_en[HTVEC_MAX_PARENT_IRQ]; }; static struct htvec *htvec_priv; @@ -156,6 +158,29 @@ static void htvec_reset(struct htvec *priv) } } +static int htvec_suspend(void) +{ + int i; + + for (i = 0; i < htvec_priv->num_parents; i++) + htvec_priv->saved_vec_en[i] = readl(htvec_priv->base + HTVEC_EN_OFF + 4 * i); + + return 0; +} + +static void htvec_resume(void) +{ + int i; + + for (i = 0; i < htvec_priv->num_parents; i++) + writel(htvec_priv->saved_vec_en[i], htvec_priv->base + HTVEC_EN_OFF + 4 * i); +} + +static struct syscore_ops htvec_syscore_ops = { + .suspend = htvec_suspend, + .resume = htvec_resume, +}; + static int htvec_init(phys_addr_t addr, unsigned long size, int num_parents, int parent_irq[], struct fwnode_handle *domain_handle) { @@ -188,6 +213,8 @@ static int htvec_init(phys_addr_t addr, unsigned long size, htvec_priv = priv; + register_syscore_ops(&htvec_syscore_ops); + return 0; iounmap_base: From a90335c2dfb4ffe572816f77a3c4f2d1d388d724 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 20 Oct 2022 15:35:25 +0800 Subject: [PATCH 055/134] irqchip/loongson-eiointc: Add suspend/resume support Add suspend/resume support for EIOINTC irqchip, which is needed for upcoming suspend/hibernation. Signed-off-by: Huacai Chen Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221020073527.541845-3-chenhuacai@loongson.cn --- drivers/irqchip/irq-loongson-eiointc.c | 33 ++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c index 16e9af8d8b1e..5cf4b800e499 100644 --- a/drivers/irqchip/irq-loongson-eiointc.c +++ b/drivers/irqchip/irq-loongson-eiointc.c @@ -17,6 +17,7 @@ #include #include #include +#include #define EIOINTC_REG_NODEMAP 0x14a0 #define EIOINTC_REG_IPMAP 0x14c0 @@ -301,6 +302,37 @@ static struct irq_domain *acpi_get_vec_parent(int node, struct acpi_vector_group return NULL; } +static int eiointc_suspend(void) +{ + return 0; +} + +static void eiointc_resume(void) +{ + int i, j; + struct irq_desc *desc; + struct irq_data *irq_data; + + eiointc_router_init(0); + + for (i = 0; i < nr_pics; i++) { + for (j = 0; j < VEC_COUNT; j++) { + desc = irq_resolve_mapping(eiointc_priv[i]->eiointc_domain, j); + if (desc && desc->handle_irq && desc->handle_irq != handle_bad_irq) { + raw_spin_lock(&desc->lock); + irq_data = &desc->irq_data; + eiointc_set_irq_affinity(irq_data, irq_data->common->affinity, 0); + raw_spin_unlock(&desc->lock); + } + } + } +} + +static struct syscore_ops eiointc_syscore_ops = { + .suspend = eiointc_suspend, + .resume = eiointc_resume, +}; + static int __init pch_pic_parse_madt(union acpi_subtable_headers *header, const unsigned long end) @@ -380,6 +412,7 @@ int __init eiointc_acpi_init(struct irq_domain *parent, parent_irq = irq_create_mapping(parent, acpi_eiointc->cascade); irq_set_chained_handler_and_data(parent_irq, eiointc_irq_dispatch, priv); + register_syscore_ops(&eiointc_syscore_ops); cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_LOONGARCH_STARTING, "irqchip/loongarch/intc:starting", eiointc_router_init, NULL); From 1ed008a2c3310ada91e86bd96b354212a9025a61 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 20 Oct 2022 15:35:26 +0800 Subject: [PATCH 056/134] irqchip/loongson-pch-pic: Add suspend/resume support Add suspend/resume support for PCH-PIC irqchip, which is needed for upcoming suspend/hibernation. Signed-off-by: Huacai Chen Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221020073527.541845-4-chenhuacai@loongson.cn --- drivers/irqchip/irq-loongson-pch-pic.c | 47 ++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c index a26a3f59d4a5..1fd015e77ecf 100644 --- a/drivers/irqchip/irq-loongson-pch-pic.c +++ b/drivers/irqchip/irq-loongson-pch-pic.c @@ -15,6 +15,7 @@ #include #include #include +#include /* Registers */ #define PCH_PIC_MASK 0x20 @@ -42,6 +43,9 @@ struct pch_pic { raw_spinlock_t pic_lock; u32 vec_count; u32 gsi_base; + u32 saved_vec_en[PIC_REG_COUNT]; + u32 saved_vec_pol[PIC_REG_COUNT]; + u32 saved_vec_edge[PIC_REG_COUNT]; }; static struct pch_pic *pch_pic_priv[MAX_IO_PICS]; @@ -145,6 +149,7 @@ static struct irq_chip pch_pic_irq_chip = { .irq_ack = pch_pic_ack_irq, .irq_set_affinity = irq_chip_set_affinity_parent, .irq_set_type = pch_pic_set_type, + .flags = IRQCHIP_SKIP_SET_WAKE, }; static int pch_pic_domain_translate(struct irq_domain *d, @@ -234,6 +239,46 @@ static void pch_pic_reset(struct pch_pic *priv) } } +static int pch_pic_suspend(void) +{ + int i, j; + + for (i = 0; i < nr_pics; i++) { + for (j = 0; j < PIC_REG_COUNT; j++) { + pch_pic_priv[i]->saved_vec_pol[j] = + readl(pch_pic_priv[i]->base + PCH_PIC_POL + 4 * j); + pch_pic_priv[i]->saved_vec_edge[j] = + readl(pch_pic_priv[i]->base + PCH_PIC_EDGE + 4 * j); + pch_pic_priv[i]->saved_vec_en[j] = + readl(pch_pic_priv[i]->base + PCH_PIC_MASK + 4 * j); + } + } + + return 0; +} + +static void pch_pic_resume(void) +{ + int i, j; + + for (i = 0; i < nr_pics; i++) { + pch_pic_reset(pch_pic_priv[i]); + for (j = 0; j < PIC_REG_COUNT; j++) { + writel(pch_pic_priv[i]->saved_vec_pol[j], + pch_pic_priv[i]->base + PCH_PIC_POL + 4 * j); + writel(pch_pic_priv[i]->saved_vec_edge[j], + pch_pic_priv[i]->base + PCH_PIC_EDGE + 4 * j); + writel(pch_pic_priv[i]->saved_vec_en[j], + pch_pic_priv[i]->base + PCH_PIC_MASK + 4 * j); + } + } +} + +static struct syscore_ops pch_pic_syscore_ops = { + .suspend = pch_pic_suspend, + .resume = pch_pic_resume, +}; + static int pch_pic_init(phys_addr_t addr, unsigned long size, int vec_base, struct irq_domain *parent_domain, struct fwnode_handle *domain_handle, u32 gsi_base) @@ -266,6 +311,8 @@ static int pch_pic_init(phys_addr_t addr, unsigned long size, int vec_base, pch_pic_handle[nr_pics] = domain_handle; pch_pic_priv[nr_pics++] = priv; + register_syscore_ops(&pch_pic_syscore_ops); + return 0; iounmap_base: From c7c75e32f8a61854c38326aef276e3a58dc7fd08 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 20 Oct 2022 15:35:27 +0800 Subject: [PATCH 057/134] irqchip/loongson-pch-lpc: Add suspend/resume support Add suspend/resume support for PCH-LPC irqchip, which is needed for upcoming suspend/hibernation. Signed-off-by: Huacai Chen Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221020073527.541845-5-chenhuacai@loongson.cn --- drivers/irqchip/irq-loongson-pch-lpc.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/drivers/irqchip/irq-loongson-pch-lpc.c b/drivers/irqchip/irq-loongson-pch-lpc.c index bf2324910a75..9b35492fb6be 100644 --- a/drivers/irqchip/irq-loongson-pch-lpc.c +++ b/drivers/irqchip/irq-loongson-pch-lpc.c @@ -13,6 +13,7 @@ #include #include #include +#include /* Registers */ #define LPC_INT_CTL 0x00 @@ -34,6 +35,7 @@ struct pch_lpc { u32 saved_reg_pol; }; +static struct pch_lpc *pch_lpc_priv; struct fwnode_handle *pch_lpc_handle; static void lpc_irq_ack(struct irq_data *d) @@ -147,6 +149,26 @@ static int pch_lpc_disabled(struct pch_lpc *priv) (readl(priv->base + LPC_INT_STS) == 0xffffffff); } +static int pch_lpc_suspend(void) +{ + pch_lpc_priv->saved_reg_ctl = readl(pch_lpc_priv->base + LPC_INT_CTL); + pch_lpc_priv->saved_reg_ena = readl(pch_lpc_priv->base + LPC_INT_ENA); + pch_lpc_priv->saved_reg_pol = readl(pch_lpc_priv->base + LPC_INT_POL); + return 0; +} + +static void pch_lpc_resume(void) +{ + writel(pch_lpc_priv->saved_reg_ctl, pch_lpc_priv->base + LPC_INT_CTL); + writel(pch_lpc_priv->saved_reg_ena, pch_lpc_priv->base + LPC_INT_ENA); + writel(pch_lpc_priv->saved_reg_pol, pch_lpc_priv->base + LPC_INT_POL); +} + +static struct syscore_ops pch_lpc_syscore_ops = { + .suspend = pch_lpc_suspend, + .resume = pch_lpc_resume, +}; + int __init pch_lpc_acpi_init(struct irq_domain *parent, struct acpi_madt_lpc_pic *acpi_pchlpc) { @@ -191,7 +213,10 @@ int __init pch_lpc_acpi_init(struct irq_domain *parent, parent_irq = irq_create_fwspec_mapping(&fwspec); irq_set_chained_handler_and_data(parent_irq, lpc_irq_dispatch, priv); + pch_lpc_priv = priv; pch_lpc_handle = irq_handle; + register_syscore_ops(&pch_lpc_syscore_ops); + return 0; free_irq_handle: From d46b99656a16c450681985661a249dbb18e55cc4 Mon Sep 17 00:00:00 2001 From: Konrad Dybcio Date: Sat, 22 Oct 2022 01:55:23 +0200 Subject: [PATCH 058/134] irqchip/apple-aic: Mark aic_info structs __initconst These structs hold information used only at init time that never gets modified, hence mark them __initconst. Signed-off-by: Konrad Dybcio Suggested-by: Marc Zyngier Reviewed-by: Eric Curtin Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221021235523.76585-1-konrad.dybcio@somainline.org --- drivers/irqchip/irq-apple-aic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index 1c2813ad8bbe..ae3437f03e6c 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -248,14 +248,14 @@ struct aic_info { bool fast_ipi; }; -static const struct aic_info aic1_info = { +static const struct aic_info aic1_info __initconst = { .version = 1, .event = AIC_EVENT, .target_cpu = AIC_TARGET_CPU, }; -static const struct aic_info aic1_fipi_info = { +static const struct aic_info aic1_fipi_info __initconst = { .version = 1, .event = AIC_EVENT, @@ -264,7 +264,7 @@ static const struct aic_info aic1_fipi_info = { .fast_ipi = true, }; -static const struct aic_info aic2_info = { +static const struct aic_info aic2_info __initconst = { .version = 2, .irq_cfg = AIC2_IRQ_CFG, From 915649da01de13961f9d6a891b6db5a6255ac0b2 Mon Sep 17 00:00:00 2001 From: wangjianli Date: Sat, 22 Oct 2022 13:46:55 +0800 Subject: [PATCH 059/134] irqchip/mips-gic: Drop repeated word in comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Delete the redundant word 'the'. Signed-off-by: wangjianli Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221022054655.36496-1-wangjianli@cdjrlc.com --- drivers/irqchip/irq-mips-gic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 1ba0f1555c80..1a6a7a672ad7 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -494,7 +494,7 @@ static int gic_irq_domain_map(struct irq_domain *d, unsigned int virq, map = GIC_MAP_PIN_MAP_TO_PIN | gic_cpu_pin; /* - * If adding support for more per-cpu interrupts, keep the the + * If adding support for more per-cpu interrupts, keep the * array in gic_all_vpes_irq_cpu_online() in sync. */ switch (intr) { From f9ee20c85b3a3ba0afd3672630ec4f93d339f015 Mon Sep 17 00:00:00 2001 From: Shang XiaoJing Date: Thu, 24 Nov 2022 14:51:50 +0800 Subject: [PATCH 060/134] irqchip: gic-pm: Use pm_runtime_resume_and_get() in gic_probe() gic_probe() calls pm_runtime_get_sync() and added fail path as rpm_put to put usage_counter. However, pm_runtime_get_sync() will increment usage_counter even it failed. Fix it by replacing it with pm_runtime_resume_and_get() to keep usage counter balanced. Fixes: 9c8edddfc992 ("irqchip/gic: Add platform driver for non-root GICs that require RPM") Signed-off-by: Shang XiaoJing Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124065150.22809-1-shangxiaojing@huawei.com --- drivers/irqchip/irq-gic-pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-pm.c b/drivers/irqchip/irq-gic-pm.c index b60e1853593f..3989d16f997b 100644 --- a/drivers/irqchip/irq-gic-pm.c +++ b/drivers/irqchip/irq-gic-pm.c @@ -102,7 +102,7 @@ static int gic_probe(struct platform_device *pdev) pm_runtime_enable(dev); - ret = pm_runtime_get_sync(dev); + ret = pm_runtime_resume_and_get(dev); if (ret < 0) goto rpm_disable; From 9869f37aa4ee2a2e08536529dab4ccda9e23ce0c Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 21 Nov 2022 16:16:22 +0100 Subject: [PATCH 061/134] irqchip/al-fic: Drop obsolete dependency on COMPILE_TEST Since commit 0166dc11be91 ("of: make CONFIG_OF user selectable"), it is possible to test-build any driver which depends on OF on any architecture by explicitly selecting OF. Therefore depending on COMPILE_TEST as an alternative is no longer needed. It is actually better to always build such drivers with OF enabled, so that the test builds are closer to how each driver will actually be built on its intended target. Building them without OF may not test much as the compiler will optimize out potentially large parts of the code. In the worst case, this could even pop false positive warnings. Dropping COMPILE_TEST here improves the quality of our testing and avoids wasting time on non-existent issues. Signed-off-by: Jean Delvare Cc: Talel Shenhar Cc: Thomas Gleixner Cc: Marc Zyngier Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221121161622.6294a899@endymion.delvare --- drivers/irqchip/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 7ef9f5e696d3..de43144da4a3 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -86,7 +86,7 @@ config ALPINE_MSI config AL_FIC bool "Amazon's Annapurna Labs Fabric Interrupt Controller" - depends on OF || COMPILE_TEST + depends on OF select GENERIC_IRQ_CHIP select IRQ_DOMAIN help From 4e08a286b1f7a0a32828d6411255296e4ef51fa6 Mon Sep 17 00:00:00 2001 From: ye xingchen Date: Thu, 17 Nov 2022 19:16:50 +0800 Subject: [PATCH 062/134] irqchip/st: Use device_get_match_data() to simplify the code Directly get the match data with device_get_match_data(). Signed-off-by: ye xingchen Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/202211171916504943604@zte.com.cn --- drivers/irqchip/irq-st.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/drivers/irqchip/irq-st.c b/drivers/irqchip/irq-st.c index 801551e46a7b..1b83512b29c6 100644 --- a/drivers/irqchip/irq-st.c +++ b/drivers/irqchip/irq-st.c @@ -153,18 +153,13 @@ static int st_irq_syscfg_enable(struct platform_device *pdev) static int st_irq_syscfg_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; - const struct of_device_id *match; struct st_irq_syscfg *ddata; ddata = devm_kzalloc(&pdev->dev, sizeof(*ddata), GFP_KERNEL); if (!ddata) return -ENOMEM; - match = of_match_device(st_irq_syscfg_match, &pdev->dev); - if (!match) - return -ENODEV; - - ddata->syscfg = (unsigned int)match->data; + ddata->syscfg = (unsigned int) device_get_match_data(&pdev->dev); ddata->regmap = syscon_regmap_lookup_by_phandle(np, "st,syscfg"); if (IS_ERR(ddata->regmap)) { From 4208d4faf36573a507b5e5de17abe342e9276759 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 15 Nov 2022 09:25:32 +0000 Subject: [PATCH 063/134] irqchip/wpcm450: Fix memory leak in wpcm450_aic_of_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If of_iomap() failed, 'aic' should be freed before return. Otherwise there is a memory leak. Fixes: fead4dd49663 ("irqchip: Add driver for WPCM450 interrupt controller") Signed-off-by: Wei Yongjun Reviewed-by: Jonathan Neuschäfer Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221115092532.1704032-1-weiyongjun@huaweicloud.com --- drivers/irqchip/irq-wpcm450-aic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-wpcm450-aic.c b/drivers/irqchip/irq-wpcm450-aic.c index 0dcbeb1a05a1..91df62a64cd9 100644 --- a/drivers/irqchip/irq-wpcm450-aic.c +++ b/drivers/irqchip/irq-wpcm450-aic.c @@ -146,6 +146,7 @@ static int __init wpcm450_aic_of_init(struct device_node *node, aic->regs = of_iomap(node, 0); if (!aic->regs) { pr_err("Failed to map WPCM450 AIC registers\n"); + kfree(aic); return -ENOMEM; } From d502c558fd2b190c9125e8da54bef3f302fa9b15 Mon Sep 17 00:00:00 2001 From: Aidan MacDonald Date: Sat, 12 Nov 2022 15:27:01 +0000 Subject: [PATCH 064/134] irqchip/sl28cpld: Replace irqchip mask_invert with unmask_base Remove use of the deprecated mask_invert flag. Inverted mask registers (where a '1' bit enables an IRQ) can be described more directly as an unmask register. Signed-off-by: Aidan MacDonald Reviewed-by: Michael Walle Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221112152701.41990-1-aidanmacdonald.0x0@gmail.com --- drivers/irqchip/irq-sl28cpld.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-sl28cpld.c b/drivers/irqchip/irq-sl28cpld.c index fbb354413ffa..f2172240172c 100644 --- a/drivers/irqchip/irq-sl28cpld.c +++ b/drivers/irqchip/irq-sl28cpld.c @@ -65,8 +65,7 @@ static int sl28cpld_intc_probe(struct platform_device *pdev) irqchip->chip.num_irqs = ARRAY_SIZE(sl28cpld_irqs); irqchip->chip.num_regs = 1; irqchip->chip.status_base = base + INTC_IP; - irqchip->chip.mask_base = base + INTC_IE; - irqchip->chip.mask_invert = true; + irqchip->chip.unmask_base = base + INTC_IE; irqchip->chip.ack_base = base + INTC_IP; return devm_regmap_add_irq_chip_fwnode(dev, dev_fwnode(dev), From 4a60a3cdcf1875c965095eb9e22c3d12bbc5a53d Mon Sep 17 00:00:00 2001 From: Liu Peibao Date: Fri, 4 Nov 2022 19:07:12 +0800 Subject: [PATCH 065/134] irqchip/loongson-liointc: Fix improper error handling in liointc_init() For cores less than 4, eg, loongson2k1000 with 2 cores, the of_property_match_string() may return with an error value, which causes that liointc could not work. At least isr0 is what should be checked like previous commit b2c4c3969fd7 ("irqchip/loongson-liointc: irqchip add 2.0 version") did. Fixes: 0858ed035a85 ("irqchip/loongson-liointc: Add ACPI init support") Signed-off-by: Liu Peibao Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221104110712.23300-1-liupeibao@loongson.cn --- drivers/irqchip/irq-loongson-liointc.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-loongson-liointc.c b/drivers/irqchip/irq-loongson-liointc.c index 0da8716f8f24..c4584e2f0ad3 100644 --- a/drivers/irqchip/irq-loongson-liointc.c +++ b/drivers/irqchip/irq-loongson-liointc.c @@ -207,10 +207,13 @@ static int liointc_init(phys_addr_t addr, unsigned long size, int revision, "reg-names", core_reg_names[i]); if (index < 0) - goto out_iounmap; + continue; priv->core_isr[i] = of_iomap(node, index); } + + if (!priv->core_isr[0]) + goto out_iounmap; } /* Setup IRQ domain */ From f5259b045c19f6e997bd12d53a5f76663537c1fd Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Sat, 26 Nov 2022 13:48:05 -0600 Subject: [PATCH 066/134] irqchip/sifive-plic: Support wake IRQs The PLIC does not define any special method for marking interrupts as wakeup-capable, so it should have the IRQCHIP_SKIP_SET_WAKE flag set. Signed-off-by: Samuel Holland Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221126194805.19431-1-samuel@sholland.org --- drivers/irqchip/irq-sifive-plic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-sifive-plic.c b/drivers/irqchip/irq-sifive-plic.c index 2f4784860df5..ff47bd0dec45 100644 --- a/drivers/irqchip/irq-sifive-plic.c +++ b/drivers/irqchip/irq-sifive-plic.c @@ -187,7 +187,8 @@ static struct irq_chip plic_edge_chip = { .irq_set_affinity = plic_set_affinity, #endif .irq_set_type = plic_irq_set_type, - .flags = IRQCHIP_AFFINITY_PRE_STARTUP, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; static struct irq_chip plic_chip = { @@ -201,7 +202,8 @@ static struct irq_chip plic_chip = { .irq_set_affinity = plic_set_affinity, #endif .irq_set_type = plic_irq_set_type, - .flags = IRQCHIP_AFFINITY_PRE_STARTUP, + .flags = IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP, }; static int plic_irq_set_type(struct irq_data *d, unsigned int type) From 7f3974ef4e31d730e7aa902b30800ba0962277da Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Mon, 28 Nov 2022 10:22:14 +0100 Subject: [PATCH 067/134] dt-bindings: interrupt-controller: mediatek,cirq: Migrate to dt schema Migrate mediatek,cirq.txt to dt schema as mediatek,mtk-cirq.yaml. While at it, I've also fixed some typos that were present in the original txt binding, as it was suggesting that the compatible string would have "mediatek,cirq" as compatible but, in reality, that's supposed to be "mediatek,mtk-cirq" instead. Little rewording on property descriptions also happened for them to be more concise. Signed-off-by: AngeloGioacchino Del Regno Reviewed-by: Krzysztof Kozlowski Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221128092217.36552-2-angelogioacchino.delregno@collabora.com --- .../interrupt-controller/mediatek,cirq.txt | 33 --------- .../mediatek,mtk-cirq.yaml | 67 +++++++++++++++++++ 2 files changed, 67 insertions(+), 33 deletions(-) delete mode 100644 Documentation/devicetree/bindings/interrupt-controller/mediatek,cirq.txt create mode 100644 Documentation/devicetree/bindings/interrupt-controller/mediatek,mtk-cirq.yaml diff --git a/Documentation/devicetree/bindings/interrupt-controller/mediatek,cirq.txt b/Documentation/devicetree/bindings/interrupt-controller/mediatek,cirq.txt deleted file mode 100644 index 5865f4f2c69d..000000000000 --- a/Documentation/devicetree/bindings/interrupt-controller/mediatek,cirq.txt +++ /dev/null @@ -1,33 +0,0 @@ -* Mediatek 27xx cirq - -In Mediatek SOCs, the CIRQ is a low power interrupt controller designed to -work outside MCUSYS which comprises with Cortex-Ax cores,CCI and GIC. -The external interrupts (outside MCUSYS) will feed through CIRQ and connect -to GIC in MCUSYS. When CIRQ is enabled, it will record the edge-sensitive -interrupts and generate a pulse signal to parent interrupt controller when -flush command is executed. With CIRQ, MCUSYS can be completely turned off -to improve the system power consumption without losing interrupts. - -Required properties: -- compatible: should be one of - - "mediatek,mt2701-cirq" for mt2701 CIRQ - - "mediatek,mt8135-cirq" for mt8135 CIRQ - - "mediatek,mt8173-cirq" for mt8173 CIRQ - and "mediatek,cirq" as a fallback. -- interrupt-controller : Identifies the node as an interrupt controller. -- #interrupt-cells : Use the same format as specified by GIC in arm,gic.txt. -- reg: Physical base address of the cirq registers and length of memory - mapped region. -- mediatek,ext-irq-range: Identifies external irq number range in different - SOCs. - -Example: - cirq: interrupt-controller@10204000 { - compatible = "mediatek,mt2701-cirq", - "mediatek,mtk-cirq"; - interrupt-controller; - #interrupt-cells = <3>; - interrupt-parent = <&sysirq>; - reg = <0 0x10204000 0 0x400>; - mediatek,ext-irq-start = <32 200>; - }; diff --git a/Documentation/devicetree/bindings/interrupt-controller/mediatek,mtk-cirq.yaml b/Documentation/devicetree/bindings/interrupt-controller/mediatek,mtk-cirq.yaml new file mode 100644 index 000000000000..4f1132c077ff --- /dev/null +++ b/Documentation/devicetree/bindings/interrupt-controller/mediatek,mtk-cirq.yaml @@ -0,0 +1,67 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/interrupt-controller/mediatek,mtk-cirq.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: MediaTek System Interrupt Controller + +maintainers: + - Youlin Pei + +description: + In MediaTek SoCs, the CIRQ is a low power interrupt controller designed to + work outside of MCUSYS which comprises with Cortex-Ax cores, CCI and GIC. + The external interrupts (outside MCUSYS) will feed through CIRQ and connect + to GIC in MCUSYS. When CIRQ is enabled, it will record the edge-sensitive + interrupts and generate a pulse signal to parent interrupt controller when + flush command is executed. With CIRQ, MCUSYS can be completely turned off + to improve the system power consumption without losing interrupts. + + +properties: + compatible: + items: + - enum: + - mediatek,mt2701-cirq + - mediatek,mt8135-cirq + - mediatek,mt8173-cirq + - const: mediatek,mtk-cirq + + reg: + maxItems: 1 + + '#interrupt-cells': + const: 3 + + interrupt-controller: true + + mediatek,ext-irq-range: + $ref: /schemas/types.yaml#/definitions/uint32-array + items: + - description: First CIRQ interrupt + - description: Last CIRQ interrupt + description: + Identifies the range of external interrupts in different SoCs + +required: + - compatible + - reg + - '#interrupt-cells' + - interrupt-controller + - mediatek,ext-irq-range + +additionalProperties: false + +examples: + - | + #include + + cirq: interrupt-controller@10204000 { + compatible = "mediatek,mt2701-cirq", "mediatek,mtk-cirq"; + reg = <0x10204000 0x400>; + #interrupt-cells = <3>; + interrupt-controller; + interrupt-parent = <&sysirq>; + mediatek,ext-irq-range = <32 200>; + }; From 85de640c6b89eb76fc314a3b451d054bedd5e9dc Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Mon, 28 Nov 2022 10:22:15 +0100 Subject: [PATCH 068/134] dt-bindings: interrupt-controller: mediatek,cirq: Document MT8192 Add compatible to support the SYS_CIRQ controller found on MT8192. Signed-off-by: AngeloGioacchino Del Regno Acked-by: Krzysztof Kozlowski Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221128092217.36552-3-angelogioacchino.delregno@collabora.com --- .../bindings/interrupt-controller/mediatek,mtk-cirq.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/interrupt-controller/mediatek,mtk-cirq.yaml b/Documentation/devicetree/bindings/interrupt-controller/mediatek,mtk-cirq.yaml index 4f1132c077ff..fdcb4d8db818 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/mediatek,mtk-cirq.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/mediatek,mtk-cirq.yaml @@ -26,6 +26,7 @@ properties: - mediatek,mt2701-cirq - mediatek,mt8135-cirq - mediatek,mt8173-cirq + - mediatek,mt8192-cirq - const: mediatek,mtk-cirq reg: From 45ac01959edcecfa5d6652c2397d3866e55b0da8 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Mon, 28 Nov 2022 10:22:16 +0100 Subject: [PATCH 069/134] irqchip/irq-mtk-cirq: Move register offsets to const array In preparation to add support for new SoCs having a different register layout, add an enumeration that documents register offsets and move the definitions for the same to a u32 array; Selecting the right register offsets array is done by adding an of_device_id array containing all of the currently supported compatible strings pointing to the "v1" offsets array (as data): since no devicetree declares the `mediatek,mtk-cirq` compatible without a SoC-specific one, it wasn't necessary to provide any legacy fallback. Every usage of the aforemementioned definitions was changed to get a register address through a newly introduced `mtk_cirq_reg()` accessor. This change brings no functional changes. Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221128092217.36552-4-angelogioacchino.delregno@collabora.com --- drivers/irqchip/irq-mtk-cirq.c | 82 +++++++++++++++++++++++++++------- 1 file changed, 65 insertions(+), 17 deletions(-) diff --git a/drivers/irqchip/irq-mtk-cirq.c b/drivers/irqchip/irq-mtk-cirq.c index 9bca0918078e..4776ed6492fb 100644 --- a/drivers/irqchip/irq-mtk-cirq.c +++ b/drivers/irqchip/irq-mtk-cirq.c @@ -15,14 +15,29 @@ #include #include -#define CIRQ_ACK 0x40 -#define CIRQ_MASK_SET 0xc0 -#define CIRQ_MASK_CLR 0x100 -#define CIRQ_SENS_SET 0x180 -#define CIRQ_SENS_CLR 0x1c0 -#define CIRQ_POL_SET 0x240 -#define CIRQ_POL_CLR 0x280 -#define CIRQ_CONTROL 0x300 +enum mtk_cirq_regoffs_index { + CIRQ_STA, + CIRQ_ACK, + CIRQ_MASK_SET, + CIRQ_MASK_CLR, + CIRQ_SENS_SET, + CIRQ_SENS_CLR, + CIRQ_POL_SET, + CIRQ_POL_CLR, + CIRQ_CONTROL +}; + +static const u32 mtk_cirq_regoffs_v1[] = { + [CIRQ_STA] = 0x0, + [CIRQ_ACK] = 0x40, + [CIRQ_MASK_SET] = 0xc0, + [CIRQ_MASK_CLR] = 0x100, + [CIRQ_SENS_SET] = 0x180, + [CIRQ_SENS_CLR] = 0x1c0, + [CIRQ_POL_SET] = 0x240, + [CIRQ_POL_CLR] = 0x280, + [CIRQ_CONTROL] = 0x300, +}; #define CIRQ_EN 0x1 #define CIRQ_EDGE 0x2 @@ -32,18 +47,32 @@ struct mtk_cirq_chip_data { void __iomem *base; unsigned int ext_irq_start; unsigned int ext_irq_end; + const u32 *offsets; struct irq_domain *domain; }; static struct mtk_cirq_chip_data *cirq_data; -static void mtk_cirq_write_mask(struct irq_data *data, unsigned int offset) +static void __iomem *mtk_cirq_reg(struct mtk_cirq_chip_data *chip_data, + enum mtk_cirq_regoffs_index idx) +{ + return chip_data->base + chip_data->offsets[idx]; +} + +static void __iomem *mtk_cirq_irq_reg(struct mtk_cirq_chip_data *chip_data, + enum mtk_cirq_regoffs_index idx, + unsigned int cirq_num) +{ + return mtk_cirq_reg(chip_data, idx) + (cirq_num / 32) * 4; +} + +static void mtk_cirq_write_mask(struct irq_data *data, enum mtk_cirq_regoffs_index idx) { struct mtk_cirq_chip_data *chip_data = data->chip_data; unsigned int cirq_num = data->hwirq; u32 mask = 1 << (cirq_num % 32); - writel_relaxed(mask, chip_data->base + offset + (cirq_num / 32) * 4); + writel_relaxed(mask, mtk_cirq_irq_reg(chip_data, idx, cirq_num)); } static void mtk_cirq_mask(struct irq_data *data) @@ -160,6 +189,7 @@ static const struct irq_domain_ops cirq_domain_ops = { #ifdef CONFIG_PM_SLEEP static int mtk_cirq_suspend(void) { + void __iomem *reg; u32 value, mask; unsigned int irq, hwirq_num; bool pending, masked; @@ -200,31 +230,34 @@ static int mtk_cirq_suspend(void) continue; } + reg = mtk_cirq_irq_reg(cirq_data, CIRQ_ACK, i); mask = 1 << (i % 32); - writel_relaxed(mask, cirq_data->base + CIRQ_ACK + (i / 32) * 4); + writel_relaxed(mask, reg); } /* set edge_only mode, record edge-triggerd interrupts */ /* enable cirq */ - value = readl_relaxed(cirq_data->base + CIRQ_CONTROL); + reg = mtk_cirq_reg(cirq_data, CIRQ_CONTROL); + value = readl_relaxed(reg); value |= (CIRQ_EDGE | CIRQ_EN); - writel_relaxed(value, cirq_data->base + CIRQ_CONTROL); + writel_relaxed(value, reg); return 0; } static void mtk_cirq_resume(void) { + void __iomem *reg = mtk_cirq_reg(cirq_data, CIRQ_CONTROL); u32 value; /* flush recorded interrupts, will send signals to parent controller */ - value = readl_relaxed(cirq_data->base + CIRQ_CONTROL); - writel_relaxed(value | CIRQ_FLUSH, cirq_data->base + CIRQ_CONTROL); + value = readl_relaxed(reg); + writel_relaxed(value | CIRQ_FLUSH, reg); /* disable cirq */ - value = readl_relaxed(cirq_data->base + CIRQ_CONTROL); + value = readl_relaxed(reg); value &= ~(CIRQ_EDGE | CIRQ_EN); - writel_relaxed(value, cirq_data->base + CIRQ_CONTROL); + writel_relaxed(value, reg); } static struct syscore_ops mtk_cirq_syscore_ops = { @@ -240,10 +273,18 @@ static void mtk_cirq_syscore_init(void) static inline void mtk_cirq_syscore_init(void) {} #endif +static const struct of_device_id mtk_cirq_of_match[] = { + { .compatible = "mediatek,mt2701-cirq", .data = &mtk_cirq_regoffs_v1 }, + { .compatible = "mediatek,mt8135-cirq", .data = &mtk_cirq_regoffs_v1 }, + { .compatible = "mediatek,mt8173-cirq", .data = &mtk_cirq_regoffs_v1 }, + { /* sentinel */ } +}; + static int __init mtk_cirq_of_init(struct device_node *node, struct device_node *parent) { struct irq_domain *domain, *domain_parent; + const struct of_device_id *match; unsigned int irq_num; int ret; @@ -274,6 +315,13 @@ static int __init mtk_cirq_of_init(struct device_node *node, if (ret) goto out_unmap; + match = of_match_node(mtk_cirq_of_match, node); + if (!match) { + ret = -ENODEV; + goto out_unmap; + } + cirq_data->offsets = match->data; + irq_num = cirq_data->ext_irq_end - cirq_data->ext_irq_start + 1; domain = irq_domain_add_hierarchy(domain_parent, 0, irq_num, node, From 5c4e0aac0b2a27168844da49cee2c5dff2925d22 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Mon, 28 Nov 2022 10:22:17 +0100 Subject: [PATCH 070/134] irqchip/irq-mtk-cirq: Add support for System CIRQ on MT8192 On some SoCs the System CIRQ register layout is slightly different, as there are more registers per function and in some cases other differences later in the layout: this is seen on at least MT8192, but it's also valid for some other "contemporary" SoCs both for Chromebooks and for smartphones. Add the new "v2" register layout and use it if the compatible "mediatek,mt8192-cirq" is found. Signed-off-by: AngeloGioacchino Del Regno Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221128092217.36552-5-angelogioacchino.delregno@collabora.com --- drivers/irqchip/irq-mtk-cirq.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/irqchip/irq-mtk-cirq.c b/drivers/irqchip/irq-mtk-cirq.c index 4776ed6492fb..76bc0283e3b9 100644 --- a/drivers/irqchip/irq-mtk-cirq.c +++ b/drivers/irqchip/irq-mtk-cirq.c @@ -39,6 +39,18 @@ static const u32 mtk_cirq_regoffs_v1[] = { [CIRQ_CONTROL] = 0x300, }; +static const u32 mtk_cirq_regoffs_v2[] = { + [CIRQ_STA] = 0x0, + [CIRQ_ACK] = 0x80, + [CIRQ_MASK_SET] = 0x180, + [CIRQ_MASK_CLR] = 0x200, + [CIRQ_SENS_SET] = 0x300, + [CIRQ_SENS_CLR] = 0x380, + [CIRQ_POL_SET] = 0x480, + [CIRQ_POL_CLR] = 0x500, + [CIRQ_CONTROL] = 0x600, +}; + #define CIRQ_EN 0x1 #define CIRQ_EDGE 0x2 #define CIRQ_FLUSH 0x4 @@ -277,6 +289,7 @@ static const struct of_device_id mtk_cirq_of_match[] = { { .compatible = "mediatek,mt2701-cirq", .data = &mtk_cirq_regoffs_v1 }, { .compatible = "mediatek,mt8135-cirq", .data = &mtk_cirq_regoffs_v1 }, { .compatible = "mediatek,mt8173-cirq", .data = &mtk_cirq_regoffs_v1 }, + { .compatible = "mediatek,mt8192-cirq", .data = &mtk_cirq_regoffs_v2 }, { /* sentinel */ } }; From 3d12938dbc048ecb193fec69898d95f6b4813a4b Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Thu, 20 Oct 2022 22:25:14 +0800 Subject: [PATCH 071/134] irqchip/loongarch: Adjust acpi_cascade_irqdomain_init() and sub-routines 1, Adjust the return of acpi_cascade_irqdomain_init() and check its return value. 2, Combine unnecessary short lines to one long line. Signed-off-by: Huacai Chen Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221020142514.1725514-1-chenhuacai@loongson.cn --- drivers/irqchip/irq-loongarch-cpu.c | 30 +++++++++++++++----------- drivers/irqchip/irq-loongson-eiointc.c | 30 +++++++++++++++----------- drivers/irqchip/irq-loongson-pch-pic.c | 15 +++++++------ 3 files changed, 44 insertions(+), 31 deletions(-) diff --git a/drivers/irqchip/irq-loongarch-cpu.c b/drivers/irqchip/irq-loongarch-cpu.c index 741612ba6a52..fdec3e9cfacf 100644 --- a/drivers/irqchip/irq-loongarch-cpu.c +++ b/drivers/irqchip/irq-loongarch-cpu.c @@ -92,18 +92,16 @@ static const struct irq_domain_ops loongarch_cpu_intc_irq_domain_ops = { .xlate = irq_domain_xlate_onecell, }; -static int __init -liointc_parse_madt(union acpi_subtable_headers *header, - const unsigned long end) +static int __init liointc_parse_madt(union acpi_subtable_headers *header, + const unsigned long end) { struct acpi_madt_lio_pic *liointc_entry = (struct acpi_madt_lio_pic *)header; return liointc_acpi_init(irq_domain, liointc_entry); } -static int __init -eiointc_parse_madt(union acpi_subtable_headers *header, - const unsigned long end) +static int __init eiointc_parse_madt(union acpi_subtable_headers *header, + const unsigned long end) { struct acpi_madt_eio_pic *eiointc_entry = (struct acpi_madt_eio_pic *)header; @@ -112,16 +110,24 @@ eiointc_parse_madt(union acpi_subtable_headers *header, static int __init acpi_cascade_irqdomain_init(void) { - acpi_table_parse_madt(ACPI_MADT_TYPE_LIO_PIC, - liointc_parse_madt, 0); - acpi_table_parse_madt(ACPI_MADT_TYPE_EIO_PIC, - eiointc_parse_madt, 0); + int r; + + r = acpi_table_parse_madt(ACPI_MADT_TYPE_LIO_PIC, liointc_parse_madt, 0); + if (r < 0) + return r; + + r = acpi_table_parse_madt(ACPI_MADT_TYPE_EIO_PIC, eiointc_parse_madt, 0); + if (r < 0) + return r; + return 0; } static int __init cpuintc_acpi_init(union acpi_subtable_headers *header, const unsigned long end) { + int ret; + if (irq_domain) return 0; @@ -139,9 +145,9 @@ static int __init cpuintc_acpi_init(union acpi_subtable_headers *header, set_handle_irq(&handle_cpu_irq); acpi_set_irq_model(ACPI_IRQ_MODEL_LPIC, lpic_get_gsi_domain_id); acpi_set_gsi_to_irq_fallback(lpic_gsi_to_irq); - acpi_cascade_irqdomain_init(); + ret = acpi_cascade_irqdomain_init(); - return 0; + return ret; } IRQCHIP_ACPI_DECLARE(cpuintc_v1, ACPI_MADT_TYPE_CORE_PIC, diff --git a/drivers/irqchip/irq-loongson-eiointc.c b/drivers/irqchip/irq-loongson-eiointc.c index 5cf4b800e499..d15fd38c1756 100644 --- a/drivers/irqchip/irq-loongson-eiointc.c +++ b/drivers/irqchip/irq-loongson-eiointc.c @@ -333,9 +333,8 @@ static struct syscore_ops eiointc_syscore_ops = { .resume = eiointc_resume, }; -static int __init -pch_pic_parse_madt(union acpi_subtable_headers *header, - const unsigned long end) +static int __init pch_pic_parse_madt(union acpi_subtable_headers *header, + const unsigned long end) { struct acpi_madt_bio_pic *pchpic_entry = (struct acpi_madt_bio_pic *)header; unsigned int node = (pchpic_entry->address >> 44) & 0xf; @@ -347,9 +346,8 @@ pch_pic_parse_madt(union acpi_subtable_headers *header, return -EINVAL; } -static int __init -pch_msi_parse_madt(union acpi_subtable_headers *header, - const unsigned long end) +static int __init pch_msi_parse_madt(union acpi_subtable_headers *header, + const unsigned long end) { struct acpi_madt_msi_pic *pchmsi_entry = (struct acpi_madt_msi_pic *)header; struct irq_domain *parent = acpi_get_vec_parent(eiointc_priv[nr_pics - 1]->node, msi_group); @@ -362,17 +360,23 @@ pch_msi_parse_madt(union acpi_subtable_headers *header, static int __init acpi_cascade_irqdomain_init(void) { - acpi_table_parse_madt(ACPI_MADT_TYPE_BIO_PIC, - pch_pic_parse_madt, 0); - acpi_table_parse_madt(ACPI_MADT_TYPE_MSI_PIC, - pch_msi_parse_madt, 1); + int r; + + r = acpi_table_parse_madt(ACPI_MADT_TYPE_BIO_PIC, pch_pic_parse_madt, 0); + if (r < 0) + return r; + + r = acpi_table_parse_madt(ACPI_MADT_TYPE_MSI_PIC, pch_msi_parse_madt, 1); + if (r < 0) + return r; + return 0; } int __init eiointc_acpi_init(struct irq_domain *parent, struct acpi_madt_eio_pic *acpi_eiointc) { - int i, parent_irq; + int i, ret, parent_irq; unsigned long node_map; struct eiointc_priv *priv; @@ -419,9 +423,9 @@ int __init eiointc_acpi_init(struct irq_domain *parent, acpi_set_vec_parent(acpi_eiointc->node, priv->eiointc_domain, pch_group); acpi_set_vec_parent(acpi_eiointc->node, priv->eiointc_domain, msi_group); - acpi_cascade_irqdomain_init(); + ret = acpi_cascade_irqdomain_init(); - return 0; + return ret; out_free_handle: irq_domain_free_fwnode(priv->domain_handle); diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c index 1fd015e77ecf..437f1af693d0 100644 --- a/drivers/irqchip/irq-loongson-pch-pic.c +++ b/drivers/irqchip/irq-loongson-pch-pic.c @@ -378,9 +378,8 @@ int find_pch_pic(u32 gsi) return -1; } -static int __init -pch_lpc_parse_madt(union acpi_subtable_headers *header, - const unsigned long end) +static int __init pch_lpc_parse_madt(union acpi_subtable_headers *header, + const unsigned long end) { struct acpi_madt_lpc_pic *pchlpc_entry = (struct acpi_madt_lpc_pic *)header; @@ -389,8 +388,12 @@ pch_lpc_parse_madt(union acpi_subtable_headers *header, static int __init acpi_cascade_irqdomain_init(void) { - acpi_table_parse_madt(ACPI_MADT_TYPE_LPC_PIC, - pch_lpc_parse_madt, 0); + int r; + + r = acpi_table_parse_madt(ACPI_MADT_TYPE_LPC_PIC, pch_lpc_parse_madt, 0); + if (r < 0) + return r; + return 0; } @@ -417,7 +420,7 @@ int __init pch_pic_acpi_init(struct irq_domain *parent, } if (acpi_pchpic->id == 0) - acpi_cascade_irqdomain_init(); + ret = acpi_cascade_irqdomain_init(); return ret; } From 5e279739d7312b8958ec816fa38dba2725638503 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Tue, 1 Nov 2022 22:13:51 +0100 Subject: [PATCH 072/134] irqchip/gic: Use kstrtobool() instead of strtobool() strtobool() is the same as kstrtobool(). However, the latter is more used within the kernel. In order to remove strtobool() and slightly simplify kstrtox.h, switch to the other function name. While at it, include the corresponding header file () Signed-off-by: Christophe JAILLET Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/755c4083122071bb27aa8ed5d98156a07bb63a39.1667336095.git.christophe.jaillet@wanadoo.fr --- drivers/irqchip/irq-gic-v3.c | 3 ++- drivers/irqchip/irq-gic.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 34d58567b78d..997104d4338e 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -1171,7 +1172,7 @@ static bool gicv3_nolpi; static int __init gicv3_nolpi_cfg(char *buf) { - return strtobool(buf, &gicv3_nolpi); + return kstrtobool(buf, &gicv3_nolpi); } early_param("irqchip.gicv3_nolpi", gicv3_nolpi_cfg); diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 4c7bae0ec8f9..799f86d84b43 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -19,6 +19,7 @@ */ #include #include +#include #include #include #include @@ -1332,7 +1333,7 @@ static bool gicv2_force_probe; static int __init gicv2_force_probe_cfg(char *buf) { - return strtobool(buf, &gicv2_force_probe); + return kstrtobool(buf, &gicv2_force_probe); } early_param("irqchip.gicv2_force_probe", gicv2_force_probe_cfg); From 9049e1ca41983ab773d7ea244bee86d7835ec9f5 Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Mon, 28 Nov 2022 23:16:12 +0800 Subject: [PATCH 073/134] genirq/irqdesc: Don't try to remove non-existing sysfs files Fault injection tests trigger warnings like this: kernfs: can not remove 'chip_name', no directory WARNING: CPU: 0 PID: 253 at fs/kernfs/dir.c:1616 kernfs_remove_by_name_ns+0xce/0xe0 RIP: 0010:kernfs_remove_by_name_ns+0xce/0xe0 Call Trace: remove_files.isra.1+0x3f/0xb0 sysfs_remove_group+0x68/0xe0 sysfs_remove_groups+0x41/0x70 __kobject_del+0x45/0xc0 kobject_del+0x29/0x40 free_desc+0x42/0x70 irq_free_descs+0x5e/0x90 The reason is that the interrupt descriptor sysfs handling does not roll back on a failing kobject_add() during allocation. If the descriptor is freed later on, kobject_del() is invoked with a not added kobject resulting in the above warnings. A proper rollback in case of a kobject_add() failure would be the straight forward solution. But this is not possible due to the way how interrupt descriptor sysfs handling works. Interrupt descriptors are allocated before sysfs becomes available. So the sysfs files for the early allocated descriptors are added later in the boot process. At this point there can be nothing useful done about a failing kobject_add(). For consistency the interrupt descriptor allocation always treats kobject_add() failures as non-critical and just emits a warning. To solve this problem, keep track in the interrupt descriptor whether kobject_add() was successful or not and make the invocation of kobject_del() conditional on that. [ tglx: Massage changelog, comments and use a state bit. ] Fixes: ecb3f394c5db ("genirq: Expose interrupt information through sysfs") Signed-off-by: Yang Yingliang Signed-off-by: Thomas Gleixner Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20221128151612.1786122-1-yangyingliang@huawei.com --- kernel/irq/internals.h | 2 ++ kernel/irq/irqdesc.c | 15 +++++++++------ 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index f09c60393e55..5fdc0b557579 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -52,6 +52,7 @@ enum { * IRQS_PENDING - irq is pending and replayed later * IRQS_SUSPENDED - irq is suspended * IRQS_NMI - irq line is used to deliver NMIs + * IRQS_SYSFS - descriptor has been added to sysfs */ enum { IRQS_AUTODETECT = 0x00000001, @@ -64,6 +65,7 @@ enum { IRQS_SUSPENDED = 0x00000800, IRQS_TIMINGS = 0x00001000, IRQS_NMI = 0x00002000, + IRQS_SYSFS = 0x00004000, }; #include "debug.h" diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index a91f9001103c..fd0996274401 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -288,22 +288,25 @@ static void irq_sysfs_add(int irq, struct irq_desc *desc) if (irq_kobj_base) { /* * Continue even in case of failure as this is nothing - * crucial. + * crucial and failures in the late irq_sysfs_init() + * cannot be rolled back. */ if (kobject_add(&desc->kobj, irq_kobj_base, "%d", irq)) pr_warn("Failed to add kobject for irq %d\n", irq); + else + desc->istate |= IRQS_SYSFS; } } static void irq_sysfs_del(struct irq_desc *desc) { /* - * If irq_sysfs_init() has not yet been invoked (early boot), then - * irq_kobj_base is NULL and the descriptor was never added. - * kobject_del() complains about a object with no parent, so make - * it conditional. + * Only invoke kobject_del() when kobject_add() was successfully + * invoked for the descriptor. This covers both early boot, where + * sysfs is not initialized yet, and the case of a failed + * kobject_add() invocation. */ - if (irq_kobj_base) + if (desc->istate & IRQS_SYSFS) kobject_del(&desc->kobj); } From 3ae977d0e4e3a2a2ccc912ca2d20c9430508ecdd Mon Sep 17 00:00:00 2001 From: Sean Anderson Date: Thu, 1 Dec 2022 16:28:07 -0500 Subject: [PATCH 074/134] irqchip/ls-extirq: Fix endianness detection parent is the interrupt parent, not the parent of node. Use node->parent. This fixes endianness detection on big-endian platforms. Fixes: 1b00adce8afd ("irqchip/ls-extirq: Fix invalid wait context by avoiding to use regmap") Signed-off-by: Sean Anderson Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221201212807.616191-1-sean.anderson@seco.com --- drivers/irqchip/irq-ls-extirq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-ls-extirq.c b/drivers/irqchip/irq-ls-extirq.c index d8d48b1f7c29..139f26b0a6ef 100644 --- a/drivers/irqchip/irq-ls-extirq.c +++ b/drivers/irqchip/irq-ls-extirq.c @@ -203,7 +203,7 @@ ls_extirq_of_init(struct device_node *node, struct device_node *parent) if (ret) goto err_parse_map; - priv->big_endian = of_device_is_big_endian(parent); + priv->big_endian = of_device_is_big_endian(node->parent); priv->is_ls1021a_or_ls1043a = of_device_is_compatible(node, "fsl,ls1021a-extirq") || of_device_is_compatible(node, "fsl,ls1043a-extirq"); raw_spin_lock_init(&priv->lock); From 065abd13a63f40318162eeca6c0215fc5cbb9b0a Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Mon, 5 Dec 2022 12:47:08 +0800 Subject: [PATCH 075/134] irqchip/loongarch-cpu: Fix a missing prototype warning 1, Rename loongarch_cpu_irq_of_init() to cpuintc_of_init() in order to keep the same style as the ACPI version. 2, Fix a missing prototype warning by adding a "static" modifier. Fixes: 855d4ca4bdb366aab3d4 ("irqchip: loongarch-cpu: add DT support") Reported-by: kernel test robot Cc: Peibao Liu Signed-off-by: Huacai Chen Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221205044708.2054022-1-chenhuacai@loongson.cn --- drivers/irqchip/irq-loongarch-cpu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-loongarch-cpu.c b/drivers/irqchip/irq-loongarch-cpu.c index a28b7c549654..738d69c4d4a3 100644 --- a/drivers/irqchip/irq-loongarch-cpu.c +++ b/drivers/irqchip/irq-loongarch-cpu.c @@ -93,7 +93,7 @@ static const struct irq_domain_ops loongarch_cpu_intc_irq_domain_ops = { }; #ifdef CONFIG_OF -int __init loongarch_cpu_irq_of_init(struct device_node *of_node, +static int __init cpuintc_of_init(struct device_node *of_node, struct device_node *parent) { cpuintc_handle = of_node_to_fwnode(of_node); @@ -107,8 +107,7 @@ int __init loongarch_cpu_irq_of_init(struct device_node *of_node, return 0; } -IRQCHIP_DECLARE(cpu_intc, "loongson,cpu-interrupt-controller", - loongarch_cpu_irq_of_init); +IRQCHIP_DECLARE(cpu_intc, "loongson,cpu-interrupt-controller", cpuintc_of_init); #endif static int __init From 6842694c5088925d216529d4a2358ab5ee8bb862 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Sat, 3 Dec 2022 17:05:11 +0700 Subject: [PATCH 076/134] PCI/MSI: Use bullet lists in kernel-doc comments of api.c Use bullet-list RST syntax for kernel-doc parameters' flags and interrupt mode descriptions. Otherwise Sphinx produces "Unexpected identation" errors and warnings. Fixes: 5c0997dc33ac24 ("PCI/MSI: Move pci_alloc_irq_vectors() to api.c") Fixes: 017239c8db2093 ("PCI/MSI: Move pci_irq_vector() to api.c") Fixes: be37b8428b7b77 ("PCI/MSI: Move pci_irq_get_affinity() to api.c") Reported-by: Stephen Rothwell Suggested-by: Ahmed S. Darwish Signed-off-by: Bagas Sanjaya Signed-off-by: Thomas Gleixner Acked-by: Ahmed S. Darwish Link: https://lore.kernel.org/r/20221203100511.222136-1-bagasdotme@gmail.com --- drivers/pci/msi/api.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/drivers/pci/msi/api.c b/drivers/pci/msi/api.c index 6c3ad4863850..2d46a0cfd541 100644 --- a/drivers/pci/msi/api.c +++ b/drivers/pci/msi/api.c @@ -142,12 +142,15 @@ EXPORT_SYMBOL(pci_disable_msix); * @min_vecs: minimum required number of vectors (must be >= 1) * @max_vecs: maximum desired number of vectors * @flags: One or more of: - * %PCI_IRQ_MSIX Allow trying MSI-X vector allocations - * %PCI_IRQ_MSI Allow trying MSI vector allocations - * %PCI_IRQ_LEGACY Allow trying legacy INTx interrupts, if - * and only if @min_vecs == 1 - * %PCI_IRQ_AFFINITY Auto-manage IRQs affinity by spreading - * the vectors around available CPUs + * + * * %PCI_IRQ_MSIX Allow trying MSI-X vector allocations + * * %PCI_IRQ_MSI Allow trying MSI vector allocations + * + * * %PCI_IRQ_LEGACY Allow trying legacy INTx interrupts, if + * and only if @min_vecs == 1 + * + * * %PCI_IRQ_AFFINITY Auto-manage IRQs affinity by spreading + * the vectors around available CPUs * * Allocate up to @max_vecs interrupt vectors on device. MSI-X irq * vector allocation has a higher precedence over plain MSI, which has a @@ -232,10 +235,11 @@ EXPORT_SYMBOL(pci_alloc_irq_vectors_affinity); * pci_irq_vector() - Get Linux IRQ number of a device interrupt vector * @dev: the PCI device to operate on * @nr: device-relative interrupt vector index (0-based); has different - * meanings, depending on interrupt mode - * MSI-X the index in the MSI-X vector table - * MSI the index of the enabled MSI vectors - * INTx must be 0 + * meanings, depending on interrupt mode: + * + * * MSI-X the index in the MSI-X vector table + * * MSI the index of the enabled MSI vectors + * * INTx must be 0 * * Return: the Linux IRQ number, or -EINVAL if @nr is out of range */ @@ -255,10 +259,11 @@ EXPORT_SYMBOL(pci_irq_vector); * pci_irq_get_affinity() - Get a device interrupt vector affinity * @dev: the PCI device to operate on * @nr: device-relative interrupt vector index (0-based); has different - * meanings, depending on interrupt mode - * MSI-X the index in the MSI-X vector table - * MSI the index of the enabled MSI vectors - * INTx must be 0 + * meanings, depending on interrupt mode: + * + * * MSI-X the index in the MSI-X vector table + * * MSI the index of the enabled MSI vectors + * * INTx must be 0 * * Return: MSI/MSI-X vector affinity, NULL if @nr is out of range or if * the MSI(-X) vector was allocated without explicit affinity From 3dad5f9ad99b77dfd234d31e2ea3d77f620efc09 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:24:08 +0100 Subject: [PATCH 077/134] genirq/msi: Move IRQ_DOMAIN_MSI_NOMASK_QUIRK to MSI flags It's truly a MSI only flag and for the upcoming per device MSI domains this must be in the MSI flags so it can be set during domain setup without exposing this quirk outside of x86. Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124230313.454246167@linutronix.de --- arch/x86/kernel/apic/msi.c | 5 ++--- include/linux/irqdomain.h | 9 +-------- include/linux/msi.h | 6 ++++++ kernel/irq/msi.c | 2 +- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 248a6a5c0ad8..71c87513327e 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -176,7 +176,8 @@ static struct msi_domain_ops pci_msi_domain_ops = { static struct msi_domain_info pci_msi_domain_info = { .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_PCI_MSIX, + MSI_FLAG_PCI_MSIX | MSI_FLAG_NOMASK_QUIRK, + .ops = &pci_msi_domain_ops, .chip = &pci_msi_controller, .handler = handle_edge_irq, @@ -200,8 +201,6 @@ struct irq_domain * __init native_create_pci_msi_domain(void) if (!d) { irq_domain_free_fwnode(fn); pr_warn("Failed to initialize PCI-MSI irqdomain.\n"); - } else { - d->flags |= IRQ_DOMAIN_MSI_NOMASK_QUIRK; } return d; } diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index a4af7f81661b..c42c36947f95 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -186,15 +186,8 @@ enum { /* Irq domain implements MSI remapping */ IRQ_DOMAIN_FLAG_MSI_REMAP = (1 << 5), - /* - * Quirk to handle MSI implementations which do not provide - * masking. Currently known to affect x86, but partially - * handled in core code. - */ - IRQ_DOMAIN_MSI_NOMASK_QUIRK = (1 << 6), - /* Irq domain doesn't translate anything */ - IRQ_DOMAIN_FLAG_NO_MAP = (1 << 7), + IRQ_DOMAIN_FLAG_NO_MAP = (1 << 6), /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved diff --git a/include/linux/msi.h b/include/linux/msi.h index 1ce9d5e0bfa3..2d87e000bd45 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -399,6 +399,12 @@ enum { MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS = (1 << 9), /* Free MSI descriptors */ MSI_FLAG_FREE_MSI_DESCS = (1 << 10), + /* + * Quirk to handle MSI implementations which do not provide + * masking. Currently known to affect x86, but has to be partially + * handled in the core MSI code. + */ + MSI_FLAG_NOMASK_QUIRK = (1 << 11), }; int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask, diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 4b99f37355d5..c37c0be4d646 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -875,7 +875,7 @@ static int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev * MSI affinity setting requires a special quirk (X86) when * reservation mode is active. */ - if (domain->flags & IRQ_DOMAIN_MSI_NOMASK_QUIRK) + if (info->flags & MSI_FLAG_NOMASK_QUIRK) vflags |= VIRQ_NOMASK_QUIRK; } From f876ea3b7969329d6472ae4126496bb03c89d89a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:24:10 +0100 Subject: [PATCH 078/134] genirq/irqdomain: Make struct irqdomain readable Tabular alignment of both kernel-doc and the actual struct declaration make visual parsing way more conveniant. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124230313.514944367@linutronix.de --- include/linux/irqdomain.h | 74 +++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 37 deletions(-) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index c42c36947f95..b1fdd8d309cf 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -117,53 +117,53 @@ struct irq_domain_chip_generic; /** * struct irq_domain - Hardware interrupt number translation object - * @link: Element in global irq_domain list. - * @name: Name of interrupt domain - * @ops: pointer to irq_domain methods - * @host_data: private data pointer for use by owner. Not touched by irq_domain - * core code. - * @flags: host per irq_domain flags - * @mapcount: The number of mapped interrupts + * @link: Element in global irq_domain list. + * @name: Name of interrupt domain + * @ops: Pointer to irq_domain methods + * @host_data: Private data pointer for use by owner. Not touched by irq_domain + * core code. + * @flags: Per irq_domain flags + * @mapcount: The number of mapped interrupts * - * Optional elements - * @fwnode: Pointer to firmware node associated with the irq_domain. Pretty easy - * to swap it for the of_node via the irq_domain_get_of_node accessor - * @gc: Pointer to a list of generic chips. There is a helper function for - * setting up one or more generic chips for interrupt controllers - * drivers using the generic chip library which uses this pointer. - * @dev: Pointer to a device that the domain represent, and that will be - * used for power management purposes. - * @parent: Pointer to parent irq_domain to support hierarchy irq_domains + * Optional elements: + * @fwnode: Pointer to firmware node associated with the irq_domain. Pretty easy + * to swap it for the of_node via the irq_domain_get_of_node accessor + * @gc: Pointer to a list of generic chips. There is a helper function for + * setting up one or more generic chips for interrupt controllers + * drivers using the generic chip library which uses this pointer. + * @dev: Pointer to a device that can be utilized for power management + * purposes related to the irq domain. + * @parent: Pointer to parent irq_domain to support hierarchy irq_domains * - * Revmap data, used internally by irq_domain - * @revmap_size: Size of the linear map table @revmap[] - * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map - * @revmap_mutex: Lock for the revmap - * @revmap: Linear table of irq_data pointers + * Revmap data, used internally by the irq domain code: + * @revmap_size: Size of the linear map table @revmap[] + * @revmap_tree: Radix map tree for hwirqs that don't fit in the linear map + * @revmap_mutex: Lock for the revmap + * @revmap: Linear table of irq_data pointers */ struct irq_domain { - struct list_head link; - const char *name; - const struct irq_domain_ops *ops; - void *host_data; - unsigned int flags; - unsigned int mapcount; + struct list_head link; + const char *name; + const struct irq_domain_ops *ops; + void *host_data; + unsigned int flags; + unsigned int mapcount; /* Optional data */ - struct fwnode_handle *fwnode; - enum irq_domain_bus_token bus_token; - struct irq_domain_chip_generic *gc; - struct device *dev; + struct fwnode_handle *fwnode; + enum irq_domain_bus_token bus_token; + struct irq_domain_chip_generic *gc; + struct device *dev; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY - struct irq_domain *parent; + struct irq_domain *parent; #endif /* reverse map data. The linear map gets appended to the irq_domain */ - irq_hw_number_t hwirq_max; - unsigned int revmap_size; - struct radix_tree_root revmap_tree; - struct mutex revmap_mutex; - struct irq_data __rcu *revmap[]; + irq_hw_number_t hwirq_max; + unsigned int revmap_size; + struct radix_tree_root revmap_tree; + struct mutex revmap_mutex; + struct irq_data __rcu *revmap[]; }; /* Irq domain flags */ From 6a9fc4190ca23fcf327de666e1a98dbdcdd2d210 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:24:12 +0100 Subject: [PATCH 079/134] genirq/irqdomain: Rename irq_domain::dev to irq_domain:: Pm_dev irq_domain::dev is a misnomer as it's usually the rule that a device pointer points to something which is directly related to the instance. irq_domain::dev can point to some other device for power management to ensure that this underlying device is not powered down when an interrupt is allocated. The upcoming per device MSI domains really require a pointer to the device which instantiated the irq domain and not to some random other device which is required for power management down the chain. Rename irq_domain::dev to irq_domain::pm_dev and fixup the few sites which use that pointer. Conversion was done with the help of coccinelle. Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124230313.574541683@linutronix.de --- drivers/irqchip/irq-gic.c | 4 ++-- include/linux/irqdomain.h | 6 +++--- kernel/irq/chip.c | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 4c7bae0ec8f9..08834e5ac19c 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -401,8 +401,8 @@ static void gic_irq_print_chip(struct irq_data *d, struct seq_file *p) { struct gic_chip_data *gic = irq_data_get_irq_chip_data(d); - if (gic->domain->dev) - seq_printf(p, gic->domain->dev->of_node->name); + if (gic->domain->pm_dev) + seq_printf(p, gic->domain->pm_dev->of_node->name); else seq_printf(p, "GIC-%d", (int)(gic - &gic_data[0])); } diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index b1fdd8d309cf..aa76da82fa36 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -131,7 +131,7 @@ struct irq_domain_chip_generic; * @gc: Pointer to a list of generic chips. There is a helper function for * setting up one or more generic chips for interrupt controllers * drivers using the generic chip library which uses this pointer. - * @dev: Pointer to a device that can be utilized for power management + * @pm_dev: Pointer to a device that can be utilized for power management * purposes related to the irq domain. * @parent: Pointer to parent irq_domain to support hierarchy irq_domains * @@ -153,7 +153,7 @@ struct irq_domain { struct fwnode_handle *fwnode; enum irq_domain_bus_token bus_token; struct irq_domain_chip_generic *gc; - struct device *dev; + struct device *pm_dev; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_domain *parent; #endif @@ -206,7 +206,7 @@ static inline void irq_domain_set_pm_device(struct irq_domain *d, struct device *dev) { if (d) - d->dev = dev; + d->pm_dev = dev; } #ifdef CONFIG_IRQ_DOMAIN diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 8ac37e8e738a..49e7bc871fec 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1561,10 +1561,10 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) return 0; } -static struct device *irq_get_parent_device(struct irq_data *data) +static struct device *irq_get_pm_device(struct irq_data *data) { if (data->domain) - return data->domain->dev; + return data->domain->pm_dev; return NULL; } @@ -1578,7 +1578,7 @@ static struct device *irq_get_parent_device(struct irq_data *data) */ int irq_chip_pm_get(struct irq_data *data) { - struct device *dev = irq_get_parent_device(data); + struct device *dev = irq_get_pm_device(data); int retval = 0; if (IS_ENABLED(CONFIG_PM) && dev) @@ -1597,7 +1597,7 @@ int irq_chip_pm_get(struct irq_data *data) */ int irq_chip_pm_put(struct irq_data *data) { - struct device *dev = irq_get_parent_device(data); + struct device *dev = irq_get_pm_device(data); int retval = 0; if (IS_ENABLED(CONFIG_PM) && dev) From 6b6941f6653ec8017e3822b55bb9eae245f0ed99 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:24:14 +0100 Subject: [PATCH 080/134] genirq/msi: Create msi_api.h Create a API header for MSI specific functions which are relevant to device drivers. Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124230313.632679220@linutronix.de --- include/linux/msi.h | 6 ++++-- include/linux/msi_api.h | 15 +++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) create mode 100644 include/linux/msi_api.h diff --git a/include/linux/msi.h b/include/linux/msi.h index 2d87e000bd45..9f72494dcc3e 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -13,11 +13,14 @@ * * Regular device drivers have no business with any of these functions and * especially storing MSI descriptor pointers in random code is considered - * abuse. The only function which is relevant for drivers is msi_get_virq(). + * abuse. + * + * Device driver relevant functions are available in */ #include #include +#include #include #include #include @@ -188,7 +191,6 @@ struct msi_device_data { int msi_setup_device_data(struct device *dev); -unsigned int msi_get_virq(struct device *dev, unsigned int index); void msi_lock_descs(struct device *dev); void msi_unlock_descs(struct device *dev); diff --git a/include/linux/msi_api.h b/include/linux/msi_api.h new file mode 100644 index 000000000000..57d27cfcde2d --- /dev/null +++ b/include/linux/msi_api.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef LINUX_MSI_API_H +#define LINUX_MSI_API_H + +/* + * APIs which are relevant for device driver code for allocating and + * freeing MSI interrupts and querying the associations between + * hardware/software MSI indices and the Linux interrupt number. + */ + +struct device; + +unsigned int msi_get_virq(struct device *dev, unsigned int index); + +#endif From b749e6d31c88cff2202b968aaba246e5d7379038 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:24:15 +0100 Subject: [PATCH 081/134] genirq/irqdomain: Provide IRQ_DOMAIN_FLAG_MSI_PARENT The new PCI/IMS (Interrupt Message Store) functionality is allowing hardware vendors to provide implementation specific storage for the MSI messages. This can be device memory and also host/guest memory, e.g. in queue memory which is shared with the hardware. This requires device specific MSI interrupt domains, which cannot be achieved by expanding the existing PCI/MSI interrupt domain concept which is a global interrupt domain shared by all PCI devices on a particular (IOMMU) segment: |--- device 1 [Vector]---[Remapping]---[PCI/MSI]--|... |--- device N This works because the PCI/MSI[-X] space is uniform, but falls apart with PCI/IMS which is implementation defined and must be available along with PCI/MSI[-X] on the same device. To support PCI/MSI[-X] plus PCI/IMS on the same device it is required to rework the PCI/MSI interrupt domain hierarchy concept in the following way: |--- [PCI/MSI] device 1 [Vector]---[Remapping]---|... |--- [PCI/MSI] device N That allows in the next step to create multiple interrupt domains per device: |--- [PCI/MSI] device 1 |--- [PCI/IMS] device 1 [Vector]---[Remapping]---|... |--- [PCI/MSI] device N |--- [PCI/IMS] device N So the domain which previously created the global PCI/MSI domain must now act as parent domain for the per device domains. The hierarchy depth is the same as before, but the PCI/MSI domains are then device specific and not longer global. Provide IRQ_DOMAIN_FLAG_MSI_PARENT, which allows to identify these parent domains, along with helpers to query it. Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124230313.690038274@linutronix.de --- include/linux/irqdomain.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index aa76da82fa36..f837db9a5339 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -189,6 +189,9 @@ enum { /* Irq domain doesn't translate anything */ IRQ_DOMAIN_FLAG_NO_MAP = (1 << 6), + /* Irq domain is a MSI parent domain */ + IRQ_DOMAIN_FLAG_MSI_PARENT = (1 << 8), + /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved * for implementation specific purposes and ignored by the @@ -551,6 +554,11 @@ static inline bool irq_domain_is_msi_remap(struct irq_domain *domain) extern bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain); +static inline bool irq_domain_is_msi_parent(struct irq_domain *domain) +{ + return domain->flags & IRQ_DOMAIN_FLAG_MSI_PARENT; +} + #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, int node, void *arg) @@ -596,6 +604,12 @@ irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain) { return false; } + +static inline bool irq_domain_is_msi_parent(struct irq_domain *domain) +{ + return false; +} + #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ #else /* CONFIG_IRQ_DOMAIN */ From e71c5d0bb1885b73c746b634895b85d135ce2c87 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:24:17 +0100 Subject: [PATCH 082/134] genirq/irqdomain: Provide IRQ_DOMAIN_FLAG_MSI_DEVICE Similar to marking parent MSI domains it's required to identify per device domains. Add flag and helpers. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124230313.747627287@linutronix.de --- include/linux/irqdomain.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index f837db9a5339..24b76688c2fb 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -192,6 +192,9 @@ enum { /* Irq domain is a MSI parent domain */ IRQ_DOMAIN_FLAG_MSI_PARENT = (1 << 8), + /* Irq domain is a MSI device domain */ + IRQ_DOMAIN_FLAG_MSI_DEVICE = (1 << 9), + /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved * for implementation specific purposes and ignored by the @@ -559,6 +562,11 @@ static inline bool irq_domain_is_msi_parent(struct irq_domain *domain) return domain->flags & IRQ_DOMAIN_FLAG_MSI_PARENT; } +static inline bool irq_domain_is_msi_device(struct irq_domain *domain) +{ + return domain->flags & IRQ_DOMAIN_FLAG_MSI_DEVICE; +} + #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, int node, void *arg) @@ -610,6 +618,11 @@ static inline bool irq_domain_is_msi_parent(struct irq_domain *domain) return false; } +static inline bool irq_domain_is_msi_device(struct irq_domain *domain) +{ + return false; +} + #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ #else /* CONFIG_IRQ_DOMAIN */ From 3e86a3a3ed031dd498e614db0fa082a58d700df9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:24:19 +0100 Subject: [PATCH 083/134] genirq/msi: Check for invalid MSI parent domain usage In the upcoming per device MSI domain concept the MSI parent domains are not allowed to be used as regular MSI domains where the MSI allocation/free operations are applicable. Add appropriate checks. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124230313.806128070@linutronix.de --- kernel/irq/msi.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index c37c0be4d646..5939dc613559 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -937,13 +937,21 @@ int msi_domain_alloc_irqs_descs_locked(struct irq_domain *domain, struct device lockdep_assert_held(&dev->msi.data->mutex); + if (WARN_ON_ONCE(irq_domain_is_msi_parent(domain))) { + ret = -EINVAL; + goto free; + } + + /* Frees allocated descriptors in case of failure. */ ret = msi_domain_add_simple_msi_descs(info, dev, nvec); if (ret) - return ret; + goto free; ret = ops->domain_alloc_irqs(domain, dev, nvec); - if (ret) - msi_domain_free_irqs_descs_locked(domain, dev); + if (!ret) + return 0; +free: + msi_domain_free_irqs_descs_locked(domain, dev); return ret; } @@ -1013,6 +1021,9 @@ void msi_domain_free_irqs_descs_locked(struct irq_domain *domain, struct device lockdep_assert_held(&dev->msi.data->mutex); + if (WARN_ON_ONCE(irq_domain_is_msi_parent(domain))) + return; + ops->domain_free_irqs(domain, dev); if (ops->msi_post_free) ops->msi_post_free(domain, dev); From f1139f905bd2d8bec59be0c1404b592279ae0ac9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:24:20 +0100 Subject: [PATCH 084/134] genirq/msi: Move xarray into a separate struct and create an array The upcoming support for multiple MSI domains per device requires storage for the MSI descriptors and in a second step storage for the irqdomain pointers. Move the xarray into a separate data structure msi_dev_domain and create an array with size 1 in msi_device_data, which can be expanded later when the support for per device domains is implemented. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124230313.864887773@linutronix.de --- include/linux/msi.h | 13 +++++++++++-- include/linux/msi_api.h | 8 ++++++++ kernel/irq/msi.c | 32 ++++++++++++++++++++++---------- 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/include/linux/msi.h b/include/linux/msi.h index 9f72494dcc3e..f7b9c41a4f54 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -173,19 +173,28 @@ enum msi_desc_filter { MSI_DESC_ASSOCIATED, }; + +/** + * struct msi_dev_domain - The internals of MSI domain info per device + * @store: Xarray for storing MSI descriptor pointers + */ +struct msi_dev_domain { + struct xarray store; +}; + /** * msi_device_data - MSI per device data * @properties: MSI properties which are interesting to drivers * @platform_data: Platform-MSI specific data * @mutex: Mutex protecting the MSI descriptor store - * @__store: Xarray for storing MSI descriptor pointers + * @__domains: Internal data for per device MSI domains * @__iter_idx: Index to search the next entry for iterators */ struct msi_device_data { unsigned long properties; struct platform_msi_priv_data *platform_data; struct mutex mutex; - struct xarray __store; + struct msi_dev_domain __domains[MSI_MAX_DEVICE_IRQDOMAINS]; unsigned long __iter_idx; }; diff --git a/include/linux/msi_api.h b/include/linux/msi_api.h index 57d27cfcde2d..4dbbce68a217 100644 --- a/include/linux/msi_api.h +++ b/include/linux/msi_api.h @@ -10,6 +10,14 @@ struct device; +/* + * Per device interrupt domain related constants. + */ +enum msi_domain_ids { + MSI_DEFAULT_DOMAIN, + MSI_MAX_DEVICE_IRQDOMAINS, +}; + unsigned int msi_get_virq(struct device *dev, unsigned int index); #endif diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 5939dc613559..c2bc94ee5c3e 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -60,10 +60,11 @@ static void msi_free_desc(struct msi_desc *desc) static int msi_insert_desc(struct msi_device_data *md, struct msi_desc *desc, unsigned int index) { + struct xarray *xa = &md->__domains[MSI_DEFAULT_DOMAIN].store; int ret; desc->msi_index = index; - ret = xa_insert(&md->__store, index, desc, GFP_KERNEL); + ret = xa_insert(xa, index, desc, GFP_KERNEL); if (ret) msi_free_desc(desc); return ret; @@ -147,7 +148,7 @@ static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter) void msi_free_msi_descs_range(struct device *dev, unsigned int first_index, unsigned int last_index) { - struct xarray *xa = &dev->msi.data->__store; + struct xarray *xa = &dev->msi.data->__domains[MSI_DEFAULT_DOMAIN].store; struct msi_desc *desc; unsigned long idx; @@ -179,9 +180,12 @@ EXPORT_SYMBOL_GPL(get_cached_msi_msg); static void msi_device_data_release(struct device *dev, void *res) { struct msi_device_data *md = res; + int i; - WARN_ON_ONCE(!xa_empty(&md->__store)); - xa_destroy(&md->__store); + for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) { + WARN_ON_ONCE(!xa_empty(&md->__domains[i].store)); + xa_destroy(&md->__domains[i].store); + } dev->msi.data = NULL; } @@ -198,7 +202,7 @@ static void msi_device_data_release(struct device *dev, void *res) int msi_setup_device_data(struct device *dev) { struct msi_device_data *md; - int ret; + int ret, i; if (dev->msi.data) return 0; @@ -213,7 +217,9 @@ int msi_setup_device_data(struct device *dev) return ret; } - xa_init(&md->__store); + for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) + xa_init(&md->__domains[i].store); + mutex_init(&md->mutex); dev->msi.data = md; devres_add(dev, md); @@ -236,7 +242,7 @@ EXPORT_SYMBOL_GPL(msi_lock_descs); */ void msi_unlock_descs(struct device *dev) { - /* Invalidate the index wich was cached by the iterator */ + /* Invalidate the index which was cached by the iterator */ dev->msi.data->__iter_idx = MSI_MAX_INDEX; mutex_unlock(&dev->msi.data->mutex); } @@ -244,9 +250,10 @@ EXPORT_SYMBOL_GPL(msi_unlock_descs); static struct msi_desc *msi_find_desc(struct msi_device_data *md, enum msi_desc_filter filter) { + struct xarray *xa = &md->__domains[MSI_DEFAULT_DOMAIN].store; struct msi_desc *desc; - xa_for_each_start(&md->__store, md->__iter_idx, desc, md->__iter_idx) { + xa_for_each_start(xa, md->__iter_idx, desc, md->__iter_idx) { if (msi_desc_match(desc, filter)) return desc; } @@ -320,6 +327,7 @@ unsigned int msi_get_virq(struct device *dev, unsigned int index) { struct msi_desc *desc; unsigned int ret = 0; + struct xarray *xa; bool pcimsi; if (!dev->msi.data) @@ -328,7 +336,8 @@ unsigned int msi_get_virq(struct device *dev, unsigned int index) pcimsi = dev_is_pci(dev) ? to_pci_dev(dev)->msi_enabled : false; msi_lock_descs(dev); - desc = xa_load(&dev->msi.data->__store, pcimsi ? 0 : index); + xa = &dev->msi.data->__domains[MSI_DEFAULT_DOMAIN].store; + desc = xa_load(xa, pcimsi ? 0 : index); if (desc && desc->irq) { /* * PCI-MSI has only one descriptor for multiple interrupts. @@ -707,6 +716,7 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, struct msi_domain_info *info = domain->host_data; struct msi_domain_ops *ops = info->ops; struct msi_desc *desc; + struct xarray *xa; int ret, virq; msi_lock_descs(dev); @@ -714,8 +724,10 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, if (ret) goto unlock; + xa = &dev->msi.data->__domains[MSI_DEFAULT_DOMAIN].store; + for (virq = virq_base; virq < virq_base + nvec; virq++) { - desc = xa_load(&dev->msi.data->__store, virq); + desc = xa_load(xa, virq); desc->irq = virq; ops->set_desc(arg, desc); From 64258eaa442b0b7d7eac8942cf27863ad9e6028e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:24:22 +0100 Subject: [PATCH 085/134] genirq/msi: Add pointers for per device irq domains With the upcoming per device MSI interrupt domain support it is necessary to store the domain pointers per device. Instead of delegating that storage to device drivers or subsystems add a domain pointer to the msi_dev_domain array in struct msi_device_data. This pointer is also used to take care of tearing down the irq domains when msi_device_data is cleaned up via devres. The interfaces into the MSI core will be changed from irqdomain pointer based interfaces to domain id based interfaces to support multiple MSI domains on a single device (e.g. PCI/MSI[-X] and PCI/IMS. Once the per device domain support is complete the irq domain pointer in struct device::msi.domain will not longer contain a pointer to the "global" MSI domain. It will contain a pointer to the MSI parent domain instead. It would be a horrible maze of conditionals to evaluate all over the place which domain pointer should be used, i.e. the "global" one in device::msi::domain or one from the internal pointer array. To avoid this evaluate in msi_setup_device_data() whether the irq domain which is associated to a device is a "global" or a parent MSI domain. If it is global then copy the pointer into the first entry of the msi_dev_domain array. This allows to convert interfaces and implementation to domain ids while keeping everything existing working. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124230313.923860399@linutronix.de --- include/linux/msi.h | 3 +++ kernel/irq/msi.c | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/include/linux/msi.h b/include/linux/msi.h index f7b9c41a4f54..fdbc80d81e26 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -77,6 +77,7 @@ struct msi_desc; struct pci_dev; struct platform_msi_priv_data; struct device_attribute; +struct irq_domain; void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg); #ifdef CONFIG_GENERIC_MSI_IRQ @@ -177,9 +178,11 @@ enum msi_desc_filter { /** * struct msi_dev_domain - The internals of MSI domain info per device * @store: Xarray for storing MSI descriptor pointers + * @irqdomain: Pointer to a per device interrupt domain */ struct msi_dev_domain { struct xarray store; + struct irq_domain *domain; }; /** diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index c2bc94ee5c3e..de65acc5cfb0 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -220,6 +220,15 @@ int msi_setup_device_data(struct device *dev) for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) xa_init(&md->__domains[i].store); + /* + * If @dev::msi::domain is set and is a global MSI domain, copy the + * pointer into the domain array so all code can operate on domain + * ids. The NULL pointer check is required to keep the legacy + * architecture specific PCI/MSI support working. + */ + if (dev->msi.domain && !irq_domain_is_msi_parent(dev->msi.domain)) + md->__domains[MSI_DEFAULT_DOMAIN].domain = dev->msi.domain; + mutex_init(&md->mutex); dev->msi.data = md; devres_add(dev, md); From 94ff94cfea2827e287a42781a47f37c9ef82186b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:24:24 +0100 Subject: [PATCH 086/134] genirq/msi: Make MSI descriptor iterators device domain aware To support multiple MSI interrupt domains per device it is necessary to segment the xarray MSI descriptor storage. Each domain gets up to MSI_MAX_INDEX entries. Change the iterators so they operate with domain ids and take the domain offsets into account. The publicly available iterators which are mostly used in legacy implementations and the PCI/MSI core default to MSI_DEFAULT_DOMAIN (0) which is the id for the existing "global" domains. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124230313.985498981@linutronix.de --- include/linux/msi.h | 48 +++++++++++++++++++++++++++++++++++++++------ kernel/irq/msi.c | 35 +++++++++++++++++++++------------ 2 files changed, 65 insertions(+), 18 deletions(-) diff --git a/include/linux/msi.h b/include/linux/msi.h index fdbc80d81e26..764a4e8828fb 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -206,11 +206,48 @@ int msi_setup_device_data(struct device *dev); void msi_lock_descs(struct device *dev); void msi_unlock_descs(struct device *dev); -struct msi_desc *msi_first_desc(struct device *dev, enum msi_desc_filter filter); -struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter); +struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid, + enum msi_desc_filter filter); /** - * msi_for_each_desc - Iterate the MSI descriptors + * msi_first_desc - Get the first MSI descriptor of the default irqdomain + * @dev: Device to operate on + * @filter: Descriptor state filter + * + * Must be called with the MSI descriptor mutex held, i.e. msi_lock_descs() + * must be invoked before the call. + * + * Return: Pointer to the first MSI descriptor matching the search + * criteria, NULL if none found. + */ +static inline struct msi_desc *msi_first_desc(struct device *dev, + enum msi_desc_filter filter) +{ + return msi_domain_first_desc(dev, MSI_DEFAULT_DOMAIN, filter); +} + +struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid, + enum msi_desc_filter filter); + +/** + * msi_domain_for_each_desc - Iterate the MSI descriptors in a specific domain + * + * @desc: struct msi_desc pointer used as iterator + * @dev: struct device pointer - device to iterate + * @domid: The id of the interrupt domain which should be walked. + * @filter: Filter for descriptor selection + * + * Notes: + * - The loop must be protected with a msi_lock_descs()/msi_unlock_descs() + * pair. + * - It is safe to remove a retrieved MSI descriptor in the loop. + */ +#define msi_domain_for_each_desc(desc, dev, domid, filter) \ + for ((desc) = msi_domain_first_desc((dev), (domid), (filter)); (desc); \ + (desc) = msi_next_desc((dev), (domid), (filter))) + +/** + * msi_for_each_desc - Iterate the MSI descriptors in the default irqdomain * * @desc: struct msi_desc pointer used as iterator * @dev: struct device pointer - device to iterate @@ -221,9 +258,8 @@ struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter); * pair. * - It is safe to remove a retrieved MSI descriptor in the loop. */ -#define msi_for_each_desc(desc, dev, filter) \ - for ((desc) = msi_first_desc((dev), (filter)); (desc); \ - (desc) = msi_next_desc((dev), (filter))) +#define msi_for_each_desc(desc, dev, filter) \ + msi_domain_for_each_desc((desc), (dev), MSI_DEFAULT_DOMAIN, (filter)) #define msi_desc_to_dev(desc) ((desc)->dev) diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index de65acc5cfb0..ec08d1f22be0 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -19,8 +19,14 @@ #include "internals.h" +/* Invalid Xarray index which is outside of any searchable range */ +#define MSI_XA_MAX_INDEX (ULONG_MAX - 1) +/* The maximum domain size */ +#define MSI_XA_DOMAIN_SIZE (MSI_MAX_INDEX + 1) + static inline int msi_sysfs_create_group(struct device *dev); + /** * msi_alloc_desc - Allocate an initialized msi_desc * @dev: Pointer to the device for which this is allocated @@ -252,27 +258,29 @@ EXPORT_SYMBOL_GPL(msi_lock_descs); void msi_unlock_descs(struct device *dev) { /* Invalidate the index which was cached by the iterator */ - dev->msi.data->__iter_idx = MSI_MAX_INDEX; + dev->msi.data->__iter_idx = MSI_XA_MAX_INDEX; mutex_unlock(&dev->msi.data->mutex); } EXPORT_SYMBOL_GPL(msi_unlock_descs); -static struct msi_desc *msi_find_desc(struct msi_device_data *md, enum msi_desc_filter filter) +static struct msi_desc *msi_find_desc(struct msi_device_data *md, unsigned int domid, + enum msi_desc_filter filter) { - struct xarray *xa = &md->__domains[MSI_DEFAULT_DOMAIN].store; + struct xarray *xa = &md->__domains[domid].store; struct msi_desc *desc; xa_for_each_start(xa, md->__iter_idx, desc, md->__iter_idx) { if (msi_desc_match(desc, filter)) return desc; } - md->__iter_idx = MSI_MAX_INDEX; + md->__iter_idx = MSI_XA_MAX_INDEX; return NULL; } /** - * msi_first_desc - Get the first MSI descriptor of a device + * msi_domain_first_desc - Get the first MSI descriptor of an irqdomain associated to a device * @dev: Device to operate on + * @domid: The id of the interrupt domain which should be walked. * @filter: Descriptor state filter * * Must be called with the MSI descriptor mutex held, i.e. msi_lock_descs() @@ -281,23 +289,25 @@ static struct msi_desc *msi_find_desc(struct msi_device_data *md, enum msi_desc_ * Return: Pointer to the first MSI descriptor matching the search * criteria, NULL if none found. */ -struct msi_desc *msi_first_desc(struct device *dev, enum msi_desc_filter filter) +struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid, + enum msi_desc_filter filter) { struct msi_device_data *md = dev->msi.data; - if (WARN_ON_ONCE(!md)) + if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS)) return NULL; lockdep_assert_held(&md->mutex); md->__iter_idx = 0; - return msi_find_desc(md, filter); + return msi_find_desc(md, domid, filter); } -EXPORT_SYMBOL_GPL(msi_first_desc); +EXPORT_SYMBOL_GPL(msi_domain_first_desc); /** * msi_next_desc - Get the next MSI descriptor of a device * @dev: Device to operate on + * @domid: The id of the interrupt domain which should be walked. * @filter: Descriptor state filter * * The first invocation of msi_next_desc() has to be preceeded by a @@ -308,11 +318,12 @@ EXPORT_SYMBOL_GPL(msi_first_desc); * Return: Pointer to the next MSI descriptor matching the search * criteria, NULL if none found. */ -struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter) +struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid, + enum msi_desc_filter filter) { struct msi_device_data *md = dev->msi.data; - if (WARN_ON_ONCE(!md)) + if (WARN_ON_ONCE(!md || domid >= MSI_MAX_DEVICE_IRQDOMAINS)) return NULL; lockdep_assert_held(&md->mutex); @@ -321,7 +332,7 @@ struct msi_desc *msi_next_desc(struct device *dev, enum msi_desc_filter filter) return NULL; md->__iter_idx++; - return msi_find_desc(md, filter); + return msi_find_desc(md, domid, filter); } EXPORT_SYMBOL_GPL(msi_next_desc); From 98043704f375f63a47efeff123ab92fcf34b95e6 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 25 Nov 2022 00:24:25 +0100 Subject: [PATCH 087/134] genirq/msi: Make msi_get_virq() device domain aware In preparation of the upcoming per device multi MSI domain support, change the interface to support lookups based on domain id and zero based index within the domain. Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124230314.044613697@linutronix.de --- include/linux/msi_api.h | 14 +++++++++++++- kernel/irq/msi.c | 19 +++++++++++++------ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/include/linux/msi_api.h b/include/linux/msi_api.h index 4dbbce68a217..8640171288a8 100644 --- a/include/linux/msi_api.h +++ b/include/linux/msi_api.h @@ -18,6 +18,18 @@ enum msi_domain_ids { MSI_MAX_DEVICE_IRQDOMAINS, }; -unsigned int msi_get_virq(struct device *dev, unsigned int index); +unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigned int index); + +/** + * msi_get_virq - Lookup the Linux interrupt number for a MSI index on the default interrupt domain + * @dev: Device for which the lookup happens + * @index: The MSI index to lookup + * + * Return: The Linux interrupt number on success (> 0), 0 if not found + */ +static inline unsigned int msi_get_virq(struct device *dev, unsigned int index) +{ + return msi_domain_get_virq(dev, MSI_DEFAULT_DOMAIN, index); +} #endif diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index ec08d1f22be0..e1593c16518b 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -337,26 +337,32 @@ struct msi_desc *msi_next_desc(struct device *dev, unsigned int domid, EXPORT_SYMBOL_GPL(msi_next_desc); /** - * msi_get_virq - Return Linux interrupt number of a MSI interrupt + * msi_domain_get_virq - Lookup the Linux interrupt number for a MSI index on a interrupt domain * @dev: Device to operate on + * @domid: Domain ID of the interrupt domain associated to the device * @index: MSI interrupt index to look for (0-based) * * Return: The Linux interrupt number on success (> 0), 0 if not found */ -unsigned int msi_get_virq(struct device *dev, unsigned int index) +unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigned int index) { struct msi_desc *desc; unsigned int ret = 0; + bool pcimsi = false; struct xarray *xa; - bool pcimsi; if (!dev->msi.data) return 0; - pcimsi = dev_is_pci(dev) ? to_pci_dev(dev)->msi_enabled : false; + if (WARN_ON_ONCE(index > MSI_MAX_INDEX || domid >= MSI_MAX_DEVICE_IRQDOMAINS)) + return 0; + + /* This check is only valid for the PCI default MSI domain */ + if (dev_is_pci(dev) && domid == MSI_DEFAULT_DOMAIN) + pcimsi = to_pci_dev(dev)->msi_enabled; msi_lock_descs(dev); - xa = &dev->msi.data->__domains[MSI_DEFAULT_DOMAIN].store; + xa = &dev->msi.data->__domains[domid].store; desc = xa_load(xa, pcimsi ? 0 : index); if (desc && desc->irq) { /* @@ -371,10 +377,11 @@ unsigned int msi_get_virq(struct device *dev, unsigned int index) ret = desc->irq; } } + msi_unlock_descs(dev); return ret; } -EXPORT_SYMBOL_GPL(msi_get_virq); +EXPORT_SYMBOL_GPL(msi_domain_get_virq); #ifdef CONFIG_SYSFS static struct attribute *msi_dev_attrs[] = { From 1c89396300292da4e4a87db63bef45975461fa25 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:24:27 +0100 Subject: [PATCH 088/134] genirq/msi: Rename msi_add_msi_desc() to msi_insert_msi_desc() This reflects the functionality better. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124230314.103554618@linutronix.de --- drivers/pci/msi/msi.c | 4 ++-- drivers/soc/ti/ti_sci_inta_msi.c | 4 ++-- include/linux/msi.h | 2 +- kernel/irq/msi.c | 6 ++++-- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index b94f6da3f32f..d107bde58bd6 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -308,7 +308,7 @@ static int msi_setup_msi_desc(struct pci_dev *dev, int nvec, if (desc.pci.msi_attrib.can_mask) pci_read_config_dword(dev, desc.pci.mask_pos, &desc.pci.msi_mask); - return msi_add_msi_desc(&dev->dev, &desc); + return msi_insert_msi_desc(&dev->dev, &desc); } static int msi_verify_entries(struct pci_dev *dev) @@ -591,7 +591,7 @@ static int msix_setup_msi_descs(struct pci_dev *dev, void __iomem *base, desc.pci.msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL); } - ret = msi_add_msi_desc(&dev->dev, &desc); + ret = msi_insert_msi_desc(&dev->dev, &desc); if (ret) break; } diff --git a/drivers/soc/ti/ti_sci_inta_msi.c b/drivers/soc/ti/ti_sci_inta_msi.c index 991c78b34745..255849cb81ed 100644 --- a/drivers/soc/ti/ti_sci_inta_msi.c +++ b/drivers/soc/ti/ti_sci_inta_msi.c @@ -73,13 +73,13 @@ static int ti_sci_inta_msi_alloc_descs(struct device *dev, for (set = 0; set < res->sets; set++) { for (i = 0; i < res->desc[set].num; i++, count++) { msi_desc.msi_index = res->desc[set].start + i; - if (msi_add_msi_desc(dev, &msi_desc)) + if (msi_insert_msi_desc(dev, &msi_desc)) goto fail; } for (i = 0; i < res->desc[set].num_sec; i++, count++) { msi_desc.msi_index = res->desc[set].start_sec + i; - if (msi_add_msi_desc(dev, &msi_desc)) + if (msi_insert_msi_desc(dev, &msi_desc)) goto fail; } } diff --git a/include/linux/msi.h b/include/linux/msi.h index 764a4e8828fb..f8ba85af3542 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -286,7 +286,7 @@ static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc, } #endif -int msi_add_msi_desc(struct device *dev, struct msi_desc *init_desc); +int msi_insert_msi_desc(struct device *dev, struct msi_desc *init_desc); void msi_free_msi_descs_range(struct device *dev, unsigned int first_index, unsigned int last_index); /** diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index e1593c16518b..33b8a6cca3f8 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -77,13 +77,15 @@ static int msi_insert_desc(struct msi_device_data *md, struct msi_desc *desc, un } /** - * msi_add_msi_desc - Allocate and initialize a MSI descriptor + * msi_insert_msi_desc - Allocate and initialize a MSI descriptor and + * insert it at @init_desc->msi_index + * * @dev: Pointer to the device for which the descriptor is allocated * @init_desc: Pointer to an MSI descriptor to initialize the new descriptor * * Return: 0 on success or an appropriate failure code. */ -int msi_add_msi_desc(struct device *dev, struct msi_desc *init_desc) +int msi_insert_msi_desc(struct device *dev, struct msi_desc *init_desc) { struct msi_desc *desc; From fc8ab388325ddfd848d00f3a3383b4304594546a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:24:29 +0100 Subject: [PATCH 089/134] genirq/msi: Make descriptor allocation device domain aware Change the descriptor allocation and insertion functions to take a domain id to prepare for the upcoming multi MSI domain per device support. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124230314.163043028@linutronix.de --- include/linux/msi.h | 16 +++++++++++++++- kernel/irq/msi.c | 20 ++++++++++++-------- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/include/linux/msi.h b/include/linux/msi.h index f8ba85af3542..35e9d0050ca1 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -286,7 +286,21 @@ static inline void msi_desc_set_iommu_cookie(struct msi_desc *desc, } #endif -int msi_insert_msi_desc(struct device *dev, struct msi_desc *init_desc); +int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid, + struct msi_desc *init_desc); +/** + * msi_insert_msi_desc - Allocate and initialize a MSI descriptor in the + * default irqdomain and insert it at @init_desc->msi_index + * @dev: Pointer to the device for which the descriptor is allocated + * @init_desc: Pointer to an MSI descriptor to initialize the new descriptor + * + * Return: 0 on success or an appropriate failure code. + */ +static inline int msi_insert_msi_desc(struct device *dev, struct msi_desc *init_desc) +{ + return msi_domain_insert_msi_desc(dev, MSI_DEFAULT_DOMAIN, init_desc); +} + void msi_free_msi_descs_range(struct device *dev, unsigned int first_index, unsigned int last_index); /** diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 33b8a6cca3f8..e75b07fffbcd 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -39,7 +39,7 @@ static inline int msi_sysfs_create_group(struct device *dev); * Return: pointer to allocated &msi_desc on success or %NULL on failure */ static struct msi_desc *msi_alloc_desc(struct device *dev, int nvec, - const struct irq_affinity_desc *affinity) + const struct irq_affinity_desc *affinity) { struct msi_desc *desc = kzalloc(sizeof(*desc), GFP_KERNEL); @@ -64,9 +64,10 @@ static void msi_free_desc(struct msi_desc *desc) kfree(desc); } -static int msi_insert_desc(struct msi_device_data *md, struct msi_desc *desc, unsigned int index) +static int msi_insert_desc(struct msi_device_data *md, struct msi_desc *desc, + unsigned int domid, unsigned int index) { - struct xarray *xa = &md->__domains[MSI_DEFAULT_DOMAIN].store; + struct xarray *xa = &md->__domains[domid].store; int ret; desc->msi_index = index; @@ -77,15 +78,17 @@ static int msi_insert_desc(struct msi_device_data *md, struct msi_desc *desc, un } /** - * msi_insert_msi_desc - Allocate and initialize a MSI descriptor and - * insert it at @init_desc->msi_index + * msi_domain_insert_msi_desc - Allocate and initialize a MSI descriptor and + * insert it at @init_desc->msi_index * * @dev: Pointer to the device for which the descriptor is allocated + * @domid: The id of the interrupt domain to which the desriptor is added * @init_desc: Pointer to an MSI descriptor to initialize the new descriptor * * Return: 0 on success or an appropriate failure code. */ -int msi_insert_msi_desc(struct device *dev, struct msi_desc *init_desc) +int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid, + struct msi_desc *init_desc) { struct msi_desc *desc; @@ -97,7 +100,8 @@ int msi_insert_msi_desc(struct device *dev, struct msi_desc *init_desc) /* Copy type specific data to the new descriptor. */ desc->pci = init_desc->pci; - return msi_insert_desc(dev->msi.data, desc, init_desc->msi_index); + + return msi_insert_desc(dev->msi.data, desc, domid, init_desc->msi_index); } /** @@ -120,7 +124,7 @@ static int msi_add_simple_msi_descs(struct device *dev, unsigned int index, unsi desc = msi_alloc_desc(dev, 1, NULL); if (!desc) goto fail_mem; - ret = msi_insert_desc(dev->msi.data, desc, idx); + ret = msi_insert_desc(dev->msi.data, desc, MSI_DEFAULT_DOMAIN, idx); if (ret) goto fail; } From 377712c5a45f6de40bffef5ddc31b39cd86cf23f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:24:30 +0100 Subject: [PATCH 090/134] genirq/msi: Make descriptor freeing domain aware Change the descriptor free functions to take a domain id to prepare for the upcoming multi MSI domain per device support. To avoid changing and extending the interfaces over and over use an core internal control struct and hand the pointer through the various functions. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124230314.220788011@linutronix.de --- include/linux/msi.h | 19 +++++++++++++-- kernel/irq/msi.c | 58 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 65 insertions(+), 12 deletions(-) diff --git a/include/linux/msi.h b/include/linux/msi.h index 35e9d0050ca1..23172d6354ca 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -301,10 +301,25 @@ static inline int msi_insert_msi_desc(struct device *dev, struct msi_desc *init_ return msi_domain_insert_msi_desc(dev, MSI_DEFAULT_DOMAIN, init_desc); } -void msi_free_msi_descs_range(struct device *dev, unsigned int first_index, unsigned int last_index); +void msi_domain_free_msi_descs_range(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last); /** - * msi_free_msi_descs - Free MSI descriptors of a device + * msi_free_msi_descs_range - Free a range of MSI descriptors of a device + * in the default irqdomain + * + * @dev: Device for which to free the descriptors + * @first: Index to start freeing from (inclusive) + * @last: Last index to be freed (inclusive) + */ +static inline void msi_free_msi_descs_range(struct device *dev, unsigned int first, + unsigned int last) +{ + msi_domain_free_msi_descs_range(dev, MSI_DEFAULT_DOMAIN, first, last); +} + +/** + * msi_free_msi_descs - Free all MSI descriptors of a device in the default irqdomain * @dev: Device to free the descriptors */ static inline void msi_free_msi_descs(struct device *dev) diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index e75b07fffbcd..33ababf8645d 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -19,6 +19,18 @@ #include "internals.h" +/** + * struct msi_ctrl - MSI internal management control structure + * @domid: ID of the domain on which management operations should be done + * @first: First (hardware) slot index to operate on + * @last: Last (hardware) slot index to operate on + */ +struct msi_ctrl { + unsigned int domid; + unsigned int first; + unsigned int last; +}; + /* Invalid Xarray index which is outside of any searchable range */ #define MSI_XA_MAX_INDEX (ULONG_MAX - 1) /* The maximum domain size */ @@ -151,22 +163,29 @@ static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter) return false; } -/** - * msi_free_msi_descs_range - Free MSI descriptors of a device - * @dev: Device to free the descriptors - * @first_index: Index to start freeing from - * @last_index: Last index to be freed - */ -void msi_free_msi_descs_range(struct device *dev, unsigned int first_index, - unsigned int last_index) +static bool msi_ctrl_valid(struct device *dev, struct msi_ctrl *ctrl) +{ + if (WARN_ON_ONCE(ctrl->domid >= MSI_MAX_DEVICE_IRQDOMAINS || + ctrl->first > ctrl->last || + ctrl->first > MSI_MAX_INDEX || + ctrl->last > MSI_MAX_INDEX)) + return false; + return true; +} + +static void msi_domain_free_descs(struct device *dev, struct msi_ctrl *ctrl) { - struct xarray *xa = &dev->msi.data->__domains[MSI_DEFAULT_DOMAIN].store; struct msi_desc *desc; + struct xarray *xa; unsigned long idx; lockdep_assert_held(&dev->msi.data->mutex); - xa_for_each_range(xa, idx, desc, first_index, last_index) { + if (!msi_ctrl_valid(dev, ctrl)) + return; + + xa = &dev->msi.data->__domains[ctrl->domid].store; + xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) { xa_erase(xa, idx); /* Leak the descriptor when it is still referenced */ @@ -176,6 +195,25 @@ void msi_free_msi_descs_range(struct device *dev, unsigned int first_index, } } +/** + * msi_domain_free_msi_descs_range - Free a range of MSI descriptors of a device in an irqdomain + * @dev: Device for which to free the descriptors + * @domid: Id of the domain to operate on + * @first: Index to start freeing from (inclusive) + * @last: Last index to be freed (inclusive) + */ +void msi_domain_free_msi_descs_range(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last) +{ + struct msi_ctrl ctrl = { + .domid = domid, + .first = first, + .last = last, + }; + + msi_domain_free_descs(dev, &ctrl); +} + void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg) { *msg = entry->msg; From 40742716f294449549884421170ea18356a2abe8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:24:32 +0100 Subject: [PATCH 091/134] genirq/msi: Make msi_add_simple_msi_descs() device domain aware Allocating simple interrupt descriptors in the core code has to be multi device irqdomain aware for the upcoming PCI/IMS support. Change the interfaces to take a domain id into account. Use the internal control struct for transport of arguments. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124230314.279112474@linutronix.de --- kernel/irq/msi.c | 98 ++++++++++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 41 deletions(-) diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 33ababf8645d..64a4cc8123e4 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -116,39 +116,6 @@ int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid, return msi_insert_desc(dev->msi.data, desc, domid, init_desc->msi_index); } -/** - * msi_add_simple_msi_descs - Allocate and initialize MSI descriptors - * @dev: Pointer to the device for which the descriptors are allocated - * @index: Index for the first MSI descriptor - * @ndesc: Number of descriptors to allocate - * - * Return: 0 on success or an appropriate failure code. - */ -static int msi_add_simple_msi_descs(struct device *dev, unsigned int index, unsigned int ndesc) -{ - unsigned int idx, last = index + ndesc - 1; - struct msi_desc *desc; - int ret; - - lockdep_assert_held(&dev->msi.data->mutex); - - for (idx = index; idx <= last; idx++) { - desc = msi_alloc_desc(dev, 1, NULL); - if (!desc) - goto fail_mem; - ret = msi_insert_desc(dev->msi.data, desc, MSI_DEFAULT_DOMAIN, idx); - if (ret) - goto fail; - } - return 0; - -fail_mem: - ret = -ENOMEM; -fail: - msi_free_msi_descs_range(dev, index, last); - return ret; -} - static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter) { switch (filter) { @@ -166,6 +133,7 @@ static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter) static bool msi_ctrl_valid(struct device *dev, struct msi_ctrl *ctrl) { if (WARN_ON_ONCE(ctrl->domid >= MSI_MAX_DEVICE_IRQDOMAINS || + !dev->msi.data->__domains[ctrl->domid].domain || ctrl->first > ctrl->last || ctrl->first > MSI_MAX_INDEX || ctrl->last > MSI_MAX_INDEX)) @@ -214,6 +182,41 @@ void msi_domain_free_msi_descs_range(struct device *dev, unsigned int domid, msi_domain_free_descs(dev, &ctrl); } +/** + * msi_domain_add_simple_msi_descs - Allocate and initialize MSI descriptors + * @dev: Pointer to the device for which the descriptors are allocated + * @ctrl: Allocation control struct + * + * Return: 0 on success or an appropriate failure code. + */ +static int msi_domain_add_simple_msi_descs(struct device *dev, struct msi_ctrl *ctrl) +{ + struct msi_desc *desc; + unsigned int idx; + int ret; + + lockdep_assert_held(&dev->msi.data->mutex); + + if (!msi_ctrl_valid(dev, ctrl)) + return -EINVAL; + + for (idx = ctrl->first; idx <= ctrl->last; idx++) { + desc = msi_alloc_desc(dev, 1, NULL); + if (!desc) + goto fail_mem; + ret = msi_insert_desc(dev->msi.data, desc, ctrl->domid, idx); + if (ret) + goto fail; + } + return 0; + +fail_mem: + ret = -ENOMEM; +fail: + msi_domain_free_descs(dev, ctrl); + return ret; +} + void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg) { *msg = entry->msg; @@ -786,16 +789,24 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, { struct msi_domain_info *info = domain->host_data; struct msi_domain_ops *ops = info->ops; + struct msi_ctrl ctrl = { + .domid = MSI_DEFAULT_DOMAIN, + .first = virq_base, + .last = virq_base + nvec - 1, + }; struct msi_desc *desc; struct xarray *xa; int ret, virq; + if (!msi_ctrl_valid(dev, &ctrl)) + return -EINVAL; + msi_lock_descs(dev); - ret = msi_add_simple_msi_descs(dev, virq_base, nvec); + ret = msi_domain_add_simple_msi_descs(dev, &ctrl); if (ret) goto unlock; - xa = &dev->msi.data->__domains[MSI_DEFAULT_DOMAIN].store; + xa = &dev->msi.data->__domains[ctrl.domid].store; for (virq = virq_base; virq < virq_base + nvec; virq++) { desc = xa_load(xa, virq); @@ -814,7 +825,7 @@ int msi_domain_populate_irqs(struct irq_domain *domain, struct device *dev, fail: for (--virq; virq >= virq_base; virq--) irq_domain_free_irqs_common(domain, virq, 1); - msi_free_msi_descs_range(dev, virq_base, virq_base + nvec - 1); + msi_domain_free_descs(dev, &ctrl); unlock: msi_unlock_descs(dev); return ret; @@ -988,14 +999,19 @@ static int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev return 0; } -static int msi_domain_add_simple_msi_descs(struct msi_domain_info *info, - struct device *dev, - unsigned int num_descs) +static int msi_domain_alloc_simple_msi_descs(struct device *dev, + struct msi_domain_info *info, + unsigned int num_descs) { + struct msi_ctrl ctrl = { + .domid = MSI_DEFAULT_DOMAIN, + .last = num_descs - 1, + }; + if (!(info->flags & MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS)) return 0; - return msi_add_simple_msi_descs(dev, 0, num_descs); + return msi_domain_add_simple_msi_descs(dev, &ctrl); } /** @@ -1026,7 +1042,7 @@ int msi_domain_alloc_irqs_descs_locked(struct irq_domain *domain, struct device } /* Frees allocated descriptors in case of failure. */ - ret = msi_domain_add_simple_msi_descs(info, dev, nvec); + ret = msi_domain_alloc_simple_msi_descs(dev, info, nvec); if (ret) goto free; From 4cd5f4403f283766f73749c4c936801f58ffe77a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:24:33 +0100 Subject: [PATCH 092/134] genirq/msi: Provide new domain id based interfaces for freeing interrupts Provide two sorts of interfaces to handle the different use cases: - msi_domain_free_irqs_range(): Handles a caller defined precise range - msi_domain_free_irqs_all(): Frees all interrupts associated to a domain The latter is useful for device teardown and to handle the legacy MSI support which does not have any range information available. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124230314.337844751@linutronix.de --- include/linux/msi.h | 9 +++ kernel/irq/msi.c | 142 +++++++++++++++++++++++++++++++++++++------- 2 files changed, 129 insertions(+), 22 deletions(-) diff --git a/include/linux/msi.h b/include/linux/msi.h index 23172d6354ca..74cb0a93de6f 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -498,6 +498,15 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, int nvec); void msi_domain_free_irqs_descs_locked(struct irq_domain *domain, struct device *dev); void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev); + +void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last); +void msi_domain_free_irqs_range(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last); + +void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid); +void msi_domain_free_irqs_all(struct device *dev, unsigned int domid); + struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain); struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode, diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 64a4cc8123e4..c1ac780b2192 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -545,7 +545,25 @@ static inline void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *de #endif /* !CONFIG_SYSFS */ static int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, int nvec); -static void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev); + +static struct irq_domain *msi_get_device_domain(struct device *dev, unsigned int domid) +{ + struct irq_domain *domain; + + lockdep_assert_held(&dev->msi.data->mutex); + + if (WARN_ON_ONCE(domid >= MSI_MAX_DEVICE_IRQDOMAINS)) + return NULL; + + domain = dev->msi.data->__domains[domid].domain; + if (!domain) + return NULL; + + if (WARN_ON_ONCE(irq_domain_is_msi_parent(domain))) + return NULL; + + return domain; +} static inline void irq_chip_write_msi_msg(struct irq_data *data, struct msi_msg *msg) @@ -706,7 +724,6 @@ static struct msi_domain_ops msi_domain_ops_default = { .msi_prepare = msi_domain_ops_prepare, .set_desc = msi_domain_ops_set_desc, .domain_alloc_irqs = __msi_domain_alloc_irqs, - .domain_free_irqs = __msi_domain_free_irqs, }; static void msi_domain_update_dom_ops(struct msi_domain_info *info) @@ -720,8 +737,6 @@ static void msi_domain_update_dom_ops(struct msi_domain_info *info) if (ops->domain_alloc_irqs == NULL) ops->domain_alloc_irqs = msi_domain_ops_default.domain_alloc_irqs; - if (ops->domain_free_irqs == NULL) - ops->domain_free_irqs = msi_domain_ops_default.domain_free_irqs; if (!(info->flags & MSI_FLAG_USE_DEF_DOM_OPS)) return; @@ -1073,15 +1088,21 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, int nve return ret; } -static void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev) +static void __msi_domain_free_irqs(struct device *dev, struct irq_domain *domain, + struct msi_ctrl *ctrl) { + struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store; struct msi_domain_info *info = domain->host_data; struct irq_data *irqd; struct msi_desc *desc; + unsigned long idx; int i; - /* Only handle MSI entries which have an interrupt associated */ - msi_for_each_desc(desc, dev, MSI_DESC_ASSOCIATED) { + xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) { + /* Only handle MSI entries which have an interrupt associated */ + if (!msi_desc_match(desc, MSI_DESC_ASSOCIATED)) + continue; + /* Make sure all interrupts are deactivated */ for (i = 0; i < desc->nvec_used; i++) { irqd = irq_domain_get_irq_data(domain, desc->irq + i); @@ -1096,11 +1117,99 @@ static void __msi_domain_free_irqs(struct irq_domain *domain, struct device *dev } } -static void msi_domain_free_msi_descs(struct msi_domain_info *info, - struct device *dev) +static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl) { + struct msi_domain_info *info; + struct msi_domain_ops *ops; + struct irq_domain *domain; + + if (!msi_ctrl_valid(dev, ctrl)) + return; + + domain = msi_get_device_domain(dev, ctrl->domid); + if (!domain) + return; + + info = domain->host_data; + ops = info->ops; + + if (ops->domain_free_irqs) + ops->domain_free_irqs(domain, dev); + else + __msi_domain_free_irqs(dev, domain, ctrl); + + if (ops->msi_post_free) + ops->msi_post_free(domain, dev); + if (info->flags & MSI_FLAG_FREE_MSI_DESCS) - msi_free_msi_descs(dev); + msi_domain_free_descs(dev, ctrl); +} + +/** + * msi_domain_free_irqs_range_locked - Free a range of interrupts from a MSI interrupt domain + * associated to @dev with msi_lock held + * @dev: Pointer to device struct of the device for which the interrupts + * are freed + * @domid: Id of the interrupt domain to operate on + * @first: First index to free (inclusive) + * @last: Last index to free (inclusive) + */ +void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last) +{ + struct msi_ctrl ctrl = { + .domid = domid, + .first = first, + .last = last, + }; + msi_domain_free_locked(dev, &ctrl); +} + +/** + * msi_domain_free_irqs_range - Free a range of interrupts from a MSI interrupt domain + * associated to @dev + * @dev: Pointer to device struct of the device for which the interrupts + * are freed + * @domid: Id of the interrupt domain to operate on + * @first: First index to free (inclusive) + * @last: Last index to free (inclusive) + */ +void msi_domain_free_irqs_range(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last) +{ + msi_lock_descs(dev); + msi_domain_free_irqs_range_locked(dev, domid, first, last); + msi_unlock_descs(dev); +} + +/** + * msi_domain_free_irqs_all_locked - Free all interrupts from a MSI interrupt domain + * associated to a device + * @dev: Pointer to device struct of the device for which the interrupts + * are freed + * @domid: The id of the domain to operate on + * + * Must be invoked from within a msi_lock_descs() / msi_unlock_descs() + * pair. Use this for MSI irqdomains which implement their own vector + * allocation. + */ +void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid) +{ + msi_domain_free_irqs_range_locked(dev, domid, 0, MSI_MAX_INDEX); +} + +/** + * msi_domain_free_irqs_all - Free all interrupts from a MSI interrupt domain + * associated to a device + * @dev: Pointer to device struct of the device for which the interrupts + * are freed + * @domid: The id of the domain to operate on + */ +void msi_domain_free_irqs_all(struct device *dev, unsigned int domid) +{ + msi_lock_descs(dev); + msi_domain_free_irqs_all_locked(dev, domid); + msi_unlock_descs(dev); } /** @@ -1115,18 +1224,7 @@ static void msi_domain_free_msi_descs(struct msi_domain_info *info, */ void msi_domain_free_irqs_descs_locked(struct irq_domain *domain, struct device *dev) { - struct msi_domain_info *info = domain->host_data; - struct msi_domain_ops *ops = info->ops; - - lockdep_assert_held(&dev->msi.data->mutex); - - if (WARN_ON_ONCE(irq_domain_is_msi_parent(domain))) - return; - - ops->domain_free_irqs(domain, dev); - if (ops->msi_post_free) - ops->msi_post_free(domain, dev); - msi_domain_free_msi_descs(info, dev); + msi_domain_free_irqs_range_locked(dev, MSI_DEFAULT_DOMAIN, 0, MSI_MAX_INDEX); } /** From f2480e7dacdcd9aab25641346ae53b7ff03777fc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:24:35 +0100 Subject: [PATCH 093/134] genirq/msi: Provide new domain id allocation functions Provide two sorts of interfaces to handle the different use cases: - msi_domain_alloc_irqs_range(): Handles a caller defined precise range - msi_domain_alloc_irqs_all(): Allocates all interrupts associated to a domain by scanning the allocated MSI descriptors The latter is useful for the existing PCI/MSI support which does not have range information available. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124230314.396497163@linutronix.de --- include/linux/msi.h | 18 +++-- kernel/irq/msi.c | 177 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 147 insertions(+), 48 deletions(-) diff --git a/include/linux/msi.h b/include/linux/msi.h index 74cb0a93de6f..611707d619fc 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -383,8 +383,8 @@ struct msi_domain_info; * @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying * irqdomain. * - * @msi_check, @msi_prepare and @set_desc are callbacks used by - * msi_domain_alloc/free_irqs(). + * @msi_check, @msi_prepare and @set_desc are callbacks used by the + * msi_domain_alloc/free_irqs*() variants. * * @domain_alloc_irqs, @domain_free_irqs can be used to override the * default allocation/free functions (__msi_domain_alloc/free_irqs). This @@ -392,11 +392,6 @@ struct msi_domain_info; * be wrapped into the regular irq domains concepts by mere mortals. This * allows to universally use msi_domain_alloc/free_irqs without having to * special case XEN all over the place. - * - * Contrary to other operations @domain_alloc_irqs and @domain_free_irqs - * are set to the default implementation if NULL and even when - * MSI_FLAG_USE_DEF_DOM_OPS is not set to avoid breaking existing users and - * because these callbacks are obviously mandatory. */ struct msi_domain_ops { irq_hw_number_t (*get_hwirq)(struct msi_domain_info *info, @@ -496,14 +491,21 @@ int msi_domain_alloc_irqs_descs_locked(struct irq_domain *domain, struct device int nvec); int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, int nvec); + void msi_domain_free_irqs_descs_locked(struct irq_domain *domain, struct device *dev); void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev); +int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last); +int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last); +int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int nirqs); + + void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid, unsigned int first, unsigned int last); void msi_domain_free_irqs_range(struct device *dev, unsigned int domid, unsigned int first, unsigned int last); - void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid); void msi_domain_free_irqs_all(struct device *dev, unsigned int domid); diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index c1ac780b2192..f857295304bd 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -24,11 +24,14 @@ * @domid: ID of the domain on which management operations should be done * @first: First (hardware) slot index to operate on * @last: Last (hardware) slot index to operate on + * @nirqs: The number of Linux interrupts to allocate. Can be larger + * than the range due to PCI/multi-MSI. */ struct msi_ctrl { unsigned int domid; unsigned int first; unsigned int last; + unsigned int nirqs; }; /* Invalid Xarray index which is outside of any searchable range */ @@ -36,6 +39,7 @@ struct msi_ctrl { /* The maximum domain size */ #define MSI_XA_DOMAIN_SIZE (MSI_MAX_INDEX + 1) +static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl); static inline int msi_sysfs_create_group(struct device *dev); @@ -544,8 +548,6 @@ static inline int msi_sysfs_populate_desc(struct device *dev, struct msi_desc *d static inline void msi_sysfs_remove_desc(struct device *dev, struct msi_desc *desc) { } #endif /* !CONFIG_SYSFS */ -static int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, int nvec); - static struct irq_domain *msi_get_device_domain(struct device *dev, unsigned int domid) { struct irq_domain *domain; @@ -723,7 +725,6 @@ static struct msi_domain_ops msi_domain_ops_default = { .msi_init = msi_domain_ops_init, .msi_prepare = msi_domain_ops_prepare, .set_desc = msi_domain_ops_set_desc, - .domain_alloc_irqs = __msi_domain_alloc_irqs, }; static void msi_domain_update_dom_ops(struct msi_domain_info *info) @@ -735,9 +736,6 @@ static void msi_domain_update_dom_ops(struct msi_domain_info *info) return; } - if (ops->domain_alloc_irqs == NULL) - ops->domain_alloc_irqs = msi_domain_ops_default.domain_alloc_irqs; - if (!(info->flags & MSI_FLAG_USE_DEF_DOM_OPS)) return; @@ -951,18 +949,19 @@ static int msi_init_virq(struct irq_domain *domain, int virq, unsigned int vflag return 0; } -static int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, - int nvec) +static int __msi_domain_alloc_irqs(struct device *dev, struct irq_domain *domain, + struct msi_ctrl *ctrl) { + struct xarray *xa = &dev->msi.data->__domains[ctrl->domid].store; struct msi_domain_info *info = domain->host_data; struct msi_domain_ops *ops = info->ops; + unsigned int vflags = 0, allocated = 0; msi_alloc_info_t arg = { }; - unsigned int vflags = 0; struct msi_desc *desc; - int allocated = 0; + unsigned long idx; int i, ret, virq; - ret = msi_domain_prepare_irqs(domain, dev, nvec, &arg); + ret = msi_domain_prepare_irqs(domain, dev, ctrl->nirqs, &arg); if (ret) return ret; @@ -988,7 +987,14 @@ static int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev vflags |= VIRQ_NOMASK_QUIRK; } - msi_for_each_desc(desc, dev, MSI_DESC_NOTASSOCIATED) { + xa_for_each_range(xa, idx, desc, ctrl->first, ctrl->last) { + if (!msi_desc_match(desc, MSI_DESC_NOTASSOCIATED)) + continue; + + /* This should return -ECONFUSED... */ + if (WARN_ON_ONCE(allocated >= ctrl->nirqs)) + return -EINVAL; + ops->set_desc(&arg, desc); virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used, @@ -1016,17 +1022,122 @@ static int __msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev static int msi_domain_alloc_simple_msi_descs(struct device *dev, struct msi_domain_info *info, - unsigned int num_descs) + struct msi_ctrl *ctrl) { - struct msi_ctrl ctrl = { - .domid = MSI_DEFAULT_DOMAIN, - .last = num_descs - 1, - }; - if (!(info->flags & MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS)) return 0; - return msi_domain_add_simple_msi_descs(dev, &ctrl); + return msi_domain_add_simple_msi_descs(dev, ctrl); +} + +static int __msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl) +{ + struct msi_domain_info *info; + struct msi_domain_ops *ops; + struct irq_domain *domain; + int ret; + + if (!msi_ctrl_valid(dev, ctrl)) + return -EINVAL; + + domain = msi_get_device_domain(dev, ctrl->domid); + if (!domain) + return -ENODEV; + + info = domain->host_data; + + ret = msi_domain_alloc_simple_msi_descs(dev, info, ctrl); + if (ret) + return ret; + + ops = info->ops; + if (ops->domain_alloc_irqs) + return ops->domain_alloc_irqs(domain, dev, ctrl->nirqs); + + return __msi_domain_alloc_irqs(dev, domain, ctrl); +} + +static int msi_domain_alloc_locked(struct device *dev, struct msi_ctrl *ctrl) +{ + int ret = __msi_domain_alloc_locked(dev, ctrl); + + if (ret) + msi_domain_free_locked(dev, ctrl); + return ret; +} + +/** + * msi_domain_alloc_irqs_range_locked - Allocate interrupts from a MSI interrupt domain + * @dev: Pointer to device struct of the device for which the interrupts + * are allocated + * @domid: Id of the interrupt domain to operate on + * @first: First index to allocate (inclusive) + * @last: Last index to allocate (inclusive) + * + * Must be invoked from within a msi_lock_descs() / msi_unlock_descs() + * pair. Use this for MSI irqdomains which implement their own descriptor + * allocation/free. + * + * Return: %0 on success or an error code. + */ +int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last) +{ + struct msi_ctrl ctrl = { + .domid = domid, + .first = first, + .last = last, + .nirqs = last + 1 - first, + }; + + return msi_domain_alloc_locked(dev, &ctrl); +} + +/** + * msi_domain_alloc_irqs_range - Allocate interrupts from a MSI interrupt domain + * @dev: Pointer to device struct of the device for which the interrupts + * are allocated + * @domid: Id of the interrupt domain to operate on + * @first: First index to allocate (inclusive) + * @last: Last index to allocate (inclusive) + * + * Return: %0 on success or an error code. + */ +int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid, + unsigned int first, unsigned int last) +{ + int ret; + + msi_lock_descs(dev); + ret = msi_domain_alloc_irqs_range_locked(dev, domid, first, last); + msi_unlock_descs(dev); + return ret; +} + +/** + * msi_domain_alloc_irqs_all_locked - Allocate all interrupts from a MSI interrupt domain + * + * @dev: Pointer to device struct of the device for which the interrupts + * are allocated + * @domid: Id of the interrupt domain to operate on + * @nirqs: The number of interrupts to allocate + * + * This function scans all MSI descriptors of the MSI domain and allocates interrupts + * for all unassigned ones. That function is to be used for MSI domain usage where + * the descriptor allocation is handled at the call site, e.g. PCI/MSI[X]. + * + * Return: %0 on success or an error code. + */ +int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int nirqs) +{ + struct msi_ctrl ctrl = { + .domid = domid, + .first = 0, + .last = MSI_MAX_INDEX, + .nirqs = nirqs, + }; + + return msi_domain_alloc_locked(dev, &ctrl); } /** @@ -1045,28 +1156,14 @@ static int msi_domain_alloc_simple_msi_descs(struct device *dev, int msi_domain_alloc_irqs_descs_locked(struct irq_domain *domain, struct device *dev, int nvec) { - struct msi_domain_info *info = domain->host_data; - struct msi_domain_ops *ops = info->ops; - int ret; + struct msi_ctrl ctrl = { + .domid = MSI_DEFAULT_DOMAIN, + .first = 0, + .last = MSI_MAX_INDEX, + .nirqs = nvec, + }; - lockdep_assert_held(&dev->msi.data->mutex); - - if (WARN_ON_ONCE(irq_domain_is_msi_parent(domain))) { - ret = -EINVAL; - goto free; - } - - /* Frees allocated descriptors in case of failure. */ - ret = msi_domain_alloc_simple_msi_descs(dev, info, nvec); - if (ret) - goto free; - - ret = ops->domain_alloc_irqs(domain, dev, nvec); - if (!ret) - return 0; -free: - msi_domain_free_irqs_descs_locked(domain, dev); - return ret; + return msi_domain_alloc_locked(dev, &ctrl); } /** From d3a11dee9f946daa6e66aca1bf8db93e9d4e02ec Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:24:36 +0100 Subject: [PATCH 094/134] PCI/MSI: Use msi_domain_alloc/free_irqs_all_locked() Switch to the new domain id aware interfaces to phase out the previous ones. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Bjorn Helgaas Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124230314.455168748@linutronix.de --- drivers/pci/msi/irqdomain.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index dea2e8c4dfa4..f4338fb42c2d 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -14,7 +14,7 @@ int pci_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) domain = dev_get_msi_domain(&dev->dev); if (domain && irq_domain_is_hierarchy(domain)) - return msi_domain_alloc_irqs_descs_locked(domain, &dev->dev, nvec); + return msi_domain_alloc_irqs_all_locked(&dev->dev, MSI_DEFAULT_DOMAIN, nvec); return pci_msi_legacy_setup_msi_irqs(dev, nvec, type); } @@ -25,7 +25,7 @@ void pci_msi_teardown_msi_irqs(struct pci_dev *dev) domain = dev_get_msi_domain(&dev->dev); if (domain && irq_domain_is_hierarchy(domain)) { - msi_domain_free_irqs_descs_locked(domain, &dev->dev); + msi_domain_free_irqs_all_locked(&dev->dev, MSI_DEFAULT_DOMAIN); } else { pci_msi_legacy_teardown_msi_irqs(dev); msi_free_msi_descs(&dev->dev); From b330ff9f0b03d6107ee941240ef63cc95374ff3d Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 25 Nov 2022 00:24:38 +0100 Subject: [PATCH 095/134] platform-msi: Switch to the domain id aware MSI interfaces Switch to the new domain id aware interfaces to phase out the previous ones. No functional change. Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124230314.513924920@linutronix.de --- drivers/base/platform-msi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c index dddafa197693..5883e7634a2b 100644 --- a/drivers/base/platform-msi.c +++ b/drivers/base/platform-msi.c @@ -213,7 +213,7 @@ int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec, if (err) return err; - err = msi_domain_alloc_irqs(dev->msi.domain, dev, nvec); + err = msi_domain_alloc_irqs_range(dev, MSI_DEFAULT_DOMAIN, 0, nvec - 1); if (err) platform_msi_free_priv_data(dev); @@ -227,7 +227,7 @@ EXPORT_SYMBOL_GPL(platform_msi_domain_alloc_irqs); */ void platform_msi_domain_free_irqs(struct device *dev) { - msi_domain_free_irqs(dev->msi.domain, dev); + msi_domain_free_irqs_all(dev, MSI_DEFAULT_DOMAIN); platform_msi_free_priv_data(dev); } EXPORT_SYMBOL_GPL(platform_msi_domain_free_irqs); From 46a2bc8c7092cf277fb486c0629894ed904984a4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:24:40 +0100 Subject: [PATCH 096/134] bus: fsl-mc-msi: Switch to domain id aware interfaces Switch to the new domain id aware interfaces to phase out the previous ones. Get rid of the MSI descriptor and domain checks as the core code detects these issues anyway. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124230314.575538524@linutronix.de --- drivers/bus/fsl-mc/fsl-mc-msi.c | 25 +++---------------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/drivers/bus/fsl-mc/fsl-mc-msi.c b/drivers/bus/fsl-mc/fsl-mc-msi.c index 0cfe859a4ac4..f3f8af9426c9 100644 --- a/drivers/bus/fsl-mc/fsl-mc-msi.c +++ b/drivers/bus/fsl-mc/fsl-mc-msi.c @@ -213,21 +213,8 @@ struct irq_domain *fsl_mc_find_msi_domain(struct device *dev) int fsl_mc_msi_domain_alloc_irqs(struct device *dev, unsigned int irq_count) { - struct irq_domain *msi_domain; - int error; + int error = msi_setup_device_data(dev); - msi_domain = dev_get_msi_domain(dev); - if (!msi_domain) - return -EINVAL; - - error = msi_setup_device_data(dev); - if (error) - return error; - - msi_lock_descs(dev); - if (msi_first_desc(dev, MSI_DESC_ALL)) - error = -EINVAL; - msi_unlock_descs(dev); if (error) return error; @@ -235,7 +222,7 @@ int fsl_mc_msi_domain_alloc_irqs(struct device *dev, unsigned int irq_count) * NOTE: Calling this function will trigger the invocation of the * its_fsl_mc_msi_prepare() callback */ - error = msi_domain_alloc_irqs(msi_domain, dev, irq_count); + error = msi_domain_alloc_irqs_range(dev, MSI_DEFAULT_DOMAIN, 0, irq_count - 1); if (error) dev_err(dev, "Failed to allocate IRQs\n"); @@ -244,11 +231,5 @@ int fsl_mc_msi_domain_alloc_irqs(struct device *dev, unsigned int irq_count) void fsl_mc_msi_domain_free_irqs(struct device *dev) { - struct irq_domain *msi_domain; - - msi_domain = dev_get_msi_domain(dev); - if (!msi_domain) - return; - - msi_domain_free_irqs(msi_domain, dev); + msi_domain_free_irqs_all(dev, MSI_DEFAULT_DOMAIN); } From 811b32811fbd1a5d3a9eb089ff1d34fa04ef2144 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 25 Nov 2022 00:24:41 +0100 Subject: [PATCH 097/134] oc: ti: ti_sci_inta_msi: Switch to domain id aware MSI functions Switch to the new domain id aware interfaces to phase out the previous ones. Remove the domain check as it happens in the core code now. No functional change. Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124230314.634800247@linutronix.de --- drivers/soc/ti/ti_sci_inta_msi.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/soc/ti/ti_sci_inta_msi.c b/drivers/soc/ti/ti_sci_inta_msi.c index 255849cb81ed..b9251e1d9a5c 100644 --- a/drivers/soc/ti/ti_sci_inta_msi.c +++ b/drivers/soc/ti/ti_sci_inta_msi.c @@ -93,13 +93,8 @@ int ti_sci_inta_msi_domain_alloc_irqs(struct device *dev, struct ti_sci_resource *res) { struct platform_device *pdev = to_platform_device(dev); - struct irq_domain *msi_domain; int ret, nvec; - msi_domain = dev_get_msi_domain(dev); - if (!msi_domain) - return -EINVAL; - if (pdev->id < 0) return -ENODEV; @@ -114,7 +109,8 @@ int ti_sci_inta_msi_domain_alloc_irqs(struct device *dev, goto unlock; } - ret = msi_domain_alloc_irqs_descs_locked(msi_domain, dev, nvec); + /* Use alloc ALL as it's unclear whether there are gaps in the indices */ + ret = msi_domain_alloc_irqs_all_locked(dev, MSI_DEFAULT_DOMAIN, nvec); if (ret) dev_err(dev, "Failed to allocate IRQs %d\n", ret); unlock: From c459f11f32a022d0f97694030419d16816275a9d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:24:43 +0100 Subject: [PATCH 098/134] genirq/msi: Remove unused alloc/free interfaces Now that all users are converted remove the old interfaces. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124230314.694291814@linutronix.de --- include/linux/msi.h | 7 ----- kernel/irq/msi.c | 73 --------------------------------------------- 2 files changed, 80 deletions(-) diff --git a/include/linux/msi.h b/include/linux/msi.h index 611707d619fc..43b8866c8431 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -487,13 +487,6 @@ int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask, struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); -int msi_domain_alloc_irqs_descs_locked(struct irq_domain *domain, struct device *dev, - int nvec); -int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, - int nvec); - -void msi_domain_free_irqs_descs_locked(struct irq_domain *domain, struct device *dev); -void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev); int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid, unsigned int first, unsigned int last); diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index f857295304bd..8e653f089f82 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -1140,51 +1140,6 @@ int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int return msi_domain_alloc_locked(dev, &ctrl); } -/** - * msi_domain_alloc_irqs_descs_locked - Allocate interrupts from a MSI interrupt domain - * @domain: The domain to allocate from - * @dev: Pointer to device struct of the device for which the interrupts - * are allocated - * @nvec: The number of interrupts to allocate - * - * Must be invoked from within a msi_lock_descs() / msi_unlock_descs() - * pair. Use this for MSI irqdomains which implement their own vector - * allocation/free. - * - * Return: %0 on success or an error code. - */ -int msi_domain_alloc_irqs_descs_locked(struct irq_domain *domain, struct device *dev, - int nvec) -{ - struct msi_ctrl ctrl = { - .domid = MSI_DEFAULT_DOMAIN, - .first = 0, - .last = MSI_MAX_INDEX, - .nirqs = nvec, - }; - - return msi_domain_alloc_locked(dev, &ctrl); -} - -/** - * msi_domain_alloc_irqs - Allocate interrupts from a MSI interrupt domain - * @domain: The domain to allocate from - * @dev: Pointer to device struct of the device for which the interrupts - * are allocated - * @nvec: The number of interrupts to allocate - * - * Return: %0 on success or an error code. - */ -int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, int nvec) -{ - int ret; - - msi_lock_descs(dev); - ret = msi_domain_alloc_irqs_descs_locked(domain, dev, nvec); - msi_unlock_descs(dev); - return ret; -} - static void __msi_domain_free_irqs(struct device *dev, struct irq_domain *domain, struct msi_ctrl *ctrl) { @@ -1309,34 +1264,6 @@ void msi_domain_free_irqs_all(struct device *dev, unsigned int domid) msi_unlock_descs(dev); } -/** - * msi_domain_free_irqs_descs_locked - Free interrupts from a MSI interrupt @domain associated to @dev - * @domain: The domain to managing the interrupts - * @dev: Pointer to device struct of the device for which the interrupts - * are free - * - * Must be invoked from within a msi_lock_descs() / msi_unlock_descs() - * pair. Use this for MSI irqdomains which implement their own vector - * allocation. - */ -void msi_domain_free_irqs_descs_locked(struct irq_domain *domain, struct device *dev) -{ - msi_domain_free_irqs_range_locked(dev, MSI_DEFAULT_DOMAIN, 0, MSI_MAX_INDEX); -} - -/** - * msi_domain_free_irqs - Free interrupts from a MSI interrupt @domain associated to @dev - * @domain: The domain to managing the interrupts - * @dev: Pointer to device struct of the device for which the interrupts - * are free - */ -void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev) -{ - msi_lock_descs(dev); - msi_domain_free_irqs_descs_locked(domain, dev); - msi_unlock_descs(dev); -} - /** * msi_get_domain_info - Get the MSI interrupt domain info for @domain * @domain: The interrupt domain to retrieve data from From 2d958b02b04f18955b0e15eda531461153c399d4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:25:46 +0100 Subject: [PATCH 099/134] genirq/msi: Rearrange MSI domain flags These flags got added as necessary and have no obvious structure. For feature support checks and masking it's convenient to have two blocks of flags: 1) Flags to control the internal behaviour like allocating/freeing MSI descriptors. Those flags do not need any support from the underlying MSI parent domain. They are mostly under the control of the outermost domain which implements the actual MSI support. 2) Flags to expose features, e.g. PCI multi-MSI or requirements which can depend on a underlying domain. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232325.322714918@linutronix.de --- include/linux/msi.h | 49 +++++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/include/linux/msi.h b/include/linux/msi.h index 43b8866c8431..a4339ebcc035 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -24,6 +24,8 @@ #include #include #include +#include + #include /* Dummy shadow structures if an architecture does not define them */ @@ -440,7 +442,16 @@ struct msi_domain_info { void *data; }; -/* Flags for msi_domain_info */ +/* + * Flags for msi_domain_info + * + * Bit 0-15: Generic MSI functionality which is not subject to restriction + * by parent domains + * + * Bit 16-31: Functionality which depends on the underlying parent domain and + * can be masked out by msi_parent_ops::init_dev_msi_info() when + * a device MSI domain is initialized. + */ enum { /* * Init non implemented ops callbacks with default MSI domain @@ -452,33 +463,41 @@ enum { * callbacks. */ MSI_FLAG_USE_DEF_CHIP_OPS = (1 << 1), - /* Support multiple PCI MSI interrupts */ - MSI_FLAG_MULTI_PCI_MSI = (1 << 2), - /* Support PCI MSIX interrupts */ - MSI_FLAG_PCI_MSIX = (1 << 3), /* Needs early activate, required for PCI */ - MSI_FLAG_ACTIVATE_EARLY = (1 << 4), + MSI_FLAG_ACTIVATE_EARLY = (1 << 2), /* * Must reactivate when irq is started even when * MSI_FLAG_ACTIVATE_EARLY has been set. */ - MSI_FLAG_MUST_REACTIVATE = (1 << 5), - /* Is level-triggered capable, using two messages */ - MSI_FLAG_LEVEL_CAPABLE = (1 << 6), + MSI_FLAG_MUST_REACTIVATE = (1 << 3), /* Populate sysfs on alloc() and destroy it on free() */ - MSI_FLAG_DEV_SYSFS = (1 << 7), - /* MSI-X entries must be contiguous */ - MSI_FLAG_MSIX_CONTIGUOUS = (1 << 8), + MSI_FLAG_DEV_SYSFS = (1 << 4), /* Allocate simple MSI descriptors */ - MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS = (1 << 9), + MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS = (1 << 5), /* Free MSI descriptors */ - MSI_FLAG_FREE_MSI_DESCS = (1 << 10), + MSI_FLAG_FREE_MSI_DESCS = (1 << 6), /* * Quirk to handle MSI implementations which do not provide * masking. Currently known to affect x86, but has to be partially * handled in the core MSI code. */ - MSI_FLAG_NOMASK_QUIRK = (1 << 11), + MSI_FLAG_NOMASK_QUIRK = (1 << 7), + + /* Mask for the generic functionality */ + MSI_GENERIC_FLAGS_MASK = GENMASK(15, 0), + + /* Mask for the domain specific functionality */ + MSI_DOMAIN_FLAGS_MASK = GENMASK(31, 16), + + /* Support multiple PCI MSI interrupts */ + MSI_FLAG_MULTI_PCI_MSI = (1 << 16), + /* Support PCI MSIX interrupts */ + MSI_FLAG_PCI_MSIX = (1 << 17), + /* Is level-triggered capable, using two messages */ + MSI_FLAG_LEVEL_CAPABLE = (1 << 18), + /* MSI-X entries must be contiguous */ + MSI_FLAG_MSIX_CONTIGUOUS = (1 << 19), + }; int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask, From b78780d93b068706d04f8f2f02bd08db5da01479 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:25:48 +0100 Subject: [PATCH 100/134] genirq/msi: Provide struct msi_parent_ops MSI parent domains must have some control over the MSI domains which are built on top. On domain creation they need to fill in e.g. architecture specific chip callbacks or msi domain ops to make the outermost domain parent agnostic which is obviously required for architecture independence etc. The structure contains: 1) A bitfield which exposes the supported functional features. This allows to check for features and is also used in the initialization callback to mask out unsupported features when the actual domain implementation requests a broader range, e.g. on x86 PCI multi-MSI is only supported by remapping domains but not by the underlying vector domain. The PCI/MSI code can then always request multi-MSI support, but the resulting feature set after creation might not have it set. 2) An optional string prefix which is put in front of domain and chip names during creation of the MSI domain. That allows to keep the naming schemes e.g. on x86 where PCI-MSI domains have a IR- prefix when interrupt remapping is enabled. 3) An initialization callback to sanity check the domain info of the to be created MSI domain, to restrict features and to apply changes in MSI ops and interrupt chip callbacks to accomodate to the particular MSI parent implementation and/or the underlying hierarchy. Add a conveniance function to delegate the initialization from the MSI parent domain to an underlying domain in the hierarchy. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232325.382485843@linutronix.de --- include/linux/irqdomain.h | 5 +++++ include/linux/msi.h | 21 ++++++++++++++++++++ kernel/irq/msi.c | 41 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 24b76688c2fb..a668cc015874 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -46,6 +46,7 @@ struct irq_desc; struct cpumask; struct seq_file; struct irq_affinity_desc; +struct msi_parent_ops; #define IRQ_DOMAIN_IRQ_SPEC_PARAMS 16 @@ -134,6 +135,7 @@ struct irq_domain_chip_generic; * @pm_dev: Pointer to a device that can be utilized for power management * purposes related to the irq domain. * @parent: Pointer to parent irq_domain to support hierarchy irq_domains + * @msi_parent_ops: Pointer to MSI parent domain methods for per device domain init * * Revmap data, used internally by the irq domain code: * @revmap_size: Size of the linear map table @revmap[] @@ -157,6 +159,9 @@ struct irq_domain { #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_domain *parent; #endif +#ifdef CONFIG_GENERIC_MSI_IRQ + const struct msi_parent_ops *msi_parent_ops; +#endif /* reverse map data. The linear map gets appended to the irq_domain */ irq_hw_number_t hwirq_max; diff --git a/include/linux/msi.h b/include/linux/msi.h index a4339ebcc035..9bf3cbad1114 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -500,6 +500,27 @@ enum { }; +/** + * struct msi_parent_ops - MSI parent domain callbacks and configuration info + * + * @supported_flags: Required: The supported MSI flags of the parent domain + * @prefix: Optional: Prefix for the domain and chip name + * @init_dev_msi_info: Required: Callback for MSI parent domains to setup parent + * domain specific domain flags, domain ops and interrupt chip + * callbacks when a per device domain is created. + */ +struct msi_parent_ops { + u32 supported_flags; + const char *prefix; + bool (*init_dev_msi_info)(struct device *dev, struct irq_domain *domain, + struct irq_domain *msi_parent_domain, + struct msi_domain_info *msi_child_info); +}; + +bool msi_parent_init_dev_msi_info(struct device *dev, struct irq_domain *domain, + struct irq_domain *msi_parent_domain, + struct msi_domain_info *msi_child_info); + int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force); diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 8e653f089f82..c368116e8b08 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -788,6 +788,47 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, return domain; } +/** + * msi_parent_init_dev_msi_info - Delegate initialization of device MSI info down + * in the domain hierarchy + * @dev: The device for which the domain should be created + * @domain: The domain in the hierarchy this op is being called on + * @msi_parent_domain: The IRQ_DOMAIN_FLAG_MSI_PARENT domain for the child to + * be created + * @msi_child_info: The MSI domain info of the IRQ_DOMAIN_FLAG_MSI_DEVICE + * domain to be created + * + * Return: true on success, false otherwise + * + * This is the most complex problem of per device MSI domains and the + * underlying interrupt domain hierarchy: + * + * The device domain to be initialized requests the broadest feature set + * possible and the underlying domain hierarchy puts restrictions on it. + * + * That's trivial for a simple parent->child relationship, but it gets + * interesting with an intermediate domain: root->parent->child. The + * intermediate 'parent' can expand the capabilities which the 'root' + * domain is providing. So that creates a classic hen and egg problem: + * Which entity is doing the restrictions/expansions? + * + * One solution is to let the root domain handle the initialization that's + * why there is the @domain and the @msi_parent_domain pointer. + */ +bool msi_parent_init_dev_msi_info(struct device *dev, struct irq_domain *domain, + struct irq_domain *msi_parent_domain, + struct msi_domain_info *msi_child_info) +{ + struct irq_domain *parent = domain->parent; + + if (WARN_ON_ONCE(!parent || !parent->msi_parent_ops || + !parent->msi_parent_ops->init_dev_msi_info)) + return false; + + return parent->msi_parent_ops->init_dev_msi_info(dev, parent, msi_parent_domain, + msi_child_info); +} + int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *arg) { From ebca4396ee18521e9e5d435a15e5d0ab2eb6b009 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:25:49 +0100 Subject: [PATCH 101/134] genirq/msi: Provide data structs for per device domains Provide struct msi_domain_template which contains a bundle of struct irq_chip, struct msi_domain_ops and struct msi_domain_info and a name field. This template is used by MSI device domain implementations to provide the domain specific functionality, feature bits etc. When a MSI domain is created the template is duplicated in the core code so that it can be modified per instance. That means templates can be marked const at the MSI device domain code. The template is a bundle to avoid several allocations and duplications of the involved structures. The name field is used to construct the final domain and chip name via: $PREFIX$NAME-$DEVNAME where prefix is the optional prefix of the MSI parent domain, $NAME is the provided name in template::chip and the device name so that the domain is properly identified. On x86 this results for PCI/MSI in: PCI-MSI-0000:3d:00.1 or IR-PCI-MSIX-0000:3d:00.1 depending on the domain type and the availability of remapping. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232325.442499757@linutronix.de --- include/linux/msi.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/include/linux/msi.h b/include/linux/msi.h index 9bf3cbad1114..7fb87371541a 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -74,7 +75,6 @@ struct msi_msg { extern int pci_msi_ignore_mask; /* Helper functions */ -struct irq_data; struct msi_desc; struct pci_dev; struct platform_msi_priv_data; @@ -442,6 +442,20 @@ struct msi_domain_info { void *data; }; +/** + * struct msi_domain_template - Template for MSI device domains + * @name: Storage for the resulting name. Filled in by the core. + * @chip: Interrupt chip for this domain + * @ops: MSI domain ops + * @info: MSI domain info data + */ +struct msi_domain_template { + char name[48]; + struct irq_chip chip; + struct msi_domain_ops ops; + struct msi_domain_info info; +}; + /* * Flags for msi_domain_info * From 61bf992fc618503c910416f28afa0b015838b72b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:25:51 +0100 Subject: [PATCH 102/134] genirq/msi: Add size info to struct msi_domain_info To allow proper range checking especially for dynamic allocations add a size field to struct msi_domain_info. If the field is 0 then the size is unknown or unlimited (up to MSI_MAX_INDEX) to provide backwards compability. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232325.501144862@linutronix.de --- include/linux/msi.h | 5 +++++ kernel/irq/msi.c | 11 +++++++++++ 2 files changed, 16 insertions(+) diff --git a/include/linux/msi.h b/include/linux/msi.h index 7fb87371541a..08a0e2abf048 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -422,6 +422,10 @@ struct msi_domain_ops { * struct msi_domain_info - MSI interrupt domain data * @flags: Flags to decribe features and capabilities * @bus_token: The domain bus token + * @hwsize: The hardware table size or the software index limit. + * If 0 then the size is considered unlimited and + * gets initialized to the maximum software index limit + * by the domain creation code. * @ops: The callback data structure * @chip: Optional: associated interrupt chip * @chip_data: Optional: associated interrupt chip data @@ -433,6 +437,7 @@ struct msi_domain_ops { struct msi_domain_info { u32 flags; enum irq_domain_bus_token bus_token; + unsigned int hwsize; struct msi_domain_ops *ops; struct irq_chip *chip; void *chip_data; diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index c368116e8b08..0a3890598b75 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -772,6 +772,17 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, { struct irq_domain *domain; + if (info->hwsize > MSI_XA_DOMAIN_SIZE) + return NULL; + + /* + * Hardware size 0 is valid for backwards compatibility and for + * domains which are not backed by a hardware table. Grant the + * maximum index space. + */ + if (!info->hwsize) + info->hwsize = MSI_XA_DOMAIN_SIZE; + msi_domain_update_dom_ops(info); if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS) msi_domain_update_chip_ops(info); From a80c0aceeaffdb3afe9536fe747480e85841da7f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:25:52 +0100 Subject: [PATCH 103/134] genirq/msi: Split msi_create_irq_domain() Split the functionality of msi_create_irq_domain() so it can be reused for creating per device irq domains. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232325.559086358@linutronix.de --- kernel/irq/msi.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 0a3890598b75..0f7fe562b307 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -758,17 +758,10 @@ static void msi_domain_update_chip_ops(struct msi_domain_info *info) chip->irq_set_affinity = msi_domain_set_affinity; } -/** - * msi_create_irq_domain - Create an MSI interrupt domain - * @fwnode: Optional fwnode of the interrupt controller - * @info: MSI domain info - * @parent: Parent irq domain - * - * Return: pointer to the created &struct irq_domain or %NULL on failure - */ -struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, - struct msi_domain_info *info, - struct irq_domain *parent) +static struct irq_domain *__msi_create_irq_domain(struct fwnode_handle *fwnode, + struct msi_domain_info *info, + unsigned int flags, + struct irq_domain *parent) { struct irq_domain *domain; @@ -787,7 +780,7 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, if (info->flags & MSI_FLAG_USE_DEF_CHIP_OPS) msi_domain_update_chip_ops(info); - domain = irq_domain_create_hierarchy(parent, IRQ_DOMAIN_FLAG_MSI, 0, + domain = irq_domain_create_hierarchy(parent, flags | IRQ_DOMAIN_FLAG_MSI, 0, fwnode, &msi_domain_ops, info); if (domain) { @@ -799,6 +792,21 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, return domain; } +/** + * msi_create_irq_domain - Create an MSI interrupt domain + * @fwnode: Optional fwnode of the interrupt controller + * @info: MSI domain info + * @parent: Parent irq domain + * + * Return: pointer to the created &struct irq_domain or %NULL on failure + */ +struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, + struct msi_domain_info *info, + struct irq_domain *parent) +{ + return __msi_create_irq_domain(fwnode, info, 0, parent); +} + /** * msi_parent_init_dev_msi_info - Delegate initialization of device MSI info down * in the domain hierarchy From 4443664f298d1a2cba25a2e48d53b78f4138209b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:25:54 +0100 Subject: [PATCH 104/134] genirq/irqdomain: Add irq_domain:: Dev for per device MSI domains Per device domains require the device pointer of the device which instantiated the domain for some purposes. Add the pointer to struct irq_domain. It will be used in the next step which provides the infrastructure to create per device MSI domains. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232325.618807601@linutronix.de --- include/linux/irqdomain.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index a668cc015874..a372086750ca 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -132,6 +132,9 @@ struct irq_domain_chip_generic; * @gc: Pointer to a list of generic chips. There is a helper function for * setting up one or more generic chips for interrupt controllers * drivers using the generic chip library which uses this pointer. + * @dev: Pointer to the device which instantiated the irqdomain + * With per device irq domains this is not necessarily the same + * as @pm_dev. * @pm_dev: Pointer to a device that can be utilized for power management * purposes related to the irq domain. * @parent: Pointer to parent irq_domain to support hierarchy irq_domains @@ -155,6 +158,7 @@ struct irq_domain { struct fwnode_handle *fwnode; enum irq_domain_bus_token bus_token; struct irq_domain_chip_generic *gc; + struct device *dev; struct device *pm_dev; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY struct irq_domain *parent; From 27a6dea3ebaab3d6f8ded969ec3af710bcbe0c02 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:25:56 +0100 Subject: [PATCH 105/134] genirq/msi: Provide msi_create/free_device_irq_domain() Now that all prerequsites are in place, provide the actual interfaces for creating and removing per device interrupt domains. MSI device interrupt domains are created from the provided msi_domain_template which is duplicated so that it can be modified for the particular device. The name of the domain and the name of the interrupt chip are composed by "$(PREFIX)$(CHIPNAME)-$(DEVNAME)" $PREFIX: The optional prefix provided by the underlying MSI parent domain via msi_parent_ops::prefix. $CHIPNAME: The name of the irq_chip in the template $DEVNAME: The name of the device The domain is further initialized through a MSI parent domain callback which fills in the required functionality for the parent domain or domains further down the hierarchy. This initialization can fail, e.g. when the requested feature or MSI domain type cannot be supported. The domain pointer is stored in the pointer array inside of msi_device_data which is attached to the domain. The domain can be removed via the API or left for disposal via devres when the device is torn down. The API removal is useful e.g. for PCI to have seperate domains for MSI and MSI-X, which are mutually exclusive and always occupy the default domain id slot. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232325.678838546@linutronix.de --- include/linux/msi.h | 6 ++ kernel/irq/msi.c | 138 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+) diff --git a/include/linux/msi.h b/include/linux/msi.h index 08a0e2abf048..ef46a3ee18e4 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -547,6 +547,12 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); +bool msi_create_device_irq_domain(struct device *dev, unsigned int domid, + const struct msi_domain_template *template, + unsigned int hwsize, void *domain_data, + void *chip_data); +void msi_remove_device_irq_domain(struct device *dev, unsigned int domid); + int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid, unsigned int first, unsigned int last); int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid, diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 0f7fe562b307..8b415bdb0425 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -240,6 +240,7 @@ static void msi_device_data_release(struct device *dev, void *res) int i; for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) { + msi_remove_device_irq_domain(dev, i); WARN_ON_ONCE(!xa_empty(&md->__domains[i].store)); xa_destroy(&md->__domains[i].store); } @@ -848,6 +849,143 @@ bool msi_parent_init_dev_msi_info(struct device *dev, struct irq_domain *domain, msi_child_info); } +/** + * msi_create_device_irq_domain - Create a device MSI interrupt domain + * @dev: Pointer to the device + * @domid: Domain id + * @template: MSI domain info bundle used as template + * @hwsize: Maximum number of MSI table entries (0 if unknown or unlimited) + * @domain_data: Optional pointer to domain specific data which is set in + * msi_domain_info::data + * @chip_data: Optional pointer to chip specific data which is set in + * msi_domain_info::chip_data + * + * Return: True on success, false otherwise + * + * There is no firmware node required for this interface because the per + * device domains are software constructs which are actually closer to the + * hardware reality than any firmware can describe them. + * + * The domain name and the irq chip name for a MSI device domain are + * composed by: "$(PREFIX)$(CHIPNAME)-$(DEVNAME)" + * + * $PREFIX: Optional prefix provided by the underlying MSI parent domain + * via msi_parent_ops::prefix. If that pointer is NULL the prefix + * is empty. + * $CHIPNAME: The name of the irq_chip in @template + * $DEVNAME: The name of the device + * + * This results in understandable chip names and hardware interrupt numbers + * in e.g. /proc/interrupts + * + * PCI-MSI-0000:00:1c.0 0-edge Parent domain has no prefix + * IR-PCI-MSI-0000:00:1c.4 0-edge Same with interrupt remapping prefix 'IR-' + * + * IR-PCI-MSIX-0000:3d:00.0 0-edge Hardware interrupt numbers reflect + * IR-PCI-MSIX-0000:3d:00.0 1-edge the real MSI-X index on that device + * IR-PCI-MSIX-0000:3d:00.0 2-edge + * + * On IMS domains the hardware interrupt number is either a table entry + * index or a purely software managed index but it is guaranteed to be + * unique. + * + * The domain pointer is stored in @dev::msi::data::__irqdomains[]. All + * subsequent operations on the domain depend on the domain id. + * + * The domain is automatically freed when the device is removed via devres + * in the context of @dev::msi::data freeing, but it can also be + * independently removed via @msi_remove_device_irq_domain(). + */ +bool msi_create_device_irq_domain(struct device *dev, unsigned int domid, + const struct msi_domain_template *template, + unsigned int hwsize, void *domain_data, + void *chip_data) +{ + struct irq_domain *domain, *parent = dev->msi.domain; + const struct msi_parent_ops *pops; + struct msi_domain_template *bundle; + struct fwnode_handle *fwnode; + + if (!irq_domain_is_msi_parent(parent)) + return false; + + if (domid >= MSI_MAX_DEVICE_IRQDOMAINS) + return false; + + bundle = kmemdup(template, sizeof(*bundle), GFP_KERNEL); + if (!bundle) + return false; + + bundle->info.hwsize = hwsize; + bundle->info.chip = &bundle->chip; + bundle->info.ops = &bundle->ops; + bundle->info.data = domain_data; + bundle->info.chip_data = chip_data; + + pops = parent->msi_parent_ops; + snprintf(bundle->name, sizeof(bundle->name), "%s%s-%s", + pops->prefix ? : "", bundle->chip.name, dev_name(dev)); + bundle->chip.name = bundle->name; + + fwnode = irq_domain_alloc_named_fwnode(bundle->name); + if (!fwnode) + goto free_bundle; + + if (msi_setup_device_data(dev)) + goto free_fwnode; + + msi_lock_descs(dev); + + if (WARN_ON_ONCE(msi_get_device_domain(dev, domid))) + goto fail; + + if (!pops->init_dev_msi_info(dev, parent, parent, &bundle->info)) + goto fail; + + domain = __msi_create_irq_domain(fwnode, &bundle->info, IRQ_DOMAIN_FLAG_MSI_DEVICE, parent); + if (!domain) + goto fail; + + domain->dev = dev; + dev->msi.data->__domains[domid].domain = domain; + msi_unlock_descs(dev); + return true; + +fail: + msi_unlock_descs(dev); +free_fwnode: + kfree(fwnode); +free_bundle: + kfree(bundle); + return false; +} + +/** + * msi_remove_device_irq_domain - Free a device MSI interrupt domain + * @dev: Pointer to the device + * @domid: Domain id + */ +void msi_remove_device_irq_domain(struct device *dev, unsigned int domid) +{ + struct msi_domain_info *info; + struct irq_domain *domain; + + msi_lock_descs(dev); + + domain = msi_get_device_domain(dev, domid); + + if (!domain || !irq_domain_is_msi_device(domain)) + goto unlock; + + dev->msi.data->__domains[domid].domain = NULL; + info = domain->host_data; + irq_domain_remove(domain); + kfree(container_of(info, struct msi_domain_template, info)); + +unlock: + msi_unlock_descs(dev); +} + int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *arg) { From 26e91b75bf6108550035355c835bf0c93c885b61 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:25:57 +0100 Subject: [PATCH 106/134] genirq/msi: Provide msi_match_device_domain() Provide an interface to match a per device domain bus token. This allows to query which type of domain is installed for a particular domain id. Will be used for PCI to avoid frequent create/remove cycles for the MSI resp. MSI-X domains. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232325.738047902@linutronix.de --- include/linux/msi.h | 3 +++ kernel/irq/msi.c | 25 +++++++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/include/linux/msi.h b/include/linux/msi.h index ef46a3ee18e4..b4ab00562a29 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -553,6 +553,9 @@ bool msi_create_device_irq_domain(struct device *dev, unsigned int domid, void *chip_data); void msi_remove_device_irq_domain(struct device *dev, unsigned int domid); +bool msi_match_device_irq_domain(struct device *dev, unsigned int domid, + enum irq_domain_bus_token bus_token); + int msi_domain_alloc_irqs_range_locked(struct device *dev, unsigned int domid, unsigned int first, unsigned int last); int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid, diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 8b415bdb0425..74499980cfc2 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -986,6 +986,31 @@ unlock: msi_unlock_descs(dev); } +/** + * msi_match_device_irq_domain - Match a device irq domain against a bus token + * @dev: Pointer to the device + * @domid: Domain id + * @bus_token: Bus token to match against the domain bus token + * + * Return: True if device domain exists and bus tokens match. + */ +bool msi_match_device_irq_domain(struct device *dev, unsigned int domid, + enum irq_domain_bus_token bus_token) +{ + struct msi_domain_info *info; + struct irq_domain *domain; + bool ret = false; + + msi_lock_descs(dev); + domain = msi_get_device_domain(dev, domid); + if (domain && irq_domain_is_msi_device(domain)) { + info = domain->host_data; + ret = info->bus_token == bus_token; + } + msi_unlock_descs(dev); + return ret; +} + int msi_domain_prepare_irqs(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *arg) { From 36db3d9003ea85217b357a658cf7b37920c2c38e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:25:59 +0100 Subject: [PATCH 107/134] genirq/msi: Add range checking to msi_insert_desc() Per device domains provide the real domain size to the core code. This allows range checking on insertion of MSI descriptors and also paves the way for dynamic index allocations which are required e.g. for IMS. This avoids external mechanisms like bitmaps on the device side and just utilizes the core internal MSI descriptor storxe for it. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232325.798556374@linutronix.de --- kernel/irq/msi.c | 53 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 74499980cfc2..ae5869222fe5 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -40,6 +40,7 @@ struct msi_ctrl { #define MSI_XA_DOMAIN_SIZE (MSI_MAX_INDEX + 1) static void msi_domain_free_locked(struct device *dev, struct msi_ctrl *ctrl); +static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid); static inline int msi_sysfs_create_group(struct device *dev); @@ -80,16 +81,28 @@ static void msi_free_desc(struct msi_desc *desc) kfree(desc); } -static int msi_insert_desc(struct msi_device_data *md, struct msi_desc *desc, +static int msi_insert_desc(struct device *dev, struct msi_desc *desc, unsigned int domid, unsigned int index) { + struct msi_device_data *md = dev->msi.data; struct xarray *xa = &md->__domains[domid].store; + unsigned int hwsize; int ret; + hwsize = msi_domain_get_hwsize(dev, domid); + if (index >= hwsize) { + ret = -ERANGE; + goto fail; + } + desc->msi_index = index; ret = xa_insert(xa, index, desc, GFP_KERNEL); if (ret) - msi_free_desc(desc); + goto fail; + return 0; + +fail: + msi_free_desc(desc); return ret; } @@ -117,7 +130,7 @@ int msi_domain_insert_msi_desc(struct device *dev, unsigned int domid, /* Copy type specific data to the new descriptor. */ desc->pci = init_desc->pci; - return msi_insert_desc(dev->msi.data, desc, domid, init_desc->msi_index); + return msi_insert_desc(dev, desc, domid, init_desc->msi_index); } static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter) @@ -136,11 +149,16 @@ static bool msi_desc_match(struct msi_desc *desc, enum msi_desc_filter filter) static bool msi_ctrl_valid(struct device *dev, struct msi_ctrl *ctrl) { + unsigned int hwsize; + if (WARN_ON_ONCE(ctrl->domid >= MSI_MAX_DEVICE_IRQDOMAINS || - !dev->msi.data->__domains[ctrl->domid].domain || - ctrl->first > ctrl->last || - ctrl->first > MSI_MAX_INDEX || - ctrl->last > MSI_MAX_INDEX)) + !dev->msi.data->__domains[ctrl->domid].domain)) + return false; + + hwsize = msi_domain_get_hwsize(dev, ctrl->domid); + if (WARN_ON_ONCE(ctrl->first > ctrl->last || + ctrl->first >= hwsize || + ctrl->last >= hwsize)) return false; return true; } @@ -208,7 +226,7 @@ static int msi_domain_add_simple_msi_descs(struct device *dev, struct msi_ctrl * desc = msi_alloc_desc(dev, 1, NULL); if (!desc) goto fail_mem; - ret = msi_insert_desc(dev->msi.data, desc, ctrl->domid, idx); + ret = msi_insert_desc(dev, desc, ctrl->domid, idx); if (ret) goto fail; } @@ -568,6 +586,20 @@ static struct irq_domain *msi_get_device_domain(struct device *dev, unsigned int return domain; } +static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid) +{ + struct msi_domain_info *info; + struct irq_domain *domain; + + domain = msi_get_device_domain(dev, domid); + if (domain) { + info = domain->host_data; + return info->hwsize; + } + /* No domain, no size... */ + return 0; +} + static inline void irq_chip_write_msi_msg(struct irq_data *data, struct msi_msg *msg) { @@ -1356,7 +1388,7 @@ int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int struct msi_ctrl ctrl = { .domid = domid, .first = 0, - .last = MSI_MAX_INDEX, + .last = msi_domain_get_hwsize(dev, domid) - 1, .nirqs = nirqs, }; @@ -1470,7 +1502,8 @@ void msi_domain_free_irqs_range(struct device *dev, unsigned int domid, */ void msi_domain_free_irqs_all_locked(struct device *dev, unsigned int domid) { - msi_domain_free_irqs_range_locked(dev, domid, 0, MSI_MAX_INDEX); + msi_domain_free_irqs_range_locked(dev, domid, 0, + msi_domain_get_hwsize(dev, domid) - 1); } /** From 877d6c4e93f5091bfa52549bde8fb9ce71d6f7e5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:00 +0100 Subject: [PATCH 108/134] PCI/MSI: Split __pci_write_msi_msg() The upcoming per device MSI domains will create different domains for MSI and MSI-X. Split the write message function into MSI and MSI-X helpers so they can be used by those new domain functions seperately. Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Bjorn Helgaas Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232325.857982142@linutronix.de --- drivers/pci/msi/msi.c | 104 ++++++++++++++++++++++-------------------- 1 file changed, 54 insertions(+), 50 deletions(-) diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index d107bde58bd6..76a3d447e947 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -180,6 +180,58 @@ void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg) } } +static inline void pci_write_msg_msi(struct pci_dev *dev, struct msi_desc *desc, + struct msi_msg *msg) +{ + int pos = dev->msi_cap; + u16 msgctl; + + pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); + msgctl &= ~PCI_MSI_FLAGS_QSIZE; + msgctl |= desc->pci.msi_attrib.multiple << 4; + pci_write_config_word(dev, pos + PCI_MSI_FLAGS, msgctl); + + pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, msg->address_lo); + if (desc->pci.msi_attrib.is_64) { + pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, msg->address_hi); + pci_write_config_word(dev, pos + PCI_MSI_DATA_64, msg->data); + } else { + pci_write_config_word(dev, pos + PCI_MSI_DATA_32, msg->data); + } + /* Ensure that the writes are visible in the device */ + pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); +} + +static inline void pci_write_msg_msix(struct msi_desc *desc, struct msi_msg *msg) +{ + void __iomem *base = pci_msix_desc_addr(desc); + u32 ctrl = desc->pci.msix_ctrl; + bool unmasked = !(ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT); + + if (desc->pci.msi_attrib.is_virtual) + return; + /* + * The specification mandates that the entry is masked + * when the message is modified: + * + * "If software changes the Address or Data value of an + * entry while the entry is unmasked, the result is + * undefined." + */ + if (unmasked) + pci_msix_write_vector_ctrl(desc, ctrl | PCI_MSIX_ENTRY_CTRL_MASKBIT); + + writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR); + writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR); + writel(msg->data, base + PCI_MSIX_ENTRY_DATA); + + if (unmasked) + pci_msix_write_vector_ctrl(desc, ctrl); + + /* Ensure that the writes are visible in the device */ + readl(base + PCI_MSIX_ENTRY_DATA); +} + void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) { struct pci_dev *dev = msi_desc_to_pci_dev(entry); @@ -187,63 +239,15 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) if (dev->current_state != PCI_D0 || pci_dev_is_disconnected(dev)) { /* Don't touch the hardware now */ } else if (entry->pci.msi_attrib.is_msix) { - void __iomem *base = pci_msix_desc_addr(entry); - u32 ctrl = entry->pci.msix_ctrl; - bool unmasked = !(ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT); - - if (entry->pci.msi_attrib.is_virtual) - goto skip; - - /* - * The specification mandates that the entry is masked - * when the message is modified: - * - * "If software changes the Address or Data value of an - * entry while the entry is unmasked, the result is - * undefined." - */ - if (unmasked) - pci_msix_write_vector_ctrl(entry, ctrl | PCI_MSIX_ENTRY_CTRL_MASKBIT); - - writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR); - writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR); - writel(msg->data, base + PCI_MSIX_ENTRY_DATA); - - if (unmasked) - pci_msix_write_vector_ctrl(entry, ctrl); - - /* Ensure that the writes are visible in the device */ - readl(base + PCI_MSIX_ENTRY_DATA); + pci_write_msg_msix(entry, msg); } else { - int pos = dev->msi_cap; - u16 msgctl; - - pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); - msgctl &= ~PCI_MSI_FLAGS_QSIZE; - msgctl |= entry->pci.msi_attrib.multiple << 4; - pci_write_config_word(dev, pos + PCI_MSI_FLAGS, msgctl); - - pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_LO, - msg->address_lo); - if (entry->pci.msi_attrib.is_64) { - pci_write_config_dword(dev, pos + PCI_MSI_ADDRESS_HI, - msg->address_hi); - pci_write_config_word(dev, pos + PCI_MSI_DATA_64, - msg->data); - } else { - pci_write_config_word(dev, pos + PCI_MSI_DATA_32, - msg->data); - } - /* Ensure that the writes are visible in the device */ - pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl); + pci_write_msg_msi(dev, entry, msg); } -skip: entry->msg = *msg; if (entry->write_msi_msg) entry->write_msi_msg(entry, entry->write_msi_msg_data); - } void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg) From bd141a3db40c877e01de8e981edb57c03199d876 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:02 +0100 Subject: [PATCH 109/134] genirq/msi: Provide BUS_DEVICE_PCI_MSI[X] Provide new bus tokens for the upcoming per device PCI/MSI and PCI/MSIX interrupt domains. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232325.917219885@linutronix.de --- include/linux/irqdomain_defs.h | 2 ++ kernel/irq/msi.c | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/include/linux/irqdomain_defs.h b/include/linux/irqdomain_defs.h index 69035b4f740a..b3f4b7ef31f1 100644 --- a/include/linux/irqdomain_defs.h +++ b/include/linux/irqdomain_defs.h @@ -21,6 +21,8 @@ enum irq_domain_bus_token { DOMAIN_BUS_TI_SCI_INTA_MSI, DOMAIN_BUS_WAKEUP, DOMAIN_BUS_VMD_MSI, + DOMAIN_BUS_PCI_DEVICE_MSI, + DOMAIN_BUS_PCI_DEVICE_MSIX, }; #endif /* _LINUX_IRQDOMAIN_DEFS_H */ diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index ae5869222fe5..0749e661e459 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -1118,6 +1118,8 @@ static bool msi_check_reservation_mode(struct irq_domain *domain, switch(domain->bus_token) { case DOMAIN_BUS_PCI_MSI: + case DOMAIN_BUS_PCI_DEVICE_MSI: + case DOMAIN_BUS_PCI_DEVICE_MSIX: case DOMAIN_BUS_VMD_MSI: break; default: @@ -1143,6 +1145,8 @@ static int msi_handle_pci_fail(struct irq_domain *domain, struct msi_desc *desc, { switch(domain->bus_token) { case DOMAIN_BUS_PCI_MSI: + case DOMAIN_BUS_PCI_DEVICE_MSI: + case DOMAIN_BUS_PCI_DEVICE_MSIX: case DOMAIN_BUS_VMD_MSI: if (IS_ENABLED(CONFIG_PCI_MSI)) break; From 15c72f824b32761696b1854500bb3dedccbbb45a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:04 +0100 Subject: [PATCH 110/134] PCI/MSI: Add support for per device MSI[X] domains Provide a template and the necessary callbacks to create PCI/MSI and PCI/MSI-X domains. The domains are created when MSI or MSI-X is enabled. The domain's lifetime is either the device lifetime or in case that e.g. MSI-X was tried first and failed, then the MSI-X domain is removed and a MSI domain is created as both are mutually exclusive and reside in the default domain ID slot of the per device domain pointer array. Also expand pci_msi_domain_supports() to handle feature checks correctly even in the case that the per device domain was not yet created by checking the features supported by the MSI parent. Add the necessary setup calls into the MSI and MSI-X enable code path. These setup calls are backwards compatible. They return success when there is no parent domain found, which means the existing global domains or the legacy allocation path keep just working. Co-developed-by: Ahmed S. Darwish Signed-off-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Bjorn Helgaas Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232325.975388241@linutronix.de --- drivers/pci/msi/irqdomain.c | 188 +++++++++++++++++++++++++++++++++++- drivers/pci/msi/msi.c | 16 ++- drivers/pci/msi/msi.h | 2 + 3 files changed, 201 insertions(+), 5 deletions(-) diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index f4338fb42c2d..be3d50ffc093 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -139,6 +139,170 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, } EXPORT_SYMBOL_GPL(pci_msi_create_irq_domain); +/* + * Per device MSI[-X] domain functionality + */ +static void pci_device_domain_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc) +{ + arg->desc = desc; + arg->hwirq = desc->msi_index; +} + +static void pci_irq_mask_msi(struct irq_data *data) +{ + struct msi_desc *desc = irq_data_get_msi_desc(data); + + pci_msi_mask(desc, BIT(data->irq - desc->irq)); +} + +static void pci_irq_unmask_msi(struct irq_data *data) +{ + struct msi_desc *desc = irq_data_get_msi_desc(data); + + pci_msi_unmask(desc, BIT(data->irq - desc->irq)); +} + +#ifdef CONFIG_GENERIC_IRQ_RESERVATION_MODE +# define MSI_REACTIVATE MSI_FLAG_MUST_REACTIVATE +#else +# define MSI_REACTIVATE 0 +#endif + +#define MSI_COMMON_FLAGS (MSI_FLAG_FREE_MSI_DESCS | \ + MSI_FLAG_ACTIVATE_EARLY | \ + MSI_FLAG_DEV_SYSFS | \ + MSI_REACTIVATE) + +static const struct msi_domain_template pci_msi_template = { + .chip = { + .name = "PCI-MSI", + .irq_mask = pci_irq_mask_msi, + .irq_unmask = pci_irq_unmask_msi, + .irq_write_msi_msg = pci_msi_domain_write_msg, + .flags = IRQCHIP_ONESHOT_SAFE, + }, + + .ops = { + .set_desc = pci_device_domain_set_desc, + }, + + .info = { + .flags = MSI_COMMON_FLAGS | MSI_FLAG_MULTI_PCI_MSI, + .bus_token = DOMAIN_BUS_PCI_DEVICE_MSI, + }, +}; + +static void pci_irq_mask_msix(struct irq_data *data) +{ + pci_msix_mask(irq_data_get_msi_desc(data)); +} + +static void pci_irq_unmask_msix(struct irq_data *data) +{ + pci_msix_unmask(irq_data_get_msi_desc(data)); +} + +static const struct msi_domain_template pci_msix_template = { + .chip = { + .name = "PCI-MSIX", + .irq_mask = pci_irq_mask_msix, + .irq_unmask = pci_irq_unmask_msix, + .irq_write_msi_msg = pci_msi_domain_write_msg, + .flags = IRQCHIP_ONESHOT_SAFE, + }, + + .ops = { + .set_desc = pci_device_domain_set_desc, + }, + + .info = { + .flags = MSI_COMMON_FLAGS | MSI_FLAG_PCI_MSIX, + .bus_token = DOMAIN_BUS_PCI_DEVICE_MSIX, + }, +}; + +static bool pci_match_device_domain(struct pci_dev *pdev, enum irq_domain_bus_token bus_token) +{ + return msi_match_device_irq_domain(&pdev->dev, MSI_DEFAULT_DOMAIN, bus_token); +} + +static bool pci_create_device_domain(struct pci_dev *pdev, const struct msi_domain_template *tmpl, + unsigned int hwsize) +{ + struct irq_domain *domain = dev_get_msi_domain(&pdev->dev); + + if (!domain || !irq_domain_is_msi_parent(domain)) + return true; + + return msi_create_device_irq_domain(&pdev->dev, MSI_DEFAULT_DOMAIN, tmpl, + hwsize, NULL, NULL); +} + +/** + * pci_setup_msi_device_domain - Setup a device MSI interrupt domain + * @pdev: The PCI device to create the domain on + * + * Return: + * True when: + * - The device does not have a MSI parent irq domain associated, + * which keeps the legacy architecture specific and the global + * PCI/MSI domain models working + * - The MSI domain exists already + * - The MSI domain was successfully allocated + * False when: + * - MSI-X is enabled + * - The domain creation fails. + * + * The created MSI domain is preserved until: + * - The device is removed + * - MSI is disabled and a MSI-X domain is created + */ +bool pci_setup_msi_device_domain(struct pci_dev *pdev) +{ + if (WARN_ON_ONCE(pdev->msix_enabled)) + return false; + + if (pci_match_device_domain(pdev, DOMAIN_BUS_PCI_DEVICE_MSI)) + return true; + if (pci_match_device_domain(pdev, DOMAIN_BUS_PCI_DEVICE_MSIX)) + msi_remove_device_irq_domain(&pdev->dev, MSI_DEFAULT_DOMAIN); + + return pci_create_device_domain(pdev, &pci_msi_template, 1); +} + +/** + * pci_setup_msix_device_domain - Setup a device MSI-X interrupt domain + * @pdev: The PCI device to create the domain on + * @hwsize: The size of the MSI-X vector table + * + * Return: + * True when: + * - The device does not have a MSI parent irq domain associated, + * which keeps the legacy architecture specific and the global + * PCI/MSI domain models working + * - The MSI-X domain exists already + * - The MSI-X domain was successfully allocated + * False when: + * - MSI is enabled + * - The domain creation fails. + * + * The created MSI-X domain is preserved until: + * - The device is removed + * - MSI-X is disabled and a MSI domain is created + */ +bool pci_setup_msix_device_domain(struct pci_dev *pdev, unsigned int hwsize) +{ + if (WARN_ON_ONCE(pdev->msi_enabled)) + return false; + + if (pci_match_device_domain(pdev, DOMAIN_BUS_PCI_DEVICE_MSIX)) + return true; + if (pci_match_device_domain(pdev, DOMAIN_BUS_PCI_DEVICE_MSI)) + msi_remove_device_irq_domain(&pdev->dev, MSI_DEFAULT_DOMAIN); + + return pci_create_device_domain(pdev, &pci_msix_template, hwsize); +} + /** * pci_msi_domain_supports - Check for support of a particular feature flag * @pdev: The PCI device to operate on @@ -152,13 +316,33 @@ bool pci_msi_domain_supports(struct pci_dev *pdev, unsigned int feature_mask, { struct msi_domain_info *info; struct irq_domain *domain; + unsigned int supported; domain = dev_get_msi_domain(&pdev->dev); if (!domain || !irq_domain_is_hierarchy(domain)) return mode == ALLOW_LEGACY; - info = domain->host_data; - return (info->flags & feature_mask) == feature_mask; + + if (!irq_domain_is_msi_parent(domain)) { + /* + * For "global" PCI/MSI interrupt domains the associated + * msi_domain_info::flags is the authoritive source of + * information. + */ + info = domain->host_data; + supported = info->flags; + } else { + /* + * For MSI parent domains the supported feature set + * is avaliable in the parent ops. This makes checks + * possible before actually instantiating the + * per device domain because the parent is never + * expanding the PCI/MSI functionality. + */ + supported = domain->msi_parent_ops->supported_flags; + } + + return (supported & feature_mask) == feature_mask; } /* diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index 76a3d447e947..b8d74df3817e 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -436,6 +436,9 @@ int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec, if (rc) return rc; + if (!pci_setup_msi_device_domain(dev)) + return -ENODEV; + for (;;) { if (affd) { nvec = irq_calc_affinity_vectors(minvec, nvec, affd); @@ -787,9 +790,13 @@ int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int if (!pci_msix_validate_entries(dev, entries, nvec, hwsize)) return -EINVAL; - /* PCI_IRQ_VIRTUAL is a horrible hack! */ - if (nvec > hwsize && !(flags & PCI_IRQ_VIRTUAL)) - nvec = hwsize; + if (hwsize < nvec) { + /* Keep the IRQ virtual hackery working */ + if (flags & PCI_IRQ_VIRTUAL) + hwsize = nvec; + else + nvec = hwsize; + } if (nvec < minvec) return -ENOSPC; @@ -798,6 +805,9 @@ int __pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int if (rc) return rc; + if (!pci_setup_msix_device_domain(dev, hwsize)) + return -ENODEV; + for (;;) { if (affd) { nvec = irq_calc_affinity_vectors(minvec, nvec, affd); diff --git a/drivers/pci/msi/msi.h b/drivers/pci/msi/msi.h index 9d75b6fb3e0c..74408cc689c6 100644 --- a/drivers/pci/msi/msi.h +++ b/drivers/pci/msi/msi.h @@ -105,6 +105,8 @@ enum support_mode { }; bool pci_msi_domain_supports(struct pci_dev *dev, unsigned int feature_mask, enum support_mode mode); +bool pci_setup_msi_device_domain(struct pci_dev *pdev); +bool pci_setup_msix_device_domain(struct pci_dev *pdev, unsigned int hwsize); /* Legacy (!IRQDOMAIN) fallbacks */ From b6d5fc3a5245c65f7c83440460a1566d09cc9038 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:05 +0100 Subject: [PATCH 111/134] x86/apic/vector: Provide MSI parent domain Enable MSI parent domain support in the x86 vector domain and fixup the checks in the iommu implementations to check whether device::msi::domain is the default MSI parent domain. That keeps the existing logic to protect e.g. devices behind VMD working. The interrupt remap PCI/MSI code still works because the underlying vector domain still provides the same functionality. None of the other x86 PCI/MSI, e.g. XEN and HyperV, implementations are affected either. They still work the same way both at the low level and the PCI/MSI implementations they provide. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232326.034672592@linutronix.de --- arch/x86/include/asm/msi.h | 6 + arch/x86/include/asm/pci.h | 1 + arch/x86/kernel/apic/msi.c | 176 ++++++++++++++++++++-------- drivers/iommu/amd/iommu.c | 2 +- drivers/iommu/intel/irq_remapping.c | 2 +- 5 files changed, 138 insertions(+), 49 deletions(-) diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h index d71c7e8b738d..770295887e56 100644 --- a/arch/x86/include/asm/msi.h +++ b/arch/x86/include/asm/msi.h @@ -62,4 +62,10 @@ typedef struct x86_msi_addr_hi { struct msi_msg; u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid); +#define X86_VECTOR_MSI_FLAGS_SUPPORTED \ + (MSI_GENERIC_FLAGS_MASK | MSI_FLAG_PCI_MSIX) + +#define X86_VECTOR_MSI_FLAGS_REQUIRED \ + (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS) + #endif /* _ASM_X86_MSI_H */ diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index c4789de0da09..b40c462b4af3 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -92,6 +92,7 @@ void pcibios_scan_root(int bus); struct irq_routing_table *pcibios_get_irq_routing_table(void); int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq); +bool pci_dev_has_default_msi_parent_domain(struct pci_dev *dev); #define HAVE_PCI_MMAP #define arch_can_pci_mmap_wc() pat_enabled() diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 71c87513327e..db96bfcf61a3 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -142,67 +142,131 @@ msi_set_affinity(struct irq_data *irqd, const struct cpumask *mask, bool force) return ret; } -/* - * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, - * which implement the MSI or MSI-X Capability Structure. +/** + * pci_dev_has_default_msi_parent_domain - Check whether the device has the default + * MSI parent domain associated + * @dev: Pointer to the PCI device */ -static struct irq_chip pci_msi_controller = { - .name = "PCI-MSI", - .irq_unmask = pci_msi_unmask_irq, - .irq_mask = pci_msi_mask_irq, - .irq_ack = irq_chip_ack_parent, - .irq_retrigger = irq_chip_retrigger_hierarchy, - .irq_set_affinity = msi_set_affinity, - .flags = IRQCHIP_SKIP_SET_WAKE | - IRQCHIP_AFFINITY_PRE_STARTUP, -}; - -int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, - msi_alloc_info_t *arg) +bool pci_dev_has_default_msi_parent_domain(struct pci_dev *dev) { - init_irq_alloc_info(arg, NULL); - if (to_pci_dev(dev)->msix_enabled) - arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX; - else - arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSI; + struct irq_domain *domain = dev_get_msi_domain(&dev->dev); - return 0; + if (!domain) + domain = dev_get_msi_domain(&dev->bus->dev); + if (!domain) + return false; + + return domain == x86_vector_domain; } -EXPORT_SYMBOL_GPL(pci_msi_prepare); -static struct msi_domain_ops pci_msi_domain_ops = { - .msi_prepare = pci_msi_prepare, -}; +/** + * x86_msi_prepare - Setup of msi_alloc_info_t for allocations + * @domain: The domain for which this setup happens + * @dev: The device for which interrupts are allocated + * @nvec: The number of vectors to allocate + * @alloc: The allocation info structure to initialize + * + * This function is to be used for all types of MSI domains above the x86 + * vector domain and any intermediates. It is always invoked from the + * top level interrupt domain. The domain specific allocation + * functionality is determined via the @domain's bus token which allows to + * map the X86 specific allocation type. + */ +static int x86_msi_prepare(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *alloc) +{ + struct msi_domain_info *info = domain->host_data; -static struct msi_domain_info pci_msi_domain_info = { - .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_PCI_MSIX | MSI_FLAG_NOMASK_QUIRK, + init_irq_alloc_info(alloc, NULL); - .ops = &pci_msi_domain_ops, - .chip = &pci_msi_controller, - .handler = handle_edge_irq, - .handler_name = "edge", + switch (info->bus_token) { + case DOMAIN_BUS_PCI_DEVICE_MSI: + alloc->type = X86_IRQ_ALLOC_TYPE_PCI_MSI; + return 0; + case DOMAIN_BUS_PCI_DEVICE_MSIX: + alloc->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX; + return 0; + default: + return -EINVAL; + } +} + +/** + * x86_init_dev_msi_info - Domain info setup for MSI domains + * @dev: The device for which the domain should be created + * @domain: The (root) domain providing this callback + * @real_parent: The real parent domain of the to initialize domain + * @info: The domain info for the to initialize domain + * + * This function is to be used for all types of MSI domains above the x86 + * vector domain and any intermediates. The domain specific functionality + * is determined via the @real_parent. + */ +static bool x86_init_dev_msi_info(struct device *dev, struct irq_domain *domain, + struct irq_domain *real_parent, struct msi_domain_info *info) +{ + const struct msi_parent_ops *pops = real_parent->msi_parent_ops; + + /* MSI parent domain specific settings */ + switch (real_parent->bus_token) { + case DOMAIN_BUS_ANY: + /* Only the vector domain can have the ANY token */ + if (WARN_ON_ONCE(domain != real_parent)) + return false; + info->chip->irq_set_affinity = msi_set_affinity; + /* See msi_set_affinity() for the gory details */ + info->flags |= MSI_FLAG_NOMASK_QUIRK; + break; + default: + WARN_ON_ONCE(1); + return false; + } + + /* Is the target supported? */ + switch(info->bus_token) { + case DOMAIN_BUS_PCI_DEVICE_MSI: + case DOMAIN_BUS_PCI_DEVICE_MSIX: + break; + default: + WARN_ON_ONCE(1); + return false; + } + + /* + * Mask out the domain specific MSI feature flags which are not + * supported by the real parent. + */ + info->flags &= pops->supported_flags; + /* Enforce the required flags */ + info->flags |= X86_VECTOR_MSI_FLAGS_REQUIRED; + + /* This is always invoked from the top level MSI domain! */ + info->ops->msi_prepare = x86_msi_prepare; + + info->chip->irq_ack = irq_chip_ack_parent; + info->chip->irq_retrigger = irq_chip_retrigger_hierarchy; + info->chip->flags |= IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_AFFINITY_PRE_STARTUP; + + info->handler = handle_edge_irq; + info->handler_name = "edge"; + + return true; +} + +static const struct msi_parent_ops x86_vector_msi_parent_ops = { + .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED, + .init_dev_msi_info = x86_init_dev_msi_info, }; struct irq_domain * __init native_create_pci_msi_domain(void) { - struct fwnode_handle *fn; - struct irq_domain *d; - if (disable_apic) return NULL; - fn = irq_domain_alloc_named_fwnode("PCI-MSI"); - if (!fn) - return NULL; - - d = pci_msi_create_irq_domain(fn, &pci_msi_domain_info, - x86_vector_domain); - if (!d) { - irq_domain_free_fwnode(fn); - pr_warn("Failed to initialize PCI-MSI irqdomain.\n"); - } - return d; + x86_vector_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT; + x86_vector_domain->msi_parent_ops = &x86_vector_msi_parent_ops; + return x86_vector_domain; } void __init x86_create_pci_msi_domain(void) @@ -210,7 +274,25 @@ void __init x86_create_pci_msi_domain(void) x86_pci_msi_default_domain = x86_init.irqs.create_pci_msi_domain(); } +/* Keep around for hyperV and the remap code below */ +int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, + msi_alloc_info_t *arg) +{ + init_irq_alloc_info(arg, NULL); + + if (to_pci_dev(dev)->msix_enabled) + arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX; + else + arg->type = X86_IRQ_ALLOC_TYPE_PCI_MSI; + return 0; +} +EXPORT_SYMBOL_GPL(pci_msi_prepare); + #ifdef CONFIG_IRQ_REMAP +static struct msi_domain_ops pci_msi_domain_ops = { + .msi_prepare = pci_msi_prepare, +}; + static struct irq_chip pci_msi_ir_controller = { .name = "IR-PCI-MSI", .irq_unmask = pci_msi_unmask_irq, diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 72dfe57f5802..67e209c2b7b1 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -812,7 +812,7 @@ static void amd_iommu_set_pci_msi_domain(struct device *dev, struct amd_iommu *iommu) { if (!irq_remapping_enabled || !dev_is_pci(dev) || - pci_dev_has_special_msi_domain(to_pci_dev(dev))) + !pci_dev_has_default_msi_parent_domain(to_pci_dev(dev))) return; dev_set_msi_domain(dev, iommu->msi_domain); diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index a914eba787ac..08bbf08f4b03 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -1107,7 +1107,7 @@ error: */ void intel_irq_remap_add_device(struct dmar_pci_notify_info *info) { - if (!irq_remapping_enabled || pci_dev_has_special_msi_domain(info->dev)) + if (!irq_remapping_enabled || !pci_dev_has_default_msi_parent_domain(info->dev)) return; dev_set_msi_domain(&info->dev->dev, map_dev_to_ir(info->dev)); From 45c0402457c1ed2f07ee32dc129ae710e0dc288c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:07 +0100 Subject: [PATCH 112/134] PCI/MSI: Remove unused pci_dev_has_special_msi_domain() The check for special MSI domains like VMD which prevents the interrupt remapping code to overwrite device::msi::domain is not longer required and has been replaced by an x86 specific version which is aware of MSI parent domains. Remove it. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Bjorn Helgaas Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232326.093093200@linutronix.de --- drivers/pci/msi/irqdomain.c | 21 --------------------- include/linux/msi.h | 1 - 2 files changed, 22 deletions(-) diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index be3d50ffc093..473640381f92 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -414,24 +414,3 @@ struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev) DOMAIN_BUS_PCI_MSI); return dom; } - -/** - * pci_dev_has_special_msi_domain - Check whether the device is handled by - * a non-standard PCI-MSI domain - * @pdev: The PCI device to check. - * - * Returns: True if the device irqdomain or the bus irqdomain is - * non-standard PCI/MSI. - */ -bool pci_dev_has_special_msi_domain(struct pci_dev *pdev) -{ - struct irq_domain *dom = dev_get_msi_domain(&pdev->dev); - - if (!dom) - dom = dev_get_msi_domain(&pdev->bus->dev); - - if (!dom) - return true; - - return dom->bus_token != DOMAIN_BUS_PCI_MSI; -} diff --git a/include/linux/msi.h b/include/linux/msi.h index b4ab00562a29..b5dda4bf4156 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -617,7 +617,6 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, struct irq_domain *parent); u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev); struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev); -bool pci_dev_has_special_msi_domain(struct pci_dev *pdev); #else /* CONFIG_PCI_MSI */ static inline struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev) { From 9a945234abea27d45f8d89e1a1b35ab5bf41dd01 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:08 +0100 Subject: [PATCH 113/134] iommu/vt-d: Switch to MSI parent domains Remove the global PCI/MSI irqdomain implementation and provide the required MSI parent ops so the PCI/MSI code can detect the new parent and setup per device domains. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232326.151226317@linutronix.de --- arch/x86/kernel/apic/msi.c | 2 ++ drivers/iommu/intel/iommu.h | 1 - drivers/iommu/intel/irq_remapping.c | 27 ++++++++++++--------------- include/linux/irqdomain_defs.h | 1 + 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index db96bfcf61a3..a8dccb0f8c74 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -217,6 +217,8 @@ static bool x86_init_dev_msi_info(struct device *dev, struct irq_domain *domain, /* See msi_set_affinity() for the gory details */ info->flags |= MSI_FLAG_NOMASK_QUIRK; break; + case DOMAIN_BUS_DMAR: + break; default: WARN_ON_ONCE(1); return false; diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 92023dff9513..6eadb86b9d19 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -600,7 +600,6 @@ struct intel_iommu { #ifdef CONFIG_IRQ_REMAP struct ir_table *ir_table; /* Interrupt remapping info */ struct irq_domain *ir_domain; - struct irq_domain *ir_msi_domain; #endif struct iommu_device iommu; /* IOMMU core code handle */ int node; diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 08bbf08f4b03..6fab4076a9e2 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -82,6 +82,7 @@ static const struct irq_domain_ops intel_ir_domain_ops; static void iommu_disable_irq_remapping(struct intel_iommu *iommu); static int __init parse_ioapics_under_ir(void); +static const struct msi_parent_ops dmar_msi_parent_ops; static bool ir_pre_enabled(struct intel_iommu *iommu) { @@ -230,7 +231,7 @@ static struct irq_domain *map_dev_to_ir(struct pci_dev *dev) { struct dmar_drhd_unit *drhd = dmar_find_matched_drhd_unit(dev); - return drhd ? drhd->iommu->ir_msi_domain : NULL; + return drhd ? drhd->iommu->ir_domain : NULL; } static int clear_entries(struct irq_2_iommu *irq_iommu) @@ -573,10 +574,10 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id); goto out_free_fwnode; } - iommu->ir_msi_domain = - arch_create_remap_msi_irq_domain(iommu->ir_domain, - "INTEL-IR-MSI", - iommu->seq_id); + + irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_DMAR); + iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT; + iommu->ir_domain->msi_parent_ops = &dmar_msi_parent_ops; ir_table->base = page_address(pages); ir_table->bitmap = bitmap; @@ -620,9 +621,6 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) return 0; out_free_ir_domain: - if (iommu->ir_msi_domain) - irq_domain_remove(iommu->ir_msi_domain); - iommu->ir_msi_domain = NULL; irq_domain_remove(iommu->ir_domain); iommu->ir_domain = NULL; out_free_fwnode: @@ -644,13 +642,6 @@ static void intel_teardown_irq_remapping(struct intel_iommu *iommu) struct fwnode_handle *fn; if (iommu && iommu->ir_table) { - if (iommu->ir_msi_domain) { - fn = iommu->ir_msi_domain->fwnode; - - irq_domain_remove(iommu->ir_msi_domain); - irq_domain_free_fwnode(fn); - iommu->ir_msi_domain = NULL; - } if (iommu->ir_domain) { fn = iommu->ir_domain->fwnode; @@ -1437,6 +1428,12 @@ static const struct irq_domain_ops intel_ir_domain_ops = { .deactivate = intel_irq_remapping_deactivate, }; +static const struct msi_parent_ops dmar_msi_parent_ops = { + .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | MSI_FLAG_MULTI_PCI_MSI, + .prefix = "IR-", + .init_dev_msi_info = msi_parent_init_dev_msi_info, +}; + /* * Support of Interrupt Remapping Unit Hotplug */ diff --git a/include/linux/irqdomain_defs.h b/include/linux/irqdomain_defs.h index b3f4b7ef31f1..3a09396ac88d 100644 --- a/include/linux/irqdomain_defs.h +++ b/include/linux/irqdomain_defs.h @@ -23,6 +23,7 @@ enum irq_domain_bus_token { DOMAIN_BUS_VMD_MSI, DOMAIN_BUS_PCI_DEVICE_MSI, DOMAIN_BUS_PCI_DEVICE_MSIX, + DOMAIN_BUS_DMAR, }; #endif /* _LINUX_IRQDOMAIN_DEFS_H */ From cc7594ffadde77e2825faf1c576230530c829bc3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:10 +0100 Subject: [PATCH 114/134] iommu/amd: Switch to MSI base domains Remove the global PCI/MSI irqdomain implementation and provide the required MSI parent ops so the PCI/MSI code can detect the new parent and setup per device domains. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232326.209212272@linutronix.de --- arch/x86/kernel/apic/msi.c | 1 + drivers/iommu/amd/amd_iommu_types.h | 1 - drivers/iommu/amd/iommu.c | 19 +++++++++++++------ include/linux/irqdomain_defs.h | 1 + 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index a8dccb0f8c74..d198da3ddbf1 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -218,6 +218,7 @@ static bool x86_init_dev_msi_info(struct device *dev, struct irq_domain *domain, info->flags |= MSI_FLAG_NOMASK_QUIRK; break; case DOMAIN_BUS_DMAR: + case DOMAIN_BUS_AMDVI: break; default: WARN_ON_ONCE(1); diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 1d0a70c85333..3d684190b4d5 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -734,7 +734,6 @@ struct amd_iommu { u8 max_counters; #ifdef CONFIG_IRQ_REMAP struct irq_domain *ir_domain; - struct irq_domain *msi_domain; struct amd_irte_ops *irte_ops; #endif diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 67e209c2b7b1..7caccd8996f9 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -815,7 +815,7 @@ amd_iommu_set_pci_msi_domain(struct device *dev, struct amd_iommu *iommu) !pci_dev_has_default_msi_parent_domain(to_pci_dev(dev))) return; - dev_set_msi_domain(dev, iommu->msi_domain); + dev_set_msi_domain(dev, iommu->ir_domain); } #else /* CONFIG_IRQ_REMAP */ @@ -3648,6 +3648,12 @@ static struct irq_chip amd_ir_chip = { .irq_compose_msi_msg = ir_compose_msi_msg, }; +static const struct msi_parent_ops amdvi_msi_parent_ops = { + .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | MSI_FLAG_MULTI_PCI_MSI, + .prefix = "IR-", + .init_dev_msi_info = msi_parent_init_dev_msi_info, +}; + int amd_iommu_create_irq_domain(struct amd_iommu *iommu) { struct fwnode_handle *fn; @@ -3655,16 +3661,17 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu) fn = irq_domain_alloc_named_id_fwnode("AMD-IR", iommu->index); if (!fn) return -ENOMEM; - iommu->ir_domain = irq_domain_create_tree(fn, &amd_ir_domain_ops, iommu); + iommu->ir_domain = irq_domain_create_hierarchy(arch_get_ir_parent_domain(), 0, 0, + fn, &amd_ir_domain_ops, iommu); if (!iommu->ir_domain) { irq_domain_free_fwnode(fn); return -ENOMEM; } - iommu->ir_domain->parent = arch_get_ir_parent_domain(); - iommu->msi_domain = arch_create_remap_msi_irq_domain(iommu->ir_domain, - "AMD-IR-MSI", - iommu->index); + irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_AMDVI); + iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT; + iommu->ir_domain->msi_parent_ops = &amdvi_msi_parent_ops; + return 0; } diff --git a/include/linux/irqdomain_defs.h b/include/linux/irqdomain_defs.h index 3a09396ac88d..0b2d8a83e0d8 100644 --- a/include/linux/irqdomain_defs.h +++ b/include/linux/irqdomain_defs.h @@ -24,6 +24,7 @@ enum irq_domain_bus_token { DOMAIN_BUS_PCI_DEVICE_MSI, DOMAIN_BUS_PCI_DEVICE_MSIX, DOMAIN_BUS_DMAR, + DOMAIN_BUS_AMDVI, }; #endif /* _LINUX_IRQDOMAIN_DEFS_H */ From 4d5a4ccc519ab0a62e220dc8dcd8bc1c5f8fee10 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:12 +0100 Subject: [PATCH 115/134] x86/apic/msi: Remove arch_create_remap_msi_irq_domain() and related code which is not longer required now that the interrupt remap code has been converted to MSI parent domains. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232326.267353814@linutronix.de --- arch/x86/include/asm/irq_remapping.h | 4 --- arch/x86/kernel/apic/msi.c | 42 +--------------------------- 2 files changed, 1 insertion(+), 45 deletions(-) diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 7cc49432187f..7a2ed154a5e1 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h @@ -44,10 +44,6 @@ extern int irq_remapping_reenable(int); extern int irq_remap_enable_fault_handling(void); extern void panic_if_irq_remap(const char *msg); -/* Create PCI MSI/MSIx irqdomain, use @parent as the parent irqdomain. */ -extern struct irq_domain * -arch_create_remap_msi_irq_domain(struct irq_domain *par, const char *n, int id); - /* Get parent irqdomain for interrupt remapping irqdomain */ static inline struct irq_domain *arch_get_ir_parent_domain(void) { diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index d198da3ddbf1..682f51a5b12d 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -277,7 +277,7 @@ void __init x86_create_pci_msi_domain(void) x86_pci_msi_default_domain = x86_init.irqs.create_pci_msi_domain(); } -/* Keep around for hyperV and the remap code below */ +/* Keep around for hyperV */ int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *arg) { @@ -291,46 +291,6 @@ int pci_msi_prepare(struct irq_domain *domain, struct device *dev, int nvec, } EXPORT_SYMBOL_GPL(pci_msi_prepare); -#ifdef CONFIG_IRQ_REMAP -static struct msi_domain_ops pci_msi_domain_ops = { - .msi_prepare = pci_msi_prepare, -}; - -static struct irq_chip pci_msi_ir_controller = { - .name = "IR-PCI-MSI", - .irq_unmask = pci_msi_unmask_irq, - .irq_mask = pci_msi_mask_irq, - .irq_ack = irq_chip_ack_parent, - .irq_retrigger = irq_chip_retrigger_hierarchy, - .flags = IRQCHIP_SKIP_SET_WAKE | - IRQCHIP_AFFINITY_PRE_STARTUP, -}; - -static struct msi_domain_info pci_msi_ir_domain_info = { - .flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | - MSI_FLAG_MULTI_PCI_MSI | MSI_FLAG_PCI_MSIX, - .ops = &pci_msi_domain_ops, - .chip = &pci_msi_ir_controller, - .handler = handle_edge_irq, - .handler_name = "edge", -}; - -struct irq_domain *arch_create_remap_msi_irq_domain(struct irq_domain *parent, - const char *name, int id) -{ - struct fwnode_handle *fn; - struct irq_domain *d; - - fn = irq_domain_alloc_named_id_fwnode(name, id); - if (!fn) - return NULL; - d = pci_msi_create_irq_domain(fn, &pci_msi_ir_domain_info, parent); - if (!d) - irq_domain_free_fwnode(fn); - return d; -} -#endif - #ifdef CONFIG_DMAR_TABLE /* * The Intel IOMMU (ab)uses the high bits of the MSI address to contain the From 06bff9e347271566e8dd79e7c3eb971660209a00 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:13 +0100 Subject: [PATCH 116/134] genirq/msi: Provide struct msi_map A simple struct to hold a MSI index / Linux interrupt number pair. It will be returned from the dynamic vector allocation function and handed back to the corresponding free() function. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232326.326410494@linutronix.de --- include/linux/msi_api.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/linux/msi_api.h b/include/linux/msi_api.h index 8640171288a8..4cb7f4c9a3ae 100644 --- a/include/linux/msi_api.h +++ b/include/linux/msi_api.h @@ -18,6 +18,19 @@ enum msi_domain_ids { MSI_MAX_DEVICE_IRQDOMAINS, }; +/** + * msi_map - Mapping between MSI index and Linux interrupt number + * @index: The MSI index, e.g. slot in the MSI-X table or + * a software managed index if >= 0. If negative + * the allocation function failed and it contains + * the error code. + * @virq: The associated Linux interrupt number + */ +struct msi_map { + int index; + int virq; +}; + unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigned int index); /** From efd42049657e958797a483f793e4064042faa49c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:15 +0100 Subject: [PATCH 117/134] genirq/msi: Provide msi_desc:: Msi_data The upcoming support for PCI/IMS requires to store some information related to the message handling in the MSI descriptor, e.g. PASID or a pointer to a queue. Provide a generic storage struct which maps over the existing PCI specific storage which means the size of struct msi_desc is not getting bigger. This storage struct has two elements: 1) msi_domain_cookie 2) msi_instance_cookie The domain cookie is going to be used to store domain specific information, e.g. iobase pointer, data pointer. The instance cookie is going to be handed in when allocating an interrupt on an IMS domain so the irq chip callbacks of the IMS domain have the necessary per vector information available. It also comes in handy when cleaning up the platform MSI code for wire to MSI bridges which need to hand down the type information to the underlying interrupt domain. For the core code the cookies are opaque and meaningless. It just stores the instance cookie during an allocation through the upcoming interfaces for IMS and wire to MSI brigdes. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232326.385036043@linutronix.de --- include/linux/msi.h | 38 +++++++++++++++++++++++++++++++++++++- include/linux/msi_api.h | 17 +++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/include/linux/msi.h b/include/linux/msi.h index b5dda4bf4156..dca3b801428f 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -125,6 +125,38 @@ struct pci_msi_desc { }; }; +/** + * union msi_domain_cookie - Opaque MSI domain specific data + * @value: u64 value store + * @ptr: Pointer to domain specific data + * @iobase: Domain specific IOmem pointer + * + * The content of this data is implementation defined and used by the MSI + * domain to store domain specific information which is requried for + * interrupt chip callbacks. + */ +union msi_domain_cookie { + u64 value; + void *ptr; + void __iomem *iobase; +}; + +/** + * struct msi_desc_data - Generic MSI descriptor data + * @dcookie: Cookie for MSI domain specific data which is required + * for irq_chip callbacks + * @icookie: Cookie for the MSI interrupt instance provided by + * the usage site to the allocation function + * + * The content of this data is implementation defined, e.g. PCI/IMS + * implementations define the meaning of the data. The MSI core ignores + * this data completely. + */ +struct msi_desc_data { + union msi_domain_cookie dcookie; + union msi_instance_cookie icookie; +}; + #define MSI_MAX_INDEX ((unsigned int)USHRT_MAX) /** @@ -142,6 +174,7 @@ struct pci_msi_desc { * * @msi_index: Index of the msi descriptor * @pci: PCI specific msi descriptor data + * @data: Generic MSI descriptor data */ struct msi_desc { /* Shared device/bus type independent data */ @@ -161,7 +194,10 @@ struct msi_desc { void *write_msi_msg_data; u16 msi_index; - struct pci_msi_desc pci; + union { + struct pci_msi_desc pci; + struct msi_desc_data data; + }; }; /* diff --git a/include/linux/msi_api.h b/include/linux/msi_api.h index 4cb7f4c9a3ae..2e4456ec9f6b 100644 --- a/include/linux/msi_api.h +++ b/include/linux/msi_api.h @@ -18,6 +18,23 @@ enum msi_domain_ids { MSI_MAX_DEVICE_IRQDOMAINS, }; +/** + * union msi_instance_cookie - MSI instance cookie + * @value: u64 value store + * @ptr: Pointer to usage site specific data + * + * This cookie is handed to the IMS allocation function and stored in the + * MSI descriptor for the interrupt chip callbacks. + * + * The content of this cookie is MSI domain implementation defined. For + * PCI/IMS implementations this could be a PASID or a pointer to queue + * memory. + */ +union msi_instance_cookie { + u64 value; + void *ptr; +}; + /** * msi_map - Mapping between MSI index and Linux interrupt number * @index: The MSI index, e.g. slot in the MSI-X table or From 8f986fd7755bec8b8c5776824afa1bd1151986d9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:16 +0100 Subject: [PATCH 118/134] genirq/msi: Provide msi_domain_ops:: Prepare_desc() The existing MSI domain ops msi_prepare() and set_desc() turned out to be unsuitable for implementing IMS support. msi_prepare() does not operate on the MSI descriptors. set_desc() lacks an irq_domain pointer and has a completely different purpose. Introduce a prepare_desc() op which allows IMS implementations to amend an MSI descriptor which was allocated by the core code, e.g. by adjusting the iomem base or adding some data based on the allocated index. This is way better than requiring that all IMS domain implementations preallocate the MSI descriptor and then allocate the interrupt. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232326.444560717@linutronix.de --- include/linux/msi.h | 6 +++++- kernel/irq/msi.c | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/include/linux/msi.h b/include/linux/msi.h index dca3b801428f..cb0bee3f43a7 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -410,6 +410,8 @@ struct msi_domain_info; * @msi_init: Domain specific init function for MSI interrupts * @msi_free: Domain specific function to free a MSI interrupts * @msi_prepare: Prepare the allocation of the interrupts in the domain + * @prepare_desc: Optional function to prepare the allocated MSI descriptor + * in the domain * @set_desc: Set the msi descriptor for an interrupt * @domain_alloc_irqs: Optional function to override the default allocation * function. @@ -421,7 +423,7 @@ struct msi_domain_info; * @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying * irqdomain. * - * @msi_check, @msi_prepare and @set_desc are callbacks used by the + * @msi_check, @msi_prepare, @prepare_desc and @set_desc are callbacks used by the * msi_domain_alloc/free_irqs*() variants. * * @domain_alloc_irqs, @domain_free_irqs can be used to override the @@ -444,6 +446,8 @@ struct msi_domain_ops { int (*msi_prepare)(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *arg); + void (*prepare_desc)(struct irq_domain *domain, msi_alloc_info_t *arg, + struct msi_desc *desc); void (*set_desc)(msi_alloc_info_t *arg, struct msi_desc *desc); int (*domain_alloc_irqs)(struct irq_domain *domain, diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 0749e661e459..6370ea53ff4a 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -1254,6 +1254,9 @@ static int __msi_domain_alloc_irqs(struct device *dev, struct irq_domain *domain if (WARN_ON_ONCE(allocated >= ctrl->nirqs)) return -EINVAL; + if (ops->prepare_desc) + ops->prepare_desc(domain, &arg, desc); + ops->set_desc(&arg, desc); virq = __irq_domain_alloc_irqs(domain, -1, desc->nvec_used, From 3d393b21740bffbeeae7d4fa534a6b16c3e3e832 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:18 +0100 Subject: [PATCH 119/134] genirq/msi: Provide msi_domain_alloc_irq_at() For supporting post MSI-X enable allocations and for the upcoming PCI/IMS support a separate interface is required which allows not only the allocation of a specific index, but also the allocation of any, i.e. the next free index. The latter is especially required for IMS because IMS completely does away with index to functionality mappings which are often found in MSI/MSI-X implementation. But even with MSI-X there are devices where only the first few indices have a fixed functionality and the rest is freely assignable by software, e.g. to queues. msi_domain_alloc_irq_at() is also different from the range based interfaces as it always enforces that the MSI descriptor is allocated by the core code and not preallocated by the caller like the PCI/MSI[-X] enable code path does. msi_domain_alloc_irq_at() can be invoked with the index argument set to MSI_ANY_INDEX which makes the core code pick the next free index. The irq domain can provide a prepare_desc() operation callback in it's msi_domain_ops to do domain specific post allocation initialization before the actual Linux interrupt and the associated interrupt descriptor and hierarchy alloccations are conducted. The function also takes an optional @icookie argument which is of type union msi_instance_cookie. This cookie is not used by the core code and is stored in the allocated msi_desc::data::icookie. The meaning of the cookie is completely implementation defined. In case of IMS this might be a PASID or a pointer to a device queue, but for the MSI core it's opaque and not used in any way. The function returns a struct msi_map which on success contains the allocated index number and the Linux interrupt number so the caller can spare the index to Linux interrupt number lookup. On failure map::index contains the error code and map::virq is 0. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232326.501359457@linutronix.de --- include/linux/msi.h | 4 ++ include/linux/msi_api.h | 7 +++ kernel/irq/msi.c | 107 +++++++++++++++++++++++++++++++++++----- 3 files changed, 107 insertions(+), 11 deletions(-) diff --git a/include/linux/msi.h b/include/linux/msi.h index cb0bee3f43a7..00c501979ff1 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -80,6 +80,7 @@ struct pci_dev; struct platform_msi_priv_data; struct device_attribute; struct irq_domain; +struct irq_affinity_desc; void __get_cached_msi_msg(struct msi_desc *entry, struct msi_msg *msg); #ifdef CONFIG_GENERIC_MSI_IRQ @@ -602,6 +603,9 @@ int msi_domain_alloc_irqs_range(struct device *dev, unsigned int domid, unsigned int first, unsigned int last); int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int nirqs); +struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, unsigned int index, + const struct irq_affinity_desc *affdesc, + union msi_instance_cookie *cookie); void msi_domain_free_irqs_range_locked(struct device *dev, unsigned int domid, unsigned int first, unsigned int last); diff --git a/include/linux/msi_api.h b/include/linux/msi_api.h index 2e4456ec9f6b..5ae72d1912c4 100644 --- a/include/linux/msi_api.h +++ b/include/linux/msi_api.h @@ -48,6 +48,13 @@ struct msi_map { int virq; }; +/* + * Constant to be used for dynamic allocations when the allocation is any + * free MSI index, which is either an entry in a hardware table or a + * software managed index. + */ +#define MSI_ANY_INDEX UINT_MAX + unsigned int msi_domain_get_virq(struct device *dev, unsigned int domid, unsigned int index); /** diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c index 6370ea53ff4a..bd4d4dd626b4 100644 --- a/kernel/irq/msi.c +++ b/kernel/irq/msi.c @@ -90,17 +90,30 @@ static int msi_insert_desc(struct device *dev, struct msi_desc *desc, int ret; hwsize = msi_domain_get_hwsize(dev, domid); - if (index >= hwsize) { - ret = -ERANGE; - goto fail; + + if (index == MSI_ANY_INDEX) { + struct xa_limit limit = { .min = 0, .max = hwsize - 1 }; + unsigned int index; + + /* Let the xarray allocate a free index within the limit */ + ret = xa_alloc(xa, &index, desc, limit, GFP_KERNEL); + if (ret) + goto fail; + + desc->msi_index = index; + return 0; + } else { + if (index >= hwsize) { + ret = -ERANGE; + goto fail; + } + + desc->msi_index = index; + ret = xa_insert(xa, index, desc, GFP_KERNEL); + if (ret) + goto fail; + return 0; } - - desc->msi_index = index; - ret = xa_insert(xa, index, desc, GFP_KERNEL); - if (ret) - goto fail; - return 0; - fail: msi_free_desc(desc); return ret; @@ -294,7 +307,7 @@ int msi_setup_device_data(struct device *dev) } for (i = 0; i < MSI_MAX_DEVICE_IRQDOMAINS; i++) - xa_init(&md->__domains[i].store); + xa_init_flags(&md->__domains[i].store, XA_FLAGS_ALLOC); /* * If @dev::msi::domain is set and is a global MSI domain, copy the @@ -1402,6 +1415,78 @@ int msi_domain_alloc_irqs_all_locked(struct device *dev, unsigned int domid, int return msi_domain_alloc_locked(dev, &ctrl); } +/** + * msi_domain_alloc_irq_at - Allocate an interrupt from a MSI interrupt domain at + * a given index - or at the next free index + * + * @dev: Pointer to device struct of the device for which the interrupts + * are allocated + * @domid: Id of the interrupt domain to operate on + * @index: Index for allocation. If @index == %MSI_ANY_INDEX the allocation + * uses the next free index. + * @affdesc: Optional pointer to an interrupt affinity descriptor structure + * @icookie: Optional pointer to a domain specific per instance cookie. If + * non-NULL the content of the cookie is stored in msi_desc::data. + * Must be NULL for MSI-X allocations + * + * This requires a MSI interrupt domain which lets the core code manage the + * MSI descriptors. + * + * Return: struct msi_map + * + * On success msi_map::index contains the allocated index number and + * msi_map::virq the corresponding Linux interrupt number + * + * On failure msi_map::index contains the error code and msi_map::virq + * is %0. + */ +struct msi_map msi_domain_alloc_irq_at(struct device *dev, unsigned int domid, unsigned int index, + const struct irq_affinity_desc *affdesc, + union msi_instance_cookie *icookie) +{ + struct msi_ctrl ctrl = { .domid = domid, .nirqs = 1, }; + struct irq_domain *domain; + struct msi_map map = { }; + struct msi_desc *desc; + int ret; + + msi_lock_descs(dev); + domain = msi_get_device_domain(dev, domid); + if (!domain) { + map.index = -ENODEV; + goto unlock; + } + + desc = msi_alloc_desc(dev, 1, affdesc); + if (!desc) { + map.index = -ENOMEM; + goto unlock; + } + + if (icookie) + desc->data.icookie = *icookie; + + ret = msi_insert_desc(dev, desc, domid, index); + if (ret) { + map.index = ret; + goto unlock; + } + + ctrl.first = ctrl.last = desc->msi_index; + + ret = __msi_domain_alloc_irqs(dev, domain, &ctrl); + if (ret) { + map.index = ret; + msi_domain_free_locked(dev, &ctrl); + } else { + map.index = desc->msi_index; + map.virq = desc->irq; + } +unlock: + msi_unlock_descs(dev); + return map; +} + static void __msi_domain_free_irqs(struct device *dev, struct irq_domain *domain, struct msi_ctrl *ctrl) { From b834e3c08fc6c4460c2bce6575cba4705f6301e3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:20 +0100 Subject: [PATCH 120/134] genirq/msi: Provide MSI_FLAG_MSIX_ALLOC_DYN Provide a new MSI feature flag in preparation for dynamic MSIX allocation after the initial MSI-X enable has been done. This needs to be an explicit MSI interrupt domain feature because quite some implementations (both interrupt domains and legacy allocation mode) have clear expectations that the allocation code is only invoked when MSI-X is about to be enabled. They either talk to hypervisors or do some other work and are not prepared to be invoked on an already MSI-X enabled device. This is also explicit MSI-X only because rewriting the size of the MSI entries is only possible when disabling MSI which in turn might cause lost interrupts on the device. Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232326.558843119@linutronix.de --- include/linux/msi.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/msi.h b/include/linux/msi.h index 00c501979ff1..3cb15866ffbf 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -557,7 +557,8 @@ enum { MSI_FLAG_LEVEL_CAPABLE = (1 << 18), /* MSI-X entries must be contiguous */ MSI_FLAG_MSIX_CONTIGUOUS = (1 << 19), - + /* PCI/MSI-X vectors can be dynamically allocated/freed post MSI-X enable */ + MSI_FLAG_PCI_MSIX_ALLOC_DYN = (1 << 20), }; /** From 612ad43330d98f800f3784d68e7d8ab66d17a512 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:21 +0100 Subject: [PATCH 121/134] PCI/MSI: Split MSI-X descriptor setup The upcoming mechanism to allocate MSI-X vectors after enabling MSI-X needs to share some of the MSI-X descriptor setup. The regular descriptor setup on enable has the following code flow: 1) Allocate descriptor 2) Setup descriptor with PCI specific data 3) Insert descriptor 4) Allocate interrupts which in turn scans the inserted descriptors This cannot be easily changed because the PCI/MSI code needs to handle the legacy architecture specific allocation model and the irq domain model where quite some domains have the assumption that the above flow is how it works. Ideally the code flow should look like this: 1) Invoke allocation at the MSI core 2) MSI core allocates descriptor 3) MSI core calls back into the irq domain which fills in the domain specific parts This could be done for underlying parent MSI domains which support post-enable allocation/free but that would create significantly different code pathes for MSI/MSI-X enable. Though for dynamic allocation which wants to share the allocation code with the upcoming PCI/IMS support it's the right thing to do. Split the MSI-X descriptor setup into the preallocation part which just sets the index and fills in the horrible hack of virtual IRQs and the real PCI specific MSI-X setup part which solely depends on the index in the descriptor. This allows to provide a common dynamic allocation interface at the MSI core level for both PCI/MSI-X and PCI/IMS. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Bjorn Helgaas Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232326.616292598@linutronix.de --- drivers/pci/msi/msi.c | 72 +++++++++++++++++++++++++++---------------- drivers/pci/msi/msi.h | 2 ++ 2 files changed, 47 insertions(+), 27 deletions(-) diff --git a/drivers/pci/msi/msi.c b/drivers/pci/msi/msi.c index b8d74df3817e..1f716624ca56 100644 --- a/drivers/pci/msi/msi.c +++ b/drivers/pci/msi/msi.c @@ -569,34 +569,56 @@ static void __iomem *msix_map_region(struct pci_dev *dev, return ioremap(phys_addr, nr_entries * PCI_MSIX_ENTRY_SIZE); } -static int msix_setup_msi_descs(struct pci_dev *dev, void __iomem *base, - struct msix_entry *entries, int nvec, - struct irq_affinity_desc *masks) +/** + * msix_prepare_msi_desc - Prepare a half initialized MSI descriptor for operation + * @dev: The PCI device for which the descriptor is prepared + * @desc: The MSI descriptor for preparation + * + * This is separate from msix_setup_msi_descs() below to handle dynamic + * allocations for MSI-X after initial enablement. + * + * Ideally the whole MSI-X setup would work that way, but there is no way to + * support this for the legacy arch_setup_msi_irqs() mechanism and for the + * fake irq domains like the x86 XEN one. Sigh... + * + * The descriptor is zeroed and only @desc::msi_index and @desc::affinity + * are set. When called from msix_setup_msi_descs() then the is_virtual + * attribute is initialized as well. + * + * Fill in the rest. + */ +void msix_prepare_msi_desc(struct pci_dev *dev, struct msi_desc *desc) +{ + desc->nvec_used = 1; + desc->pci.msi_attrib.is_msix = 1; + desc->pci.msi_attrib.is_64 = 1; + desc->pci.msi_attrib.default_irq = dev->irq; + desc->pci.mask_base = dev->msix_base; + desc->pci.msi_attrib.can_mask = !pci_msi_ignore_mask && + !desc->pci.msi_attrib.is_virtual; + + if (desc->pci.msi_attrib.can_mask) { + void __iomem *addr = pci_msix_desc_addr(desc); + + desc->pci.msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL); + } +} + +static int msix_setup_msi_descs(struct pci_dev *dev, struct msix_entry *entries, + int nvec, struct irq_affinity_desc *masks) { int ret = 0, i, vec_count = pci_msix_vec_count(dev); struct irq_affinity_desc *curmsk; struct msi_desc desc; - void __iomem *addr; memset(&desc, 0, sizeof(desc)); - desc.nvec_used = 1; - desc.pci.msi_attrib.is_msix = 1; - desc.pci.msi_attrib.is_64 = 1; - desc.pci.msi_attrib.default_irq = dev->irq; - desc.pci.mask_base = base; - for (i = 0, curmsk = masks; i < nvec; i++, curmsk++) { desc.msi_index = entries ? entries[i].entry : i; desc.affinity = masks ? curmsk : NULL; desc.pci.msi_attrib.is_virtual = desc.msi_index >= vec_count; - desc.pci.msi_attrib.can_mask = !pci_msi_ignore_mask && - !desc.pci.msi_attrib.is_virtual; - if (desc.pci.msi_attrib.can_mask) { - addr = pci_msix_desc_addr(&desc); - desc.pci.msix_ctrl = readl(addr + PCI_MSIX_ENTRY_VECTOR_CTRL); - } + msix_prepare_msi_desc(dev, &desc); ret = msi_insert_msi_desc(&dev->dev, &desc); if (ret) @@ -629,9 +651,8 @@ static void msix_mask_all(void __iomem *base, int tsize) writel(ctrl, base + PCI_MSIX_ENTRY_VECTOR_CTRL); } -static int msix_setup_interrupts(struct pci_dev *dev, void __iomem *base, - struct msix_entry *entries, int nvec, - struct irq_affinity *affd) +static int msix_setup_interrupts(struct pci_dev *dev, struct msix_entry *entries, + int nvec, struct irq_affinity *affd) { struct irq_affinity_desc *masks = NULL; int ret; @@ -640,7 +661,7 @@ static int msix_setup_interrupts(struct pci_dev *dev, void __iomem *base, masks = irq_create_affinity_masks(nvec, affd); msi_lock_descs(&dev->dev); - ret = msix_setup_msi_descs(dev, base, entries, nvec, masks); + ret = msix_setup_msi_descs(dev, entries, nvec, masks); if (ret) goto out_free; @@ -678,7 +699,6 @@ out_unlock: static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, int nvec, struct irq_affinity *affd) { - void __iomem *base; int ret, tsize; u16 control; @@ -696,15 +716,13 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, pci_read_config_word(dev, dev->msix_cap + PCI_MSIX_FLAGS, &control); /* Request & Map MSI-X table region */ tsize = msix_table_size(control); - base = msix_map_region(dev, tsize); - if (!base) { + dev->msix_base = msix_map_region(dev, tsize); + if (!dev->msix_base) { ret = -ENOMEM; goto out_disable; } - dev->msix_base = base; - - ret = msix_setup_interrupts(dev, base, entries, nvec, affd); + ret = msix_setup_interrupts(dev, entries, nvec, affd); if (ret) goto out_disable; @@ -719,7 +737,7 @@ static int msix_capability_init(struct pci_dev *dev, struct msix_entry *entries, * which takes the MSI-X mask bits into account even * when MSI-X is disabled, which prevents MSI delivery. */ - msix_mask_all(base, tsize); + msix_mask_all(dev->msix_base, tsize); pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_MASKALL, 0); pcibios_free_irq(dev); diff --git a/drivers/pci/msi/msi.h b/drivers/pci/msi/msi.h index 74408cc689c6..ee53cf079f4e 100644 --- a/drivers/pci/msi/msi.h +++ b/drivers/pci/msi/msi.h @@ -84,6 +84,8 @@ static inline __attribute_const__ u32 msi_multi_mask(struct msi_desc *desc) return (1 << (1 << desc->pci.msi_attrib.multi_cap)) - 1; } +void msix_prepare_msi_desc(struct pci_dev *dev, struct msi_desc *desc); + /* Subsystem variables */ extern int pci_msi_enable; From 73bd063ca03493f44e0700cc08824093da9741bc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:23 +0100 Subject: [PATCH 122/134] PCI/MSI: Provide prepare_desc() MSI domain op The setup of MSI descriptors for PCI/MSI-X interrupts depends partially on the MSI index for which the descriptor is initialized. Dynamic MSI-X vector allocation post MSI-X enablement allows to allocate vectors at a given index or at any free index in the available table range. The latter requires that the descriptor is initialized after the MSI core has chosen an index. Implement the prepare_desc() op in the PCI/MSI-X specific msi_domain_ops which is invoked before the core interrupt descriptor and the associated Linux interrupt number is allocated. That callback is also provided for the upcoming PCI/IMS implementations so the implementation specific interrupt domain can do their domain specific initialization of the MSI descriptors. Signed-off-by: Thomas Gleixner Reviewed-by: Jason Gunthorpe Reviewed-by: Kevin Tian Acked-by: Bjorn Helgaas Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232326.673658806@linutronix.de --- drivers/pci/msi/irqdomain.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index 473640381f92..8afaef10b939 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -202,6 +202,14 @@ static void pci_irq_unmask_msix(struct irq_data *data) pci_msix_unmask(irq_data_get_msi_desc(data)); } +static void pci_msix_prepare_desc(struct irq_domain *domain, msi_alloc_info_t *arg, + struct msi_desc *desc) +{ + /* Don't fiddle with preallocated MSI descriptors */ + if (!desc->pci.mask_base) + msix_prepare_msi_desc(to_pci_dev(desc->dev), desc); +} + static const struct msi_domain_template pci_msix_template = { .chip = { .name = "PCI-MSIX", @@ -212,6 +220,7 @@ static const struct msi_domain_template pci_msix_template = { }, .ops = { + .prepare_desc = pci_msix_prepare_desc, .set_desc = pci_device_domain_set_desc, }, From 34026364df8eca05ee32e706a2c014511a19af02 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:24 +0100 Subject: [PATCH 123/134] PCI/MSI: Provide post-enable dynamic allocation interfaces for MSI-X MSI-X vectors can be allocated after the initial MSI-X enablement, but this needs explicit support of the underlying interrupt domains. Provide a function to query the ability and functions to allocate/free individual vectors post-enable. The allocation can either request a specific index in the MSI-X table or with the index argument MSI_ANY_INDEX it allocates the next free vector. The return value is a struct msi_map which on success contains both index and the Linux interrupt number. In case of failure index is negative and the Linux interrupt number is 0. The allocation function is for a single MSI-X index at a time as that's sufficient for the most urgent use case VFIO to get rid of the 'disable MSI-X, reallocate, enable-MSI-X' cycle which is prone to lost interrupts and redirections to the legacy and obviously unhandled INTx. As single index allocation is also sufficient for the use cases Jason Gunthorpe pointed out: Allocation of a MSI-X or IMS vector for a network queue. See Link below. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Bjorn Helgaas Acked-by: Marc Zyngier Link: https://lore.kernel.org/all/20211126232735.547996838@linutronix.de Link: https://lore.kernel.org/r/20221124232326.731233614@linutronix.de --- drivers/pci/msi/api.c | 67 +++++++++++++++++++++++++++++++++++++ drivers/pci/msi/irqdomain.c | 3 +- include/linux/pci.h | 6 ++++ 3 files changed, 75 insertions(+), 1 deletion(-) diff --git a/drivers/pci/msi/api.c b/drivers/pci/msi/api.c index 2d46a0cfd541..c8816db91e9c 100644 --- a/drivers/pci/msi/api.c +++ b/drivers/pci/msi/api.c @@ -112,6 +112,73 @@ int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, } EXPORT_SYMBOL(pci_enable_msix_range); +/** + * pci_msix_can_alloc_dyn - Query whether dynamic allocation after enabling + * MSI-X is supported + * + * @dev: PCI device to operate on + * + * Return: True if supported, false otherwise + */ +bool pci_msix_can_alloc_dyn(struct pci_dev *dev) +{ + if (!dev->msix_cap) + return false; + + return pci_msi_domain_supports(dev, MSI_FLAG_PCI_MSIX_ALLOC_DYN, DENY_LEGACY); +} +EXPORT_SYMBOL_GPL(pci_msix_can_alloc_dyn); + +/** + * pci_msix_alloc_irq_at - Allocate an MSI-X interrupt after enabling MSI-X + * at a given MSI-X vector index or any free vector index + * + * @dev: PCI device to operate on + * @index: Index to allocate. If @index == MSI_ANY_INDEX this allocates + * the next free index in the MSI-X table + * @affdesc: Optional pointer to an affinity descriptor structure. NULL otherwise + * + * Return: A struct msi_map + * + * On success msi_map::index contains the allocated index (>= 0) and + * msi_map::virq contains the allocated Linux interrupt number (> 0). + * + * On fail msi_map::index contains the error code and msi_map::virq + * is set to 0. + */ +struct msi_map pci_msix_alloc_irq_at(struct pci_dev *dev, unsigned int index, + const struct irq_affinity_desc *affdesc) +{ + struct msi_map map = { .index = -ENOTSUPP }; + + if (!dev->msix_enabled) + return map; + + if (!pci_msix_can_alloc_dyn(dev)) + return map; + + return msi_domain_alloc_irq_at(&dev->dev, MSI_DEFAULT_DOMAIN, index, affdesc, NULL); +} +EXPORT_SYMBOL_GPL(pci_msix_alloc_irq_at); + +/** + * pci_msix_free_irq - Free an interrupt on a PCI/MSIX interrupt domain + * which was allocated via pci_msix_alloc_irq_at() + * + * @dev: The PCI device to operate on + * @map: A struct msi_map describing the interrupt to free + * as returned from the allocation function. + */ +void pci_msix_free_irq(struct pci_dev *dev, struct msi_map map) +{ + if (WARN_ON_ONCE(map.index < 0 || map.virq <= 0)) + return; + if (WARN_ON_ONCE(!pci_msix_can_alloc_dyn(dev))) + return; + msi_domain_free_irqs_range(&dev->dev, MSI_DEFAULT_DOMAIN, map.index, map.index); +} +EXPORT_SYMBOL_GPL(pci_msix_free_irq); + /** * pci_disable_msix() - Disable MSI-X interrupt mode on device * @dev: the PCI device to operate on diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index 8afaef10b939..deb1930b4e66 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -225,7 +225,8 @@ static const struct msi_domain_template pci_msix_template = { }, .info = { - .flags = MSI_COMMON_FLAGS | MSI_FLAG_PCI_MSIX, + .flags = MSI_COMMON_FLAGS | MSI_FLAG_PCI_MSIX | + MSI_FLAG_PCI_MSIX_ALLOC_DYN, .bus_token = DOMAIN_BUS_PCI_DEVICE_MSIX, }, }; diff --git a/include/linux/pci.h b/include/linux/pci.h index 243e48f79e82..68b14ba93df2 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -1559,6 +1560,11 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, unsigned int max_vecs, unsigned int flags, struct irq_affinity *affd); +bool pci_msix_can_alloc_dyn(struct pci_dev *dev); +struct msi_map pci_msix_alloc_irq_at(struct pci_dev *dev, unsigned int index, + const struct irq_affinity_desc *affdesc); +void pci_msix_free_irq(struct pci_dev *pdev, struct msi_map map); + void pci_free_irq_vectors(struct pci_dev *dev); int pci_irq_vector(struct pci_dev *dev, unsigned int nr); const struct cpumask *pci_irq_get_affinity(struct pci_dev *pdev, int vec); From 486254ad967dbef37fd797dd296fe69b465aa0f9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:26 +0100 Subject: [PATCH 124/134] x86/apic/msi: Enable MSI_FLAG_PCI_MSIX_ALLOC_DYN x86 MSI irqdomains can handle MSI-X allocation post MSI-X enable just out of the box - on the vector domain and on the remapping domains, Add the feature flag to the supported feature list Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232326.787373104@linutronix.de --- arch/x86/include/asm/msi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/msi.h b/arch/x86/include/asm/msi.h index 770295887e56..935c6d470341 100644 --- a/arch/x86/include/asm/msi.h +++ b/arch/x86/include/asm/msi.h @@ -63,7 +63,7 @@ struct msi_msg; u32 x86_msi_msg_get_destid(struct msi_msg *msg, bool extid); #define X86_VECTOR_MSI_FLAGS_SUPPORTED \ - (MSI_GENERIC_FLAGS_MASK | MSI_FLAG_PCI_MSIX) + (MSI_GENERIC_FLAGS_MASK | MSI_FLAG_PCI_MSIX | MSI_FLAG_PCI_MSIX_ALLOC_DYN) #define X86_VECTOR_MSI_FLAGS_REQUIRED \ (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS) From e23d4192bf9b612bce5b24f22719fd3cc6edaa69 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:28 +0100 Subject: [PATCH 125/134] genirq/msi: Provide constants for PCI/IMS support Provide the necessary constants for PCI/IMS support: - A new bus token for MSI irqdomain identification - A MSI feature flag for the MSI irqdomains to signal support - A secondary domain id The latter expands the device internal domain pointer storage array from 1 to 2 entries. That extra pointer is mostly unused today, but the alternative solutions would not be free either and would introduce more complexity all over the place. Trade the 8bytes for simplicity. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232326.846169830@linutronix.de --- include/linux/irqdomain_defs.h | 1 + include/linux/msi.h | 2 ++ include/linux/msi_api.h | 1 + 3 files changed, 4 insertions(+) diff --git a/include/linux/irqdomain_defs.h b/include/linux/irqdomain_defs.h index 0b2d8a83e0d8..c29921fd8cd1 100644 --- a/include/linux/irqdomain_defs.h +++ b/include/linux/irqdomain_defs.h @@ -25,6 +25,7 @@ enum irq_domain_bus_token { DOMAIN_BUS_PCI_DEVICE_MSIX, DOMAIN_BUS_DMAR, DOMAIN_BUS_AMDVI, + DOMAIN_BUS_PCI_DEVICE_IMS, }; #endif /* _LINUX_IRQDOMAIN_DEFS_H */ diff --git a/include/linux/msi.h b/include/linux/msi.h index 3cb15866ffbf..a112b913fff9 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -559,6 +559,8 @@ enum { MSI_FLAG_MSIX_CONTIGUOUS = (1 << 19), /* PCI/MSI-X vectors can be dynamically allocated/freed post MSI-X enable */ MSI_FLAG_PCI_MSIX_ALLOC_DYN = (1 << 20), + /* Support for PCI/IMS */ + MSI_FLAG_PCI_IMS = (1 << 21), }; /** diff --git a/include/linux/msi_api.h b/include/linux/msi_api.h index 5ae72d1912c4..391087ad99b1 100644 --- a/include/linux/msi_api.h +++ b/include/linux/msi_api.h @@ -15,6 +15,7 @@ struct device; */ enum msi_domain_ids { MSI_DEFAULT_DOMAIN, + MSI_SECONDARY_DOMAIN, MSI_MAX_DEVICE_IRQDOMAINS, }; From 0194425af0c87acaad457989a2c6d90dba58e776 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:29 +0100 Subject: [PATCH 126/134] PCI/MSI: Provide IMS (Interrupt Message Store) support IMS (Interrupt Message Store) is a new specification which allows implementation specific storage of MSI messages contrary to the strict standard specified MSI and MSI-X message stores. This requires new device specific interrupt domains to handle the implementation defined storage which can be an array in device memory or host/guest memory which is shared with hardware queues. Add a function to create IMS domains for PCI devices. IMS domains are using the new per device domain mechanism and are configured by the device driver via a template. IMS domains are created as secondary device domains so they work side on side with MSI[-X] on the same device. The IMS domains have a few constraints: - The index space is managed by the core code. Device memory based IMS provides a storage array with a fixed size which obviously requires an index. But there is no association between index and functionality so the core can randomly allocate an index in the array. System memory based IMS does not have the concept of an index as the storage is somewhere in memory. In that case the index is purely software based to keep track of the allocations. - There is no requirement for consecutive index ranges This is currently a limitation of the MSI core and can be implemented if there is a justified use case by changing the internal storage from xarray to maple_tree. For now it's single vector allocation. - The interrupt chip must provide the following callbacks: - irq_mask() - irq_unmask() - irq_write_msi_msg() - The interrupt chip must provide the following optional callbacks when the irq_mask(), irq_unmask() and irq_write_msi_msg() callbacks cannot operate directly on hardware, e.g. in the case that the interrupt message store is in queue memory: - irq_bus_lock() - irq_bus_unlock() These callbacks are invoked from preemptible task context and are allowed to sleep. In this case the mandatory callbacks above just store the information. The irq_bus_unlock() callback is supposed to make the change effective before returning. - Interrupt affinity setting is handled by the underlying parent interrupt domain and communicated to the IMS domain via irq_write_msi_msg(). IMS domains cannot have a irq_set_affinity() callback. That's a reasonable restriction similar to the PCI/MSI device domain implementations. The domain is automatically destroyed when the PCI device is removed. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232326.904316841@linutronix.de --- drivers/pci/msi/irqdomain.c | 59 +++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 5 ++++ 2 files changed, 64 insertions(+) diff --git a/drivers/pci/msi/irqdomain.c b/drivers/pci/msi/irqdomain.c index deb1930b4e66..e33bcc872699 100644 --- a/drivers/pci/msi/irqdomain.c +++ b/drivers/pci/msi/irqdomain.c @@ -355,6 +355,65 @@ bool pci_msi_domain_supports(struct pci_dev *pdev, unsigned int feature_mask, return (supported & feature_mask) == feature_mask; } +/** + * pci_create_ims_domain - Create a secondary IMS domain for a PCI device + * @pdev: The PCI device to operate on + * @template: The MSI info template which describes the domain + * @hwsize: The size of the hardware entry table or 0 if the domain + * is purely software managed + * @data: Optional pointer to domain specific data to be stored + * in msi_domain_info::data + * + * Return: True on success, false otherwise + * + * An IMS domain is expected to have the following constraints: + * - The index space is managed by the core code + * + * - There is no requirement for consecutive index ranges + * + * - The interrupt chip must provide the following callbacks: + * - irq_mask() + * - irq_unmask() + * - irq_write_msi_msg() + * + * - The interrupt chip must provide the following optional callbacks + * when the irq_mask(), irq_unmask() and irq_write_msi_msg() callbacks + * cannot operate directly on hardware, e.g. in the case that the + * interrupt message store is in queue memory: + * - irq_bus_lock() + * - irq_bus_unlock() + * + * These callbacks are invoked from preemptible task context and are + * allowed to sleep. In this case the mandatory callbacks above just + * store the information. The irq_bus_unlock() callback is supposed + * to make the change effective before returning. + * + * - Interrupt affinity setting is handled by the underlying parent + * interrupt domain and communicated to the IMS domain via + * irq_write_msi_msg(). + * + * The domain is automatically destroyed when the PCI device is removed. + */ +bool pci_create_ims_domain(struct pci_dev *pdev, const struct msi_domain_template *template, + unsigned int hwsize, void *data) +{ + struct irq_domain *domain = dev_get_msi_domain(&pdev->dev); + + if (!domain || !irq_domain_is_msi_parent(domain)) + return false; + + if (template->info.bus_token != DOMAIN_BUS_PCI_DEVICE_IMS || + !(template->info.flags & MSI_FLAG_ALLOC_SIMPLE_MSI_DESCS) || + !(template->info.flags & MSI_FLAG_FREE_MSI_DESCS) || + !template->chip.irq_mask || !template->chip.irq_unmask || + !template->chip.irq_write_msi_msg || template->chip.irq_set_affinity) + return false; + + return msi_create_device_irq_domain(&pdev->dev, MSI_SECONDARY_DOMAIN, template, + hwsize, data, NULL); +} +EXPORT_SYMBOL_GPL(pci_create_ims_domain); + /* * Users of the generic MSI infrastructure expect a device to have a single ID, * so with DMA aliases we have to pick the least-worst compromise. Devices with diff --git a/include/linux/pci.h b/include/linux/pci.h index 68b14ba93df2..1592b630d919 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2487,6 +2487,11 @@ static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev) void pci_uevent_ers(struct pci_dev *pdev, enum pci_ers_result err_type); #endif +struct msi_domain_template; + +bool pci_create_ims_domain(struct pci_dev *pdev, const struct msi_domain_template *template, + unsigned int hwsize, void *data); + #include #define pci_printk(level, pdev, fmt, arg...) \ From c9e5bea273834a63b5e9ba90ad94b305ba50704e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:31 +0100 Subject: [PATCH 127/134] PCI/MSI: Provide pci_ims_alloc/free_irq() Single vector allocation which allocates the next free index in the IMS space. The free function releases. All allocated vectors are released also via pci_free_vectors() which is also releasing MSI/MSI-X vectors. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Bjorn Helgaas Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232326.961711347@linutronix.de --- drivers/pci/msi/api.c | 50 +++++++++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 3 +++ 2 files changed, 53 insertions(+) diff --git a/drivers/pci/msi/api.c b/drivers/pci/msi/api.c index c8816db91e9c..b8009aa11f3c 100644 --- a/drivers/pci/msi/api.c +++ b/drivers/pci/msi/api.c @@ -365,6 +365,56 @@ const struct cpumask *pci_irq_get_affinity(struct pci_dev *dev, int nr) } EXPORT_SYMBOL(pci_irq_get_affinity); +/** + * pci_ims_alloc_irq - Allocate an interrupt on a PCI/IMS interrupt domain + * @dev: The PCI device to operate on + * @icookie: Pointer to an IMS implementation specific cookie for this + * IMS instance (PASID, queue ID, pointer...). + * The cookie content is copied into the MSI descriptor for the + * interrupt chip callbacks or domain specific setup functions. + * @affdesc: Optional pointer to an interrupt affinity descriptor + * + * There is no index for IMS allocations as IMS is an implementation + * specific storage and does not have any direct associations between + * index, which might be a pure software construct, and device + * functionality. This association is established by the driver either via + * the index - if there is a hardware table - or in case of purely software + * managed IMS implementation the association happens via the + * irq_write_msi_msg() callback of the implementation specific interrupt + * chip, which utilizes the provided @icookie to store the MSI message in + * the appropriate place. + * + * Return: A struct msi_map + * + * On success msi_map::index contains the allocated index (>= 0) and + * msi_map::virq the allocated Linux interrupt number (> 0). + * + * On fail msi_map::index contains the error code and msi_map::virq + * is set to 0. + */ +struct msi_map pci_ims_alloc_irq(struct pci_dev *dev, union msi_instance_cookie *icookie, + const struct irq_affinity_desc *affdesc) +{ + return msi_domain_alloc_irq_at(&dev->dev, MSI_SECONDARY_DOMAIN, MSI_ANY_INDEX, + affdesc, icookie); +} +EXPORT_SYMBOL_GPL(pci_ims_alloc_irq); + +/** + * pci_ims_free_irq - Allocate an interrupt on a PCI/IMS interrupt domain + * which was allocated via pci_ims_alloc_irq() + * @dev: The PCI device to operate on + * @map: A struct msi_map describing the interrupt to free as + * returned from pci_ims_alloc_irq() + */ +void pci_ims_free_irq(struct pci_dev *dev, struct msi_map map) +{ + if (WARN_ON_ONCE(map.index < 0 || map.virq <= 0)) + return; + msi_domain_free_irqs_range(&dev->dev, MSI_SECONDARY_DOMAIN, map.index, map.index); +} +EXPORT_SYMBOL_GPL(pci_ims_free_irq); + /** * pci_free_irq_vectors() - Free previously allocated IRQs for a device * @dev: the PCI device to operate on diff --git a/include/linux/pci.h b/include/linux/pci.h index 1592b630d919..aa514b54c681 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -2491,6 +2491,9 @@ struct msi_domain_template; bool pci_create_ims_domain(struct pci_dev *pdev, const struct msi_domain_template *template, unsigned int hwsize, void *data); +struct msi_map pci_ims_alloc_irq(struct pci_dev *pdev, union msi_instance_cookie *icookie, + const struct irq_affinity_desc *affdesc); +void pci_ims_free_irq(struct pci_dev *pdev, struct msi_map map); #include From 6e24c887732901140f4e82ba2315c2e15f06f1d6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:32 +0100 Subject: [PATCH 128/134] x86/apic/msi: Enable PCI/IMS Enable IMS in the domain init and allocation mapping code, but do not enable it on the vector domain as discussed in various threads on LKML. The interrupt remap domains can expand this setting like they do with PCI multi MSI. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232327.022658817@linutronix.de --- arch/x86/kernel/apic/msi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 682f51a5b12d..35d5b8fb18ef 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -184,6 +184,7 @@ static int x86_msi_prepare(struct irq_domain *domain, struct device *dev, alloc->type = X86_IRQ_ALLOC_TYPE_PCI_MSI; return 0; case DOMAIN_BUS_PCI_DEVICE_MSIX: + case DOMAIN_BUS_PCI_DEVICE_IMS: alloc->type = X86_IRQ_ALLOC_TYPE_PCI_MSIX; return 0; default: @@ -230,6 +231,10 @@ static bool x86_init_dev_msi_info(struct device *dev, struct irq_domain *domain, case DOMAIN_BUS_PCI_DEVICE_MSI: case DOMAIN_BUS_PCI_DEVICE_MSIX: break; + case DOMAIN_BUS_PCI_DEVICE_IMS: + if (!(pops->supported_flags & MSI_FLAG_PCI_IMS)) + return false; + break; default: WARN_ON_ONCE(1); return false; From 810531a1af5393f010d6508b1cb48e6650fc5e8f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:34 +0100 Subject: [PATCH 129/134] iommu/vt-d: Enable PCI/IMS PCI/IMS works like PCI/MSI-X in the remapping. Just add the feature flag, but only when on real hardware. Virtualized IOMMUs need additional support, e.g. for PASID. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232327.081482253@linutronix.de --- drivers/iommu/intel/irq_remapping.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 6fab4076a9e2..a723f53ba472 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -82,7 +82,7 @@ static const struct irq_domain_ops intel_ir_domain_ops; static void iommu_disable_irq_remapping(struct intel_iommu *iommu); static int __init parse_ioapics_under_ir(void); -static const struct msi_parent_ops dmar_msi_parent_ops; +static const struct msi_parent_ops dmar_msi_parent_ops, virt_dmar_msi_parent_ops; static bool ir_pre_enabled(struct intel_iommu *iommu) { @@ -577,7 +577,11 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_DMAR); iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT; - iommu->ir_domain->msi_parent_ops = &dmar_msi_parent_ops; + + if (cap_caching_mode(iommu->cap)) + iommu->ir_domain->msi_parent_ops = &virt_dmar_msi_parent_ops; + else + iommu->ir_domain->msi_parent_ops = &dmar_msi_parent_ops; ir_table->base = page_address(pages); ir_table->bitmap = bitmap; @@ -1429,11 +1433,20 @@ static const struct irq_domain_ops intel_ir_domain_ops = { }; static const struct msi_parent_ops dmar_msi_parent_ops = { - .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | MSI_FLAG_MULTI_PCI_MSI, + .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | + MSI_FLAG_MULTI_PCI_MSI | + MSI_FLAG_PCI_IMS, .prefix = "IR-", .init_dev_msi_info = msi_parent_init_dev_msi_info, }; +static const struct msi_parent_ops virt_dmar_msi_parent_ops = { + .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | + MSI_FLAG_MULTI_PCI_MSI, + .prefix = "vIR-", + .init_dev_msi_info = msi_parent_init_dev_msi_info, +}; + /* * Support of Interrupt Remapping Unit Hotplug */ From fa5745aca1dc819aee6463a2475b5c277f7cf8f6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 25 Nov 2022 00:26:36 +0100 Subject: [PATCH 130/134] iommu/amd: Enable PCI/IMS PCI/IMS works like PCI/MSI-X in the remapping. Just add the feature flag, but only when on real hardware. Virtualized IOMMUs need additional support. Signed-off-by: Thomas Gleixner Reviewed-by: Kevin Tian Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221124232327.140571546@linutronix.de --- drivers/iommu/amd/iommu.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 7caccd8996f9..4d28967f910d 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -3649,11 +3649,20 @@ static struct irq_chip amd_ir_chip = { }; static const struct msi_parent_ops amdvi_msi_parent_ops = { - .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | MSI_FLAG_MULTI_PCI_MSI, + .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | + MSI_FLAG_MULTI_PCI_MSI | + MSI_FLAG_PCI_IMS, .prefix = "IR-", .init_dev_msi_info = msi_parent_init_dev_msi_info, }; +static const struct msi_parent_ops virt_amdvi_msi_parent_ops = { + .supported_flags = X86_VECTOR_MSI_FLAGS_SUPPORTED | + MSI_FLAG_MULTI_PCI_MSI, + .prefix = "vIR-", + .init_dev_msi_info = msi_parent_init_dev_msi_info, +}; + int amd_iommu_create_irq_domain(struct amd_iommu *iommu) { struct fwnode_handle *fn; @@ -3670,7 +3679,11 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu) irq_domain_update_bus_token(iommu->ir_domain, DOMAIN_BUS_AMDVI); iommu->ir_domain->flags |= IRQ_DOMAIN_FLAG_MSI_PARENT; - iommu->ir_domain->msi_parent_ops = &amdvi_msi_parent_ops; + + if (amd_iommu_np_cache) + iommu->ir_domain->msi_parent_ops = &virt_amdvi_msi_parent_ops; + else + iommu->ir_domain->msi_parent_ops = &amdvi_msi_parent_ops; return 0; } From 55721afa8d8b82e442cb4eaf7173330f79cbfb48 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 21 Nov 2022 15:39:29 +0100 Subject: [PATCH 131/134] irqchip/irq-mvebu-icu: Fix works by chance pointer assignment Assigning a void pointer which points to a struct to two different data types only works by chance if the second type is the first member of the struct. Replace this works by chance code by using the primary struct pointer. Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221121140048.344525618@linutronix.de --- drivers/irqchip/irq-mvebu-icu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-mvebu-icu.c b/drivers/irqchip/irq-mvebu-icu.c index 497da344717c..3c77acc7ec6a 100644 --- a/drivers/irqchip/irq-mvebu-icu.c +++ b/drivers/irqchip/irq-mvebu-icu.c @@ -151,9 +151,9 @@ static int mvebu_icu_irq_domain_translate(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *hwirq, unsigned int *type) { - struct mvebu_icu_msi_data *msi_data = platform_msi_get_host_data(d); - struct mvebu_icu *icu = platform_msi_get_host_data(d); unsigned int param_count = static_branch_unlikely(&legacy_bindings) ? 3 : 2; + struct mvebu_icu_msi_data *msi_data = platform_msi_get_host_data(d); + struct mvebu_icu *icu = msi_data->icu; /* Check the count of the parameters in dt */ if (WARN_ON(fwspec->param_count != param_count)) { From 72a3f8f22a36aaf05da8941535d1c553fe049b2b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 21 Nov 2022 15:39:32 +0100 Subject: [PATCH 132/134] irqchip/gic-v2m: Include arm-gic-common.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit W=1 build complains: drivers/irqchip/irq-gic-v2m.c:570:12: warning: no previous prototype for ‘gicv2m_init’ [-Wmissing-prototypes] 570 | int __init gicv2m_init(struct fwnode_handle *parent_handle, Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221121140048.470680255@linutronix.de --- drivers/irqchip/irq-gic-v2m.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index 6e1ac330d7a6..fcc871a8c2d6 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -24,6 +24,7 @@ #include #include #include +#include /* * MSI_TYPER: From d51a15af37ce8cf59e73de51dcdce3c9f4944974 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 21 Nov 2022 15:39:33 +0100 Subject: [PATCH 133/134] irqchip/gic-v2m: Mark a few functions __init They are all part of the init sequence. Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221121140048.534395323@linutronix.de --- drivers/irqchip/irq-gic-v2m.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index fcc871a8c2d6..f4d7eeb13951 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -263,7 +263,7 @@ static struct msi_domain_info gicv2m_pmsi_domain_info = { .chip = &gicv2m_pmsi_irq_chip, }; -static void gicv2m_teardown(void) +static void __init gicv2m_teardown(void) { struct v2m_data *v2m, *tmp; @@ -278,7 +278,7 @@ static void gicv2m_teardown(void) } } -static int gicv2m_allocate_domains(struct irq_domain *parent) +static __init int gicv2m_allocate_domains(struct irq_domain *parent) { struct irq_domain *inner_domain, *pci_domain, *plat_domain; struct v2m_data *v2m; @@ -405,7 +405,7 @@ err_free_v2m: return ret; } -static const struct of_device_id gicv2m_device_id[] = { +static __initconst struct of_device_id gicv2m_device_id[] = { { .compatible = "arm,gic-v2m-frame", }, {}, }; @@ -455,7 +455,7 @@ static int __init gicv2m_of_init(struct fwnode_handle *parent_handle, #ifdef CONFIG_ACPI static int acpi_num_msi; -static struct fwnode_handle *gicv2m_get_fwnode(struct device *dev) +static __init struct fwnode_handle *gicv2m_get_fwnode(struct device *dev) { struct v2m_data *data; @@ -470,7 +470,7 @@ static struct fwnode_handle *gicv2m_get_fwnode(struct device *dev) return data->fwnode; } -static bool acpi_check_amazon_graviton_quirks(void) +static __init bool acpi_check_amazon_graviton_quirks(void) { static struct acpi_table_madt *madt; acpi_status status; From e6d22108621c837f81d041ec5d61b08d17b151df Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 21 Nov 2022 15:39:34 +0100 Subject: [PATCH 134/134] irqchip/ti-sci-inta: Fix kernel doc W=1 build complains: drivers/irqchip/irq-ti-sci-inta.c:177: warning: Function parameter or member 'vint_id' not described in 'ti_sci_inta_xlate_irq' drivers/irqchip/irq-ti-sci-inta.c:177: warning: Excess function parameter 'irq' description in 'ti_sci_inta_xlate_irq' Signed-off-by: Thomas Gleixner Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20221121140048.596303869@linutronix.de --- drivers/irqchip/irq-ti-sci-inta.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-ti-sci-inta.c b/drivers/irqchip/irq-ti-sci-inta.c index 5fdbb4358dd0..a6ecc53d055c 100644 --- a/drivers/irqchip/irq-ti-sci-inta.c +++ b/drivers/irqchip/irq-ti-sci-inta.c @@ -168,7 +168,7 @@ static void ti_sci_inta_irq_handler(struct irq_desc *desc) /** * ti_sci_inta_xlate_irq() - Translate hwirq to parent's hwirq. * @inta: IRQ domain corresponding to Interrupt Aggregator - * @irq: Hardware irq corresponding to the above irq domain + * @vint_id: Hardware irq corresponding to the above irq domain * * Return parent irq number if translation is available else -ENOENT. */