2019-06-03 07:44:50 +02:00
// SPDX-License-Identifier: GPL-2.0-only
2014-06-30 16:01:31 +01:00
/*
2016-12-19 17:01:52 +00:00
* Copyright ( C ) 2013 - 2017 ARM Limited , All Rights Reserved .
2014-06-30 16:01:31 +01:00
* Author : Marc Zyngier < marc . zyngier @ arm . com >
*/
2016-04-11 16:32:55 +01:00
# define pr_fmt(fmt) "GICv3: " fmt
2016-01-19 14:11:15 +01:00
# include <linux/acpi.h>
2014-06-30 16:01:31 +01:00
# include <linux/cpu.h>
2014-08-26 16:03:35 +01:00
# include <linux/cpu_pm.h>
2014-06-30 16:01:31 +01:00
# include <linux/delay.h>
# include <linux/interrupt.h>
2016-01-19 14:11:15 +01:00
# include <linux/irqdomain.h>
2022-11-01 22:13:51 +01:00
# include <linux/kstrtox.h>
2014-06-30 16:01:31 +01:00
# include <linux/of.h>
# include <linux/of_address.h>
# include <linux/of_irq.h>
# include <linux/percpu.h>
2019-01-31 14:58:59 +00:00
# include <linux/refcount.h>
2014-06-30 16:01:31 +01:00
# include <linux/slab.h>
2015-07-07 17:11:46 -04:00
# include <linux/irqchip.h>
2016-04-11 16:32:57 +01:00
# include <linux/irqchip/arm-gic-common.h>
2014-06-30 16:01:31 +01:00
# include <linux/irqchip/arm-gic-v3.h>
2016-04-11 09:57:54 +01:00
# include <linux/irqchip/irq-partition-percpu.h>
irqchip/gicv3: Workaround for NVIDIA erratum T241-FABRIC-4
The T241 platform suffers from the T241-FABRIC-4 erratum which causes
unexpected behavior in the GIC when multiple transactions are received
simultaneously from different sources. This hardware issue impacts
NVIDIA server platforms that use more than two T241 chips
interconnected. Each chip has support for 320 {E}SPIs.
This issue occurs when multiple packets from different GICs are
incorrectly interleaved at the target chip. The erratum text below
specifies exactly what can cause multiple transfer packets susceptible
to interleaving and GIC state corruption. GIC state corruption can
lead to a range of problems, including kernel panics, and unexpected
behavior.
From the erratum text:
"In some cases, inter-socket AXI4 Stream packets with multiple
transfers, may be interleaved by the fabric when presented to ARM
Generic Interrupt Controller. GIC expects all transfers of a packet
to be delivered without any interleaving.
The following GICv3 commands may result in multiple transfer packets
over inter-socket AXI4 Stream interface:
- Register reads from GICD_I* and GICD_N*
- Register writes to 64-bit GICD registers other than GICD_IROUTERn*
- ITS command MOVALL
Multiple commands in GICv4+ utilize multiple transfer packets,
including VMOVP, VMOVI, VMAPP, and 64-bit register accesses."
This issue impacts system configurations with more than 2 sockets,
that require multi-transfer packets to be sent over inter-socket
AXI4 Stream interface between GIC instances on different sockets.
GICv4 cannot be supported. GICv3 SW model can only be supported
with the workaround. Single and Dual socket configurations are not
impacted by this issue and support GICv3 and GICv4."
Link: https://developer.nvidia.com/docs/t241-fabric-4/nvidia-t241-fabric-4-errata.pdf
Writing to the chip alias region of the GICD_In{E} registers except
GICD_ICENABLERn has an equivalent effect as writing to the global
distributor. The SPI interrupt deactivate path is not impacted by
the erratum.
To fix this problem, implement a workaround that ensures read accesses
to the GICD_In{E} registers are directed to the chip that owns the
SPI, and disable GICv4.x features. To simplify code changes, the
gic_configure_irq() function uses the same alias region for both read
and write operations to GICD_ICFGR.
Co-developed-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com> (for SMCCC/SOC ID bits)
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230319024314.3540573-2-sdonthineni@nvidia.com
2023-03-18 21:43:14 -05:00
# include <linux/bitfield.h>
# include <linux/bits.h>
# include <linux/arm-smccc.h>
2014-06-30 16:01:31 +01:00
# include <asm/cputype.h>
# include <asm/exception.h>
# include <asm/smp_plat.h>
2015-08-26 17:00:42 +01:00
# include <asm/virt.h>
2014-06-30 16:01:31 +01:00
# include "irq-gic-common.h"
2019-01-31 14:58:58 +00:00
/* Priority given to pseudo-NMIs: the default priority with bit 7 cleared. */
#define GICD_INT_NMI_PRI	(GICD_INT_DEF_PRI & ~0x80)

/* Quirk bits stored in gic_chip_data.flags, one bit per workaround. */
#define FLAGS_WORKAROUND_GICR_WAKER_MSM8996	(1ULL << 0)
#define FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539	(1ULL << 1)
#define FLAGS_WORKAROUND_MTK_GICR_SAVE		(1ULL << 2)
#define FLAGS_WORKAROUND_ASR_ERRATUM_8601001	(1ULL << 3)

/* Interrupt type value that immediately follows GIC_IRQ_TYPE_LPI. */
#define GIC_IRQ_TYPE_PARTITION	(GIC_IRQ_TYPE_LPI + 1)
2014-11-24 14:35:10 +00:00
/* Description of one redistributor region. */
struct redist_region {
	void __iomem	*redist_base;	/* MMIO pointer to the region */
	phys_addr_t	phys_base;	/* physical address of the region */
	/* NOTE(review): presumably set when the region holds one redistributor - confirm */
	bool		single_redist;
};
2014-06-30 16:01:31 +01:00
struct gic_chip_data {
2016-04-11 09:57:54 +01:00
struct fwnode_handle * fwnode ;
irqchip/gicv3: Workaround for NVIDIA erratum T241-FABRIC-4
The T241 platform suffers from the T241-FABRIC-4 erratum which causes
unexpected behavior in the GIC when multiple transactions are received
simultaneously from different sources. This hardware issue impacts
NVIDIA server platforms that use more than two T241 chips
interconnected. Each chip has support for 320 {E}SPIs.
This issue occurs when multiple packets from different GICs are
incorrectly interleaved at the target chip. The erratum text below
specifies exactly what can cause multiple transfer packets susceptible
to interleaving and GIC state corruption. GIC state corruption can
lead to a range of problems, including kernel panics, and unexpected
behavior.
>From the erratum text:
"In some cases, inter-socket AXI4 Stream packets with multiple
transfers, may be interleaved by the fabric when presented to ARM
Generic Interrupt Controller. GIC expects all transfers of a packet
to be delivered without any interleaving.
The following GICv3 commands may result in multiple transfer packets
over inter-socket AXI4 Stream interface:
- Register reads from GICD_I* and GICD_N*
- Register writes to 64-bit GICD registers other than GICD_IROUTERn*
- ITS command MOVALL
Multiple commands in GICv4+ utilize multiple transfer packets,
including VMOVP, VMOVI, VMAPP, and 64-bit register accesses."
This issue impacts system configurations with more than 2 sockets,
that require multi-transfer packets to be sent over inter-socket
AXI4 Stream interface between GIC instances on different sockets.
GICv4 cannot be supported. GICv3 SW model can only be supported
with the workaround. Single and Dual socket configurations are not
impacted by this issue and support GICv3 and GICv4."
Link: https://developer.nvidia.com/docs/t241-fabric-4/nvidia-t241-fabric-4-errata.pdf
Writing to the chip alias region of the GICD_In{E} registers except
GICD_ICENABLERn has an equivalent effect as writing to the global
distributor. The SPI interrupt deactivate path is not impacted by
the erratum.
To fix this problem, implement a workaround that ensures read accesses
to the GICD_In{E} registers are directed to the chip that owns the
SPI, and disable GICv4.x features. To simplify code changes, the
gic_configure_irq() function uses the same alias region for both read
and write operations to GICD_ICFGR.
Co-developed-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com> (for SMCCC/SOC ID bits)
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230319024314.3540573-2-sdonthineni@nvidia.com
2023-03-18 21:43:14 -05:00
phys_addr_t dist_phys_base ;
2014-06-30 16:01:31 +01:00
void __iomem * dist_base ;
2014-11-24 14:35:10 +00:00
struct redist_region * redist_regions ;
struct rdists rdists ;
2014-06-30 16:01:31 +01:00
struct irq_domain * domain ;
u64 redist_stride ;
2014-11-24 14:35:10 +00:00
u32 nr_redist_regions ;
2018-12-10 13:56:32 +00:00
u64 flags ;
2017-10-06 10:24:00 -05:00
bool has_rss ;
2019-07-18 11:15:14 +01:00
unsigned int ppi_nr ;
2019-07-18 13:05:17 +01:00
struct partition_desc * * ppi_descs ;
2014-06-30 16:01:31 +01:00
} ;
irqchip/gicv3: Workaround for NVIDIA erratum T241-FABRIC-4
The T241 platform suffers from the T241-FABRIC-4 erratum which causes
unexpected behavior in the GIC when multiple transactions are received
simultaneously from different sources. This hardware issue impacts
NVIDIA server platforms that use more than two T241 chips
interconnected. Each chip has support for 320 {E}SPIs.
This issue occurs when multiple packets from different GICs are
incorrectly interleaved at the target chip. The erratum text below
specifies exactly what can cause multiple transfer packets susceptible
to interleaving and GIC state corruption. GIC state corruption can
lead to a range of problems, including kernel panics, and unexpected
behavior.
From the erratum text:
"In some cases, inter-socket AXI4 Stream packets with multiple
transfers, may be interleaved by the fabric when presented to ARM
Generic Interrupt Controller. GIC expects all transfers of a packet
to be delivered without any interleaving.
The following GICv3 commands may result in multiple transfer packets
over inter-socket AXI4 Stream interface:
- Register reads from GICD_I* and GICD_N*
- Register writes to 64-bit GICD registers other than GICD_IROUTERn*
- ITS command MOVALL
Multiple commands in GICv4+ utilize multiple transfer packets,
including VMOVP, VMOVI, VMAPP, and 64-bit register accesses."
This issue impacts system configurations with more than 2 sockets,
that require multi-transfer packets to be sent over inter-socket
AXI4 Stream interface between GIC instances on different sockets.
GICv4 cannot be supported. GICv3 SW model can only be supported
with the workaround. Single and Dual socket configurations are not
impacted by this issue and support GICv3 and GICv4."
Link: https://developer.nvidia.com/docs/t241-fabric-4/nvidia-t241-fabric-4-errata.pdf
Writing to the chip alias region of the GICD_In{E} registers except
GICD_ICENABLERn has an equivalent effect as writing to the global
distributor. The SPI interrupt deactivate path is not impacted by
the erratum.
To fix this problem, implement a workaround that ensures read accesses
to the GICD_In{E} registers are directed to the chip that owns the
SPI, and disable GICv4.x features. To simplify code changes, the
gic_configure_irq() function uses the same alias region for both read
and write operations to GICD_ICFGR.
Co-developed-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com> (for SMCCC/SOC ID bits)
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230319024314.3540573-2-sdonthineni@nvidia.com
2023-03-18 21:43:14 -05:00
# define T241_CHIPS_MAX 4
static void __iomem * t241_dist_base_alias [ T241_CHIPS_MAX ] __read_mostly ;
static DEFINE_STATIC_KEY_FALSE ( gic_nvidia_t241_erratum ) ;
2023-07-04 17:50:34 +02:00
static DEFINE_STATIC_KEY_FALSE ( gic_arm64_2941627_erratum ) ;
2014-06-30 16:01:31 +01:00
static struct gic_chip_data gic_data __read_mostly ;
2018-03-26 14:09:25 -07:00
static DEFINE_STATIC_KEY_TRUE ( supports_deactivate_key ) ;
2014-06-30 16:01:31 +01:00
2019-07-16 15:17:31 +01:00
/* Limits decoded from gic_data.rdists.gicd_typer. */
#define GIC_ID_NR	(1U << GICD_TYPER_ID_BITS(gic_data.rdists.gicd_typer))
/* The SPI line count is clamped to 1020. */
#define GIC_LINE_NR	min(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U)
#define GIC_ESPI_NR	GICD_TYPER_ESPIS(gic_data.rdists.gicd_typer)
2019-01-31 14:58:57 +00:00
/*
 * The behaviours of RPR and PMR registers differ depending on the value of
 * SCR_EL3.FIQ, and the behaviour of non-secure priority registers of the
 * distributor and redistributors depends on whether security is enabled in
 * the GIC.
 *
 * When security is enabled, non-secure priority values from the
 * (re)distributor are presented to the GIC CPUIF as follows:
 *	(GIC_(R)DIST_PRI[irq] >> 1) | 0x80;
 *
 * If SCR_EL3.FIQ == 1, the values written to/read from PMR and RPR at
 * non-secure EL1 are subject to a similar operation, thus matching the
 * priorities presented from the (re)distributor when security is enabled.
 * When SCR_EL3.FIQ == 0, these values are unchanged by the GIC.
 *
 * See GICv3/GICv4 Architecture Specification (IHI0069D):
 * - section 4.8.1 Non-secure accesses to register fields for Secure
 *   interrupt priorities.
 * - Figure 4-7 Secure read of the priority field for a Non-secure Group 1
 *   interrupt.
 */
static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis);

/* Exported key; when enabled, GICD_INT_RPR_PRI() rescales priorities. */
DEFINE_STATIC_KEY_FALSE(gic_nonsecure_priorities);
EXPORT_SYMBOL(gic_nonsecure_priorities);
2021-08-12 01:15:05 +08:00
/*
 * When the Non-secure world has access to group 0 interrupts (as a
 * consequence of SCR_EL3.FIQ == 0), reading the ICC_RPR_EL1 register will
 * return the Distributor's view of the interrupt priority.
 *
 * When GIC security is enabled (GICD_CTLR.DS == 0), the interrupt priority
 * written by software is moved to the Non-secure range by the Distributor.
 *
 * If both are true (which is when gic_nonsecure_priorities gets enabled),
 * we need to shift down the priority programmed by software to match it
 * against the value returned by ICC_RPR_EL1.
 *
 * The argument is evaluated exactly once; the result is a u32.
 */
#define GICD_INT_RPR_PRI(priority)					\
	({								\
		u32 __prio = (priority);				\
									\
		static_branch_unlikely(&gic_nonsecure_priorities) ?	\
			(0x80 | (__prio >> 1)) : __prio;		\
	})
2019-01-31 14:58:59 +00:00
/* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */
2019-07-18 12:53:05 +01:00
static refcount_t * ppi_nmi_refs ;
2019-01-31 14:58:59 +00:00
2021-02-27 10:23:45 +00:00
static struct gic_kvm_info gic_v3_kvm_info __initdata ;
2017-10-06 10:24:00 -05:00
static DEFINE_PER_CPU ( bool , has_rss ) ;
2016-04-11 16:32:57 +01:00
2017-10-06 10:24:00 -05:00
/* Bits [7:4] of an MPIDR value. */
#define MPIDR_RS(mpidr)			(((mpidr) & 0xF0UL) >> 4)

/* Accessors for the redistributor belonging to the CPU we are running on. */
#define gic_data_rdist()		(this_cpu_ptr(gic_data.rdists.rdist))
#define gic_data_rdist_rd_base()	(gic_data_rdist()->rd_base)
/* The SGI frame is located SZ_64K above rd_base. */
#define gic_data_rdist_sgi_base()	(gic_data_rdist_rd_base() + SZ_64K)

/* Our default, arbitrary priority value. Linux only uses one anyway. */
#define DEFAULT_PMR_VALUE	0xf0
2019-07-16 14:41:40 +01:00
/* Classes of interrupt ID, as decoded by __get_intid_range(). */
enum gic_intid_range {
	SGI_RANGE,		/* 0 ... 15 */
	PPI_RANGE,		/* 16 ... 31 */
	SPI_RANGE,		/* 32 ... 1019 */
	EPPI_RANGE,		/* EPPI_BASE_INTID ... EPPI_BASE_INTID + 63 */
	ESPI_RANGE,		/* ESPI_BASE_INTID ... ESPI_BASE_INTID + 1023 */
	LPI_RANGE,		/* 8192 ... GENMASK(23, 0) */
	__INVALID_RANGE__	/* anything else */
};
/*
 * Classify a hardware interrupt number.
 *
 * Returns the gic_intid_range the number falls in, or __INVALID_RANGE__
 * when it lies in none of the known windows.
 */
static enum gic_intid_range __get_intid_range(irq_hw_number_t hwirq)
{
	/* The three low windows are contiguous and start at zero. */
	if (hwirq <= 15)
		return SGI_RANGE;
	if (hwirq <= 31)
		return PPI_RANGE;
	if (hwirq <= 1019)
		return SPI_RANGE;

	if (hwirq >= EPPI_BASE_INTID && hwirq <= (EPPI_BASE_INTID + 63))
		return EPPI_RANGE;
	if (hwirq >= ESPI_BASE_INTID && hwirq <= (ESPI_BASE_INTID + 1023))
		return ESPI_RANGE;
	if (hwirq >= 8192 && hwirq <= GENMASK(23, 0))
		return LPI_RANGE;

	return __INVALID_RANGE__;
}
static enum gic_intid_range get_intid_range ( struct irq_data * d )
{
return __get_intid_range ( d - > hwirq ) ;
}
2014-06-30 16:01:31 +01:00
static inline unsigned int gic_irq ( struct irq_data * d )
{
return d - > hwirq ;
}
2020-04-25 15:11:20 +01:00
static inline bool gic_irq_in_rdist ( struct irq_data * d )
2014-06-30 16:01:31 +01:00
{
2020-04-25 15:11:20 +01:00
switch ( get_intid_range ( d ) ) {
case SGI_RANGE :
case PPI_RANGE :
case EPPI_RANGE :
return true ;
default :
return false ;
}
2014-06-30 16:01:31 +01:00
}
irqchip/gicv3: Workaround for NVIDIA erratum T241-FABRIC-4
The T241 platform suffers from the T241-FABRIC-4 erratum which causes
unexpected behavior in the GIC when multiple transactions are received
simultaneously from different sources. This hardware issue impacts
NVIDIA server platforms that use more than two T241 chips
interconnected. Each chip has support for 320 {E}SPIs.
This issue occurs when multiple packets from different GICs are
incorrectly interleaved at the target chip. The erratum text below
specifies exactly what can cause multiple transfer packets susceptible
to interleaving and GIC state corruption. GIC state corruption can
lead to a range of problems, including kernel panics, and unexpected
behavior.
From the erratum text:
"In some cases, inter-socket AXI4 Stream packets with multiple
transfers, may be interleaved by the fabric when presented to ARM
Generic Interrupt Controller. GIC expects all transfers of a packet
to be delivered without any interleaving.
The following GICv3 commands may result in multiple transfer packets
over inter-socket AXI4 Stream interface:
- Register reads from GICD_I* and GICD_N*
- Register writes to 64-bit GICD registers other than GICD_IROUTERn*
- ITS command MOVALL
Multiple commands in GICv4+ utilize multiple transfer packets,
including VMOVP, VMOVI, VMAPP, and 64-bit register accesses."
This issue impacts system configurations with more than 2 sockets,
that require multi-transfer packets to be sent over inter-socket
AXI4 Stream interface between GIC instances on different sockets.
GICv4 cannot be supported. GICv3 SW model can only be supported
with the workaround. Single and Dual socket configurations are not
impacted by this issue and support GICv3 and GICv4."
Link: https://developer.nvidia.com/docs/t241-fabric-4/nvidia-t241-fabric-4-errata.pdf
Writing to the chip alias region of the GICD_In{E} registers except
GICD_ICENABLERn has an equivalent effect as writing to the global
distributor. The SPI interrupt deactivate path is not impacted by
the erratum.
To fix this problem, implement a workaround that ensures read accesses
to the GICD_In{E} registers are directed to the chip that owns the
SPI, and disable GICv4.x features. To simplify code changes, the
gic_configure_irq() function uses the same alias region for both read
and write operations to GICD_ICFGR.
Co-developed-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com> (for SMCCC/SOC ID bits)
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230319024314.3540573-2-sdonthineni@nvidia.com
2023-03-18 21:43:14 -05:00
/*
 * Pick the distributor base to use for accesses to @d's GICD_In{E}
 * registers.
 *
 * Without the T241 workaround this is simply gic_data.dist_base. With it,
 * the alias of the chip owning the interrupt is returned. Only SPIs and
 * ESPIs are expected here; any other range hits unreachable().
 */
static inline void __iomem *gic_dist_base_alias(struct irq_data *d)
{
	irq_hw_number_t hwirq;
	u32 chip;

	if (!static_branch_unlikely(&gic_nvidia_t241_erratum))
		return gic_data.dist_base;

	hwirq = irqd_to_hwirq(d);

	/*
	 * For the erratum T241-FABRIC-4, read accesses to GICD_In{E}
	 * registers are directed to the chip that owns the SPI. The
	 * alias region can also be used for writes to the GICD_In{E}
	 * registers except GICD_ICENABLERn. Each chip has support for
	 * 320 {E}SPIs. Mappings for all 4 chips:
	 *    Chip0 = 32-351
	 *    Chip1 = 352-671
	 *    Chip2 = 672-991
	 *    Chip3 = 4096-4415
	 */
	switch (__get_intid_range(hwirq)) {
	case SPI_RANGE:
		chip = (hwirq - 32) / 320;
		break;
	case ESPI_RANGE:
		chip = 3;
		break;
	default:
		unreachable();
	}

	return t241_dist_base_alias[chip];
}
2014-06-30 16:01:31 +01:00
/*
 * Return the register frame that holds @d's per-interrupt registers:
 * this CPU's SGI frame for SGIs/PPIs/EPPIs, the distributor for
 * SPIs/ESPIs, and NULL for any other interrupt ID.
 */
static inline void __iomem *gic_dist_base(struct irq_data *d)
{
	enum gic_intid_range range = get_intid_range(d);

	/* SGI+PPI -> SGI_base for this CPU */
	if (range == SGI_RANGE || range == PPI_RANGE || range == EPPI_RANGE)
		return gic_data_rdist_sgi_base();

	/* SPI -> dist_base */
	if (range == SPI_RANGE || range == ESPI_RANGE)
		return gic_data.dist_base;

	return NULL;
}
2022-03-15 16:50:32 +00:00
/*
 * Spin until @bit clears in the control register found at
 * @base + GICD_CTLR.
 *
 * The register is polled with a 1us delay between reads; after 1000000
 * polls a rate-limited error is logged and the function gives up. The
 * caller is not told about the timeout.
 *
 * NOTE(review): the redistributor caller also goes through the GICD_CTLR
 * offset - presumably GICR_CTLR lives at the same offset; confirm.
 */
static void gic_do_wait_for_rwp(void __iomem *base, u32 bit)
{
	u32 count = 1000000;	/* 1s! */

	while (readl_relaxed(base + GICD_CTLR) & bit) {
		count--;
		if (!count) {
			/* No padding around the text: the pr_fmt prefix supplies it. */
			pr_err_ratelimited("RWP timeout, gone fishing\n");
			return;
		}
		cpu_relax();
		udelay(1);
	}
}
/* Wait for completion of a distributor change */
static void gic_dist_wait_for_rwp ( void )
{
2022-03-15 16:50:32 +00:00
gic_do_wait_for_rwp ( gic_data . dist_base , GICD_CTLR_RWP ) ;
2014-06-30 16:01:31 +01:00
}
/* Wait for completion of a redistributor change */
static void gic_redist_wait_for_rwp ( void )
{
2022-03-15 16:50:32 +00:00
gic_do_wait_for_rwp ( gic_data_rdist_rd_base ( ) , GICR_CTLR_RWP ) ;
2014-06-30 16:01:31 +01:00
}
2015-10-01 13:47:14 +01:00
#ifdef CONFIG_ARM64
/*
 * Read the interrupt acknowledge register, using the Cavium ThunderX
 * variant when the ARM64_WORKAROUND_CAVIUM_23154 capability is set.
 */
static u64 __maybe_unused gic_read_iar(void)
{
	if (!cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_23154))
		return gic_read_iar_common();

	return gic_read_iar_cavium_thunderx();
}
#endif
2014-06-30 16:01:31 +01:00
2014-08-26 16:03:34 +01:00
/*
 * Wake up (@enable == true) or put to sleep (@enable == false) the
 * redistributor of the CPU we are running on.
 *
 * Does nothing when the MSM8996 GICR_WAKER workaround flag is set. The
 * ProcessorSleep bit of GICR_WAKER is updated, then ChildrenAsleep is
 * polled until it is clear (wake-up) or set (sleep). After roughly one
 * second of polling a rate-limited error is logged.
 */
static void gic_enable_redist(bool enable)
{
	void __iomem *rbase;
	u32 count = 1000000;	/* 1s! */
	u32 val;

	if (gic_data.flags & FLAGS_WORKAROUND_GICR_WAKER_MSM8996)
		return;

	rbase = gic_data_rdist_rd_base();

	val = readl_relaxed(rbase + GICR_WAKER);
	if (enable)
		/* Wake up this CPU redistributor */
		val &= ~GICR_WAKER_ProcessorSleep;
	else
		val |= GICR_WAKER_ProcessorSleep;
	writel_relaxed(val, rbase + GICR_WAKER);

	if (!enable) {		/* Check that GICR_WAKER is writeable */
		val = readl_relaxed(rbase + GICR_WAKER);
		if (!(val & GICR_WAKER_ProcessorSleep))
			return;	/* No PM support in this redistributor */
	}

	/* Done once ChildrenAsleep is the opposite of @enable. */
	while (--count) {
		val = readl_relaxed(rbase + GICR_WAKER);
		if (enable ^ (bool)(val & GICR_WAKER_ChildrenAsleep))
			break;
		cpu_relax();
		udelay(1);
	}

	/* The message and its %s argument carry no stray padding. */
	if (!count)
		pr_err_ratelimited("redistributor failed to %s...\n",
				   enable ? "wakeup" : "sleep");
}
/*
 * Routines to disable, enable, EOI and route interrupts
 */
2019-07-16 14:41:40 +01:00
/*
 * Work out which register bank and which index within it must be used to
 * reach @d's state.
 *
 * @offset is the offset of the classic register bank (GICD_ISENABLER,
 * GICD_IPRIORITYR, ...). The index of the interrupt inside the selected
 * bank is stored in @index, and the offset of the bank to use is returned:
 * @offset itself for SGIs/PPIs/SPIs/EPPIs, the matching *nE bank for
 * ESPIs.
 *
 * An interrupt ID outside those ranges, or an ESPI combined with an
 * @offset that has no extended counterpart here, triggers a WARN and
 * falls back to the untranslated @offset and hwirq.
 */
static u32 convert_offset_index(struct irq_data *d, u32 offset, u32 *index)
{
	switch (get_intid_range(d)) {
	case SGI_RANGE:
	case PPI_RANGE:
	case SPI_RANGE:
		*index = d->hwirq;
		return offset;

	case EPPI_RANGE:
		/*
		 * Contrary to the ESPI range, the EPPI range is contiguous
		 * to the PPI range in the registers, so let's adjust the
		 * displacement accordingly. Consistency is overrated.
		 */
		*index = d->hwirq - EPPI_BASE_INTID + 32;
		return offset;

	case ESPI_RANGE:
		*index = d->hwirq - ESPI_BASE_INTID;

		/* ESPIs have their own banks: swap in the extended offset. */
		switch (offset) {
		case GICD_ISENABLER:
			return GICD_ISENABLERnE;
		case GICD_ICENABLER:
			return GICD_ICENABLERnE;
		case GICD_ISPENDR:
			return GICD_ISPENDRnE;
		case GICD_ICPENDR:
			return GICD_ICPENDRnE;
		case GICD_ISACTIVER:
			return GICD_ISACTIVERnE;
		case GICD_ICACTIVER:
			return GICD_ICACTIVERnE;
		case GICD_IPRIORITYR:
			return GICD_IPRIORITYRnE;
		case GICD_ICFGR:
			return GICD_ICFGRnE;
		case GICD_IROUTER:
			return GICD_IROUTERnE;
		default:
			break;
		}
		break;

	default:
		break;
	}

	/* Nothing matched: complain and hand back the inputs untouched. */
	WARN_ON(1);
	*index = d->hwirq;
	return offset;
}
2015-03-18 11:01:24 +00:00
/*
 * Read @d's bit in the one-bit-per-interrupt register bank at @offset.
 *
 * SGIs/PPIs/EPPIs are read from this CPU's SGI frame, everything else
 * through gic_dist_base_alias(). Returns 1 when the bit is set and 0
 * otherwise.
 */
static int gic_peek_irq(struct irq_data *d, u32 offset)
{
	void __iomem *base;
	u32 index, mask;

	offset = convert_offset_index(d, offset, &index);
	/* Unsigned constant: bit 31 must not be shifted into a signed int. */
	mask = 1U << (index % 32);

	if (gic_irq_in_rdist(d))
		base = gic_data_rdist_sgi_base();
	else
		base = gic_dist_base_alias(d);

	/* One 32-bit register covers 32 interrupts. */
	return !!(readl_relaxed(base + offset + (index / 32) * 4) & mask);
}
2014-06-30 16:01:31 +01:00
/*
 * Write @d's bit to the one-bit-per-interrupt register bank at @offset.
 *
 * Only the bit belonging to @d is written. SGIs/PPIs/EPPIs go to this
 * CPU's SGI frame, everything else to gic_data.dist_base.
 */
static void gic_poke_irq(struct irq_data *d, u32 offset)
{
	void __iomem *base;
	u32 index, mask;

	offset = convert_offset_index(d, offset, &index);
	/* Unsigned constant: bit 31 must not be shifted into a signed int. */
	mask = 1U << (index % 32);

	if (gic_irq_in_rdist(d))
		base = gic_data_rdist_sgi_base();
	else
		base = gic_data.dist_base;

	/* One 32-bit register covers 32 interrupts. */
	writel_relaxed(mask, base + offset + (index / 32) * 4);
}
/*
 * Mask @d by writing its bit to the ICENABLER bank, then wait for the
 * write to complete on whichever side (redistributor or distributor)
 * owns the interrupt.
 */
static void gic_mask_irq(struct irq_data *d)
{
	gic_poke_irq(d, GICD_ICENABLER);

	if (!gic_irq_in_rdist(d)) {
		gic_dist_wait_for_rwp();
		return;
	}

	gic_redist_wait_for_rwp();
}
2015-08-26 17:00:42 +01:00
static void gic_eoimode1_mask_irq ( struct irq_data * d )
{
gic_mask_irq ( d ) ;
2015-08-26 17:00:43 +01:00
/*
* When masking a forwarded interrupt , make sure it is
* deactivated as well .
*
* This ensures that an interrupt that is getting
* disabled / masked will not get " stuck " , because there is
* noone to deactivate it ( guest is being terminated ) .
*/
2015-09-15 13:19:16 +02:00
if ( irqd_is_forwarded_to_vcpu ( d ) )
2015-08-26 17:00:43 +01:00
gic_poke_irq ( d , GICD_ICACTIVER ) ;
2015-08-26 17:00:42 +01:00
}
2014-06-30 16:01:31 +01:00
static void gic_unmask_irq ( struct irq_data * d )
{
gic_poke_irq ( d , GICD_ISENABLER ) ;
}
2019-01-31 14:58:57 +00:00
static inline bool gic_supports_nmi ( void )
{
return IS_ENABLED ( CONFIG_ARM64_PSEUDO_NMI ) & &
static_branch_likely ( & supports_pseudo_nmis ) ;
}
2015-03-18 11:01:24 +00:00
/*
 * Set or clear the pending, active or masked state of @d.
 *
 * Returns 0 on success, -EINVAL for hwirq >= 8192 or for a state other
 * than the three handled here.
 */
static int gic_irq_set_irqchip_state(struct irq_data *d,
				     enum irqchip_irq_state which, bool val)
{
	u32 reg;

	if (d->hwirq >= 8192) /* SGI/PPI/SPI only */
		return -EINVAL;

	switch (which) {
	case IRQCHIP_STATE_PENDING:
		if (val)
			reg = GICD_ISPENDR;
		else
			reg = GICD_ICPENDR;
		break;

	case IRQCHIP_STATE_ACTIVE:
		if (val)
			reg = GICD_ISACTIVER;
		else
			reg = GICD_ICACTIVER;
		break;

	case IRQCHIP_STATE_MASKED:
		if (!val) {
			reg = GICD_ISENABLER;
			break;
		}
		/* Masking goes through gic_mask_irq() so the RWP wait happens. */
		gic_mask_irq(d);
		return 0;

	default:
		return -EINVAL;
	}

	gic_poke_irq(d, reg);
	return 0;
}
/*
 * Report the pending, active or masked state of @d through @val.
 *
 * Returns 0 on success, -EINVAL for hwirq >= 8192 or for a state other
 * than the three handled here; @val is left untouched on error.
 */
static int gic_irq_get_irqchip_state(struct irq_data *d,
				     enum irqchip_irq_state which, bool *val)
{
	u32 reg;
	int bit;

	if (d->hwirq >= 8192) /* hwirq >= 8192 is not handled here */
		return -EINVAL;

	switch (which) {
	case IRQCHIP_STATE_PENDING:
		reg = GICD_ISPENDR;
		break;
	case IRQCHIP_STATE_ACTIVE:
		reg = GICD_ISACTIVER;
		break;
	case IRQCHIP_STATE_MASKED:
		reg = GICD_ISENABLER;
		break;
	default:
		return -EINVAL;
	}

	bit = gic_peek_irq(d, reg);

	/* "Masked" is the inverse of the enable bit. */
	if (which == IRQCHIP_STATE_MASKED)
		*val = !bit;
	else
		*val = bit;

	return 0;
}
2019-01-31 14:58:59 +00:00
/*
 * Program @prio as the priority of @d. Priorities are one byte per
 * interrupt, so the interrupt's index is used directly as a byte offset
 * into the priority bank.
 */
static void gic_irq_set_prio(struct irq_data *d, u8 prio)
{
	void __iomem *base = gic_dist_base(d);
	void __iomem *prio_reg;
	u32 offset, index;

	offset = convert_offset_index(d, GICD_IPRIORITYR, &index);
	prio_reg = base + offset + index;

	writeb_relaxed(prio, prio_reg);
}
2021-07-29 17:27:47 +00:00
/*
 * Convert a PPI or EPPI hardware interrupt number into a zero-based PPI
 * index: PPIs map to hwirq - 16 and EPPIs follow from index 16 upwards.
 * Must only be called for those two ranges; anything else hits
 * unreachable().
 */
static u32 __gic_get_ppi_index(irq_hw_number_t hwirq)
{
	switch (__get_intid_range(hwirq)) {
	case EPPI_RANGE:
		return hwirq - EPPI_BASE_INTID + 16;
	case PPI_RANGE:
		return hwirq - 16;
	default:
		unreachable();
	}
}
2021-07-29 17:27:47 +00:00
static u32 gic_get_ppi_index ( struct irq_data * d )
{
return __gic_get_ppi_index ( d - > hwirq ) ;
}
2019-01-31 14:58:59 +00:00
static int gic_irq_nmi_setup ( struct irq_data * d )
{
struct irq_desc * desc = irq_to_desc ( d - > irq ) ;
if ( ! gic_supports_nmi ( ) )
return - EINVAL ;
if ( gic_peek_irq ( d , GICD_ISENABLER ) ) {
pr_err ( " Cannot set NMI property of enabled IRQ %u \n " , d - > irq ) ;
return - EINVAL ;
}
/*
* A secondary irq_chip should be in charge of LPI request ,
* it should not be possible to get there
*/
if ( WARN_ON ( gic_irq ( d ) > = 8192 ) )
return - EINVAL ;
/* desc lock should already be held */
2019-07-18 12:53:05 +01:00
if ( gic_irq_in_rdist ( d ) ) {
u32 idx = gic_get_ppi_index ( d ) ;
2019-01-31 14:58:59 +00:00
/* Setting up PPI as NMI, only switch handler for first NMI */
2019-07-18 12:53:05 +01:00
if ( ! refcount_inc_not_zero ( & ppi_nmi_refs [ idx ] ) ) {
refcount_set ( & ppi_nmi_refs [ idx ] , 1 ) ;
2019-01-31 14:58:59 +00:00
desc - > handle_irq = handle_percpu_devid_fasteoi_nmi ;
}
} else {
desc - > handle_irq = handle_fasteoi_nmi ;
}
gic_irq_set_prio ( d , GICD_INT_NMI_PRI ) ;
return 0 ;
}
static void gic_irq_nmi_teardown ( struct irq_data * d )
{
struct irq_desc * desc = irq_to_desc ( d - > irq ) ;
if ( WARN_ON ( ! gic_supports_nmi ( ) ) )
return ;
if ( gic_peek_irq ( d , GICD_ISENABLER ) ) {
pr_err ( " Cannot set NMI property of enabled IRQ %u \n " , d - > irq ) ;
return ;
}
/*
* A secondary irq_chip should be in charge of LPI request ,
* it should not be possible to get there
*/
if ( WARN_ON ( gic_irq ( d ) > = 8192 ) )
return ;
/* desc lock should already be held */
2019-07-18 12:53:05 +01:00
if ( gic_irq_in_rdist ( d ) ) {
u32 idx = gic_get_ppi_index ( d ) ;
2019-01-31 14:58:59 +00:00
/* Tearing down NMI, only switch handler for last NMI */
2019-07-18 12:53:05 +01:00
if ( refcount_dec_and_test ( & ppi_nmi_refs [ idx ] ) )
2019-01-31 14:58:59 +00:00
desc - > handle_irq = handle_percpu_devid_irq ;
} else {
desc - > handle_irq = handle_fasteoi_irq ;
}
gic_irq_set_prio ( d , GICD_INT_DEF_PRI ) ;
}
2023-07-04 17:50:34 +02:00
static bool gic_arm64_erratum_2941627_needed ( struct irq_data * d )
{
enum gic_intid_range range ;
if ( ! static_branch_unlikely ( & gic_arm64_2941627_erratum ) )
return false ;
range = get_intid_range ( d ) ;
/*
* The workaround is needed if the IRQ is an SPI and
* the target cpu is different from the one we are
* executing on .
*/
return ( range = = SPI_RANGE | | range = = ESPI_RANGE ) & &
! cpumask_test_cpu ( raw_smp_processor_id ( ) ,
irq_data_get_effective_affinity_mask ( d ) ) ;
}
2014-06-30 16:01:31 +01:00
static void gic_eoi_irq ( struct irq_data * d )
{
irqchip/gic-v3: Refactor ISB + EOIR at ack time
There are cases where a context synchronization event is necessary
between an IRQ being raised and being handled, and there are races such
that we cannot rely upon the exception entry being subsequent to the
interrupt being raised. To fix this, we place an ISB between a read of
IAR and the subsequent invocation of an IRQ handler.
When EOI mode 1 is in use, we need to EOI an interrupt prior to invoking
its handler, and we have a write to EOIR for this. As this write to EOIR
requires an ISB, and this is provided by the gic_write_eoir() helper, we
omit the usual ISB in this case, with the logic being:
| if (static_branch_likely(&supports_deactivate_key))
| gic_write_eoir(irqnr);
| else
| isb();
This is somewhat opaque, and it would be a little clearer if there were
an unconditional ISB, with only the write to EOIR being conditional,
e.g.
| if (static_branch_likely(&supports_deactivate_key))
| write_gicreg(irqnr, ICC_EOIR1_EL1);
|
| isb();
This patch rewrites the code that way, with this logic factored into a
new helper function with comments explaining what the ISB is for, as
were originally laid out in commit:
39a06b67c2c1256b ("irqchip/gic: Ensure we have an ISB between ack and ->handle_irq")
Note that since then, we removed the IAR polling in commit:
342677d70ab92142 ("irqchip/gic-v3: Remove acknowledge loop")
... which removed one of the two race conditions.
For consistency, other portions of the driver are made to manipulate
EOIR using write_gicreg() and explcit ISBs, and the gic_write_eoir()
helper function is removed.
There should be no functional change as a result of this patch.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220513133038.226182-3-mark.rutland@arm.com
2022-05-13 14:30:37 +01:00
write_gicreg ( gic_irq ( d ) , ICC_EOIR1_EL1 ) ;
isb ( ) ;
2023-07-04 17:50:34 +02:00
if ( gic_arm64_erratum_2941627_needed ( d ) ) {
/*
* Make sure the GIC stream deactivate packet
* issued by ICC_EOIR1_EL1 has completed before
* deactivating through GICD_IACTIVER .
*/
dsb ( sy ) ;
gic_poke_irq ( d , GICD_ICACTIVER ) ;
}
2014-06-30 16:01:31 +01:00
}
2015-08-26 17:00:42 +01:00
/*
 * EOI callback used with EOI mode 1: deactivate the interrupt described
 * by @d, either with a DIR write or, when the erratum 2941627 workaround
 * applies, through GICD_ICACTIVER.
 */
static void gic_eoimode1_eoi_irq(struct irq_data *d)
{
	/*
	 * No need to deactivate an LPI, or an interrupt that
	 * is getting forwarded to a vcpu.
	 */
	if (gic_irq(d) >= 8192 || irqd_is_forwarded_to_vcpu(d))
		return;

	if (gic_arm64_erratum_2941627_needed(d))
		gic_poke_irq(d, GICD_ICACTIVER);
	else
		gic_write_dir(gic_irq(d));
}
2014-06-30 16:01:31 +01:00
static int gic_set_type ( struct irq_data * d , unsigned int type )
{
2019-07-18 13:19:25 +01:00
enum gic_intid_range range ;
2014-06-30 16:01:31 +01:00
unsigned int irq = gic_irq ( d ) ;
void __iomem * base ;
2019-07-16 14:41:40 +01:00
u32 offset , index ;
2019-07-16 14:35:17 +01:00
int ret ;
2014-06-30 16:01:31 +01:00
2019-07-18 13:19:25 +01:00
range = get_intid_range ( d ) ;
2020-04-25 15:24:01 +01:00
/* Interrupt configuration for SGIs can't be changed */
if ( range = = SGI_RANGE )
return type ! = IRQ_TYPE_EDGE_RISING ? - EINVAL : 0 ;
2015-01-20 16:52:59 +00:00
/* SPIs have restrictions on the supported types */
2019-07-18 13:19:25 +01:00
if ( ( range = = SPI_RANGE | | range = = ESPI_RANGE ) & &
type ! = IRQ_TYPE_LEVEL_HIGH & & type ! = IRQ_TYPE_EDGE_RISING )
2014-06-30 16:01:31 +01:00
return - EINVAL ;
2022-04-05 19:38:57 +01:00
if ( gic_irq_in_rdist ( d ) )
2014-06-30 16:01:31 +01:00
base = gic_data_rdist_sgi_base ( ) ;
2022-04-05 19:38:57 +01:00
else
irqchip/gicv3: Workaround for NVIDIA erratum T241-FABRIC-4
The T241 platform suffers from the T241-FABRIC-4 erratum which causes
unexpected behavior in the GIC when multiple transactions are received
simultaneously from different sources. This hardware issue impacts
NVIDIA server platforms that use more than two T241 chips
interconnected. Each chip has support for 320 {E}SPIs.
This issue occurs when multiple packets from different GICs are
incorrectly interleaved at the target chip. The erratum text below
specifies exactly what can cause multiple transfer packets susceptible
to interleaving and GIC state corruption. GIC state corruption can
lead to a range of problems, including kernel panics, and unexpected
behavior.
>From the erratum text:
"In some cases, inter-socket AXI4 Stream packets with multiple
transfers, may be interleaved by the fabric when presented to ARM
Generic Interrupt Controller. GIC expects all transfers of a packet
to be delivered without any interleaving.
The following GICv3 commands may result in multiple transfer packets
over inter-socket AXI4 Stream interface:
- Register reads from GICD_I* and GICD_N*
- Register writes to 64-bit GICD registers other than GICD_IROUTERn*
- ITS command MOVALL
Multiple commands in GICv4+ utilize multiple transfer packets,
including VMOVP, VMOVI, VMAPP, and 64-bit register accesses."
This issue impacts system configurations with more than 2 sockets,
that require multi-transfer packets to be sent over inter-socket
AXI4 Stream interface between GIC instances on different sockets.
GICv4 cannot be supported. GICv3 SW model can only be supported
with the workaround. Single and Dual socket configurations are not
impacted by this issue and support GICv3 and GICv4."
Link: https://developer.nvidia.com/docs/t241-fabric-4/nvidia-t241-fabric-4-errata.pdf
Writing to the chip alias region of the GICD_In{E} registers except
GICD_ICENABLERn has an equivalent effect as writing to the global
distributor. The SPI interrupt deactivate path is not impacted by
the erratum.
To fix this problem, implement a workaround that ensures read accesses
to the GICD_In{E} registers are directed to the chip that owns the
SPI, and disable GICv4.x features. To simplify code changes, the
gic_configure_irq() function uses the same alias region for both read
and write operations to GICD_ICFGR.
Co-developed-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com> (for SMCCC/SOC ID bits)
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230319024314.3540573-2-sdonthineni@nvidia.com
2023-03-18 21:43:14 -05:00
base = gic_dist_base_alias ( d ) ;
2014-06-30 16:01:31 +01:00
2019-07-16 14:41:40 +01:00
offset = convert_offset_index ( d , GICD_ICFGR , & index ) ;
2019-07-16 14:35:17 +01:00
2022-04-05 19:38:57 +01:00
ret = gic_configure_irq ( index , type , base + offset , NULL ) ;
2019-07-18 13:19:25 +01:00
if ( ret & & ( range = = PPI_RANGE | | range = = EPPI_RANGE ) ) {
2019-07-16 14:35:17 +01:00
/* Misconfigured PPIs are usually not fatal */
2019-07-18 13:19:25 +01:00
pr_warn ( " GIC: PPI INTID%d is secure or misconfigured \n " , irq ) ;
2019-07-16 14:35:17 +01:00
ret = 0 ;
}
return ret ;
2014-06-30 16:01:31 +01:00
}
2015-08-26 17:00:43 +01:00
static int gic_irq_set_vcpu_affinity ( struct irq_data * d , void * vcpu )
{
2020-04-25 15:24:01 +01:00
if ( get_intid_range ( d ) = = SGI_RANGE )
return - EINVAL ;
2015-09-15 13:19:16 +02:00
if ( vcpu )
irqd_set_forwarded_to_vcpu ( d ) ;
else
irqd_clr_forwarded_to_vcpu ( d ) ;
2015-08-26 17:00:43 +01:00
return 0 ;
}
2023-05-29 16:21:42 +01:00
static u64 gic_cpu_to_affinity ( int cpu )
2014-06-30 16:01:31 +01:00
{
2023-05-29 16:21:42 +01:00
u64 mpidr = cpu_logical_map ( cpu ) ;
2014-06-30 16:01:31 +01:00
u64 aff ;
2023-05-22 19:06:43 +08:00
/* ASR8601 needs to have its affinities shifted down... */
if ( unlikely ( gic_data . flags & FLAGS_WORKAROUND_ASR_ERRATUM_8601001 ) )
mpidr = ( MPIDR_AFFINITY_LEVEL ( mpidr , 1 ) |
( MPIDR_AFFINITY_LEVEL ( mpidr , 2 ) < < 8 ) ) ;
2015-10-01 13:47:15 +01:00
aff = ( ( u64 ) MPIDR_AFFINITY_LEVEL ( mpidr , 3 ) < < 32 |
2014-06-30 16:01:31 +01:00
MPIDR_AFFINITY_LEVEL ( mpidr , 2 ) < < 16 |
MPIDR_AFFINITY_LEVEL ( mpidr , 1 ) < < 8 |
MPIDR_AFFINITY_LEVEL ( mpidr , 0 ) ) ;
return aff ;
}
2019-01-31 14:58:58 +00:00
/*
 * Retire interrupt @irqnr without running a handler for it.
 *
 * With supports_deactivate_key enabled, a DIR write is issued for any
 * INTID below 8192 and nothing is done for the others. Without it, the
 * INTID is written to ICC_EOIR1_EL1, followed by an ISB.
 */
static void gic_deactivate_unhandled(u32 irqnr)
{
	if (static_branch_likely(&supports_deactivate_key)) {
		if (irqnr < 8192)
			gic_write_dir(irqnr);
		return;
	}

	write_gicreg(irqnr, ICC_EOIR1_EL1);
	isb();
}
irqchip/gic-v3: Refactor ISB + EOIR at ack time
There are cases where a context synchronization event is necessary
between an IRQ being raised and being handled, and there are races such
that we cannot rely upon the exception entry being subsequent to the
interrupt being raised. To fix this, we place an ISB between a read of
IAR and the subsequent invocation of an IRQ handler.
When EOI mode 1 is in use, we need to EOI an interrupt prior to invoking
its handler, and we have a write to EOIR for this. As this write to EOIR
requires an ISB, and this is provided by the gic_write_eoir() helper, we
omit the usual ISB in this case, with the logic being:
| if (static_branch_likely(&supports_deactivate_key))
| gic_write_eoir(irqnr);
| else
| isb();
This is somewhat opaque, and it would be a little clearer if there were
an unconditional ISB, with only the write to EOIR being conditional,
e.g.
| if (static_branch_likely(&supports_deactivate_key))
| write_gicreg(irqnr, ICC_EOIR1_EL1);
|
| isb();
This patch rewrites the code that way, with this logic factored into a
new helper function with comments explaining what the ISB is for, as
were originally laid out in commit:
39a06b67c2c1256b ("irqchip/gic: Ensure we have an ISB between ack and ->handle_irq")
Note that since then, we removed the IAR polling in commit:
342677d70ab92142 ("irqchip/gic-v3: Remove acknowledge loop")
... which removed one of the two race conditions.
For consistency, other portions of the driver are made to manipulate
EOIR using write_gicreg() and explicit ISBs, and the gic_write_eoir()
helper function is removed.
There should be no functional change as a result of this patch.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220513133038.226182-3-mark.rutland@arm.com
2022-05-13 14:30:37 +01:00
/*
 * Perform the HW maintenance that must follow a read of the IAR and precede
 * the invocation of the relevant IRQ handler. Two things happen here:
 *
 * (1) When EOI mode 1 is in use (supports_deactivate_key), the interrupt is
 *     EOIed right away by writing its INTID to ICC_EOIR1_EL1.
 *
 * (2) An ISB orders the read of IAR against the subsequent instructions in
 *     the IRQ handler.
 *
 *     It is possible for the IAR to report an IRQ which was signalled *after*
 *     the CPU took an IRQ exception, as multiple interrupts can race to be
 *     recognized by the GIC, earlier interrupts could be withdrawn, and/or
 *     later interrupts could be prioritized by the GIC.
 *
 *     For devices which are tightly coupled to the CPU, such as PMUs, a
 *     context synchronization event is necessary to ensure that system
 *     register state is not stale, as these may have been indirectly written
 *     *after* exception entry.
 */
static inline void gic_complete_ack(u32 irqnr)
{
	if (static_branch_likely(&supports_deactivate_key)) {
		/* EOI mode 1: EOI before the handler runs */
		write_gicreg(irqnr, ICC_EOIR1_EL1);
	}

	/* Always synchronize, whether or not EOIR was written */
	isb();
}
irqchip/gic-v3: Fix priority mask handling
When a kernel is built with CONFIG_ARM64_PSEUDO_NMI=y and pseudo-NMIs
are enabled at runtime, GICv3's gic_handle_irq() can leave DAIF and
ICC_PMR_EL1 in an unexpected state in some cases, breaking subsequent
usage of local_irq_enable() and resulting in softirqs being run with
IRQs erroneously masked (possibly resulting in deadlocks).
This can happen when an IRQ exception is taken from a context where
regular IRQs were unmasked, and either:
(1) ICC_IAR1_EL1 indicates a special INTID (e.g. as a result of an IRQ
being withdrawn since the IRQ exception was taken).
(2) ICC_IAR1_EL1 and ICC_RPR_EL1 indicate an NMI was acknowledged.
When an NMI is taken from a context where regular IRQs were masked,
there is no problem.
When CONFIG_ARM64_DEBUG_PRIORITY_MASKING=y, this can be detected with
perf, e.g.
| # ./perf record -a -g -e cycles:k ls -alR / > /dev/null 2>&1
| ------------[ cut here ]------------
| WARNING: CPU: 0 PID: 14 at arch/arm64/include/asm/irqflags.h:32 arch_local_irq_enable+0x4c/0x6c
| Modules linked in:
| CPU: 0 PID: 14 Comm: ksoftirqd/0 Not tainted 5.18.0-rc5-00004-g876c38e3d20b #12
| Hardware name: linux,dummy-virt (DT)
| pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : arch_local_irq_enable+0x4c/0x6c
| lr : __do_softirq+0x110/0x5d8
| sp : ffff8000080bbbc0
| pmr_save: 000000f0
| x29: ffff8000080bbbc0 x28: ffff316ac3a6ca40 x27: 0000000000000000
| x26: 0000000000000000 x25: ffffa04611c06008 x24: ffffa04611c06008
| x23: 0000000040400005 x22: 0000000000000200 x21: ffff8000080bbe20
| x20: ffffa0460fe10320 x19: 0000000000000009 x18: 0000000000000000
| x17: ffff91252dfa9000 x16: ffff800008004000 x15: 0000000000004000
| x14: 0000000000000028 x13: ffffa0460fe17578 x12: ffffa0460fed4294
| x11: ffffa0460fedc168 x10: ffffffffffffff80 x9 : ffffa0460fe10a70
| x8 : ffffa0460fedc168 x7 : 000000000000b762 x6 : 00000000057c3bdf
| x5 : ffff8000080bbb18 x4 : 0000000000000000 x3 : 0000000000000001
| x2 : ffff91252dfa9000 x1 : 0000000000000060 x0 : 00000000000000f0
| Call trace:
| arch_local_irq_enable+0x4c/0x6c
| __irq_exit_rcu+0x180/0x1ac
| irq_exit_rcu+0x1c/0x44
| el1_interrupt+0x4c/0xe4
| el1h_64_irq_handler+0x18/0x24
| el1h_64_irq+0x74/0x78
| smpboot_thread_fn+0x68/0x2c0
| kthread+0x124/0x130
| ret_from_fork+0x10/0x20
| irq event stamp: 193241
| hardirqs last enabled at (193240): [<ffffa0460fe10a9c>] __do_softirq+0x10c/0x5d8
| hardirqs last disabled at (193241): [<ffffa0461102ffe4>] el1_dbg+0x24/0x90
| softirqs last enabled at (193234): [<ffffa0460fe10e00>] __do_softirq+0x470/0x5d8
| softirqs last disabled at (193239): [<ffffa0460fea9944>] __irq_exit_rcu+0x180/0x1ac
| ---[ end trace 0000000000000000 ]---
The necessary manipulation of DAIF and ICC_PMR_EL1 depends on the
interrupted context, but the structure of gic_handle_irq() makes this
also depend on whether the GIC reports an IRQ, NMI, or special INTID:
* When the interrupted context had regular IRQs masked (and hence the
interrupt must be an NMI), the entry code performs the NMI
entry/exit and gic_handle_irq() should return with DAIF and
ICC_PMR_EL1 unchanged.
This is handled correctly today.
* When the interrupted context had regular IRQs unmasked, the entry code
performs IRQ entry/exit, but expects gic_handle_irq() to always update
ICC_PMR_EL1 and DAIF.IF to unmask NMIs (but not regular IRQs) prior to
returning (which it must do prior to invoking any regular IRQ
handler).
This unbalanced calling convention is necessary because we don't know
whether an NMI has been taken until acknowledged by a read from
ICC_IAR1_EL1, and so we need to perform the read with NMI masked in
case an NMI has been taken (and needs to be handled with NMIs masked).
Unfortunately, this is not handled consistently:
- When ICC_IAR1_EL1 reports a special INTID, gic_handle_irq() returns
immediately without manipulating ICC_PMR_EL1 and DAIF.
- When RPR_EL1 indicates an NMI, gic_handle_irq() calls
gic_handle_nmi() to invoke the NMI handler, then returns without
manipulating ICC_PMR_EL1 and DAIF.
- For regular IRQs, gic_handle_irq() manipulates ICC_PMR_EL1 and DAIF
prior to invoking the IRQ handler.
There were related problems with special INTID handling in the past,
where if an exception was taken from a context with regular IRQs masked
and ICC_IAR_EL1 reported a special INTID, gic_handle_irq() would
erroneously unmask NMIs in NMI context, permitting an unexpected nested
NMI. That case specifically was fixed by commit:
a97709f563a078e2 ("irqchip/gic-v3: Do not enable irqs when handling spurious interrups")
... but unfortunately that commit added an inverse problem, where if an
exception was taken from a context with regular IRQs *unmasked* and
ICC_IAR_EL1 reported a special INTID, gic_handle_irq() would erroneously
fail to unmask NMIs (and consequently regular IRQs could not be
unmasked during softirq processing). Before and after that commit, if an
NMI was taken from a context with regular IRQs unmasked gic_handle_irq()
would not unmask NMIs prior to returning, leading to the same problem
with softirq handling.
This patch fixes this by restructuring gic_handle_irq(), splitting it
into separate irqson/irqsoff helper functions which consistently perform
the DAIF + ICC_PMR1_EL1 manipulation based upon the interrupted context,
regardless of the event indicated by ICC_IAR1_EL1.
The special INTID handling is moved into the low-level IRQ/NMI handler
invocation helper functions, so that early returns don't prevent the
required manipulation of DAIF + ICC_PMR_EL1.
Fixes: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220513133038.226182-4-mark.rutland@arm.com
2022-05-13 14:30:38 +01:00
static bool gic_rpr_is_nmi_prio ( void )
2021-06-10 15:13:46 +01:00
{
irqchip/gic-v3: Fix priority mask handling
When a kernel is built with CONFIG_ARM64_PSEUDO_NMI=y and pseudo-NMIs
are enabled at runtime, GICv3's gic_handle_irq() can leave DAIF and
ICC_PMR_EL1 in an unexpected state in some cases, breaking subsequent
usage of local_irq_enable() and resulting in softirqs being run with
IRQs erroneously masked (possibly resulting in deadlocks).
This can happen when an IRQ exception is taken from a context where
regular IRQs were unmasked, and either:
(1) ICC_IAR1_EL1 indicates a special INTID (e.g. as a result of an IRQ
being withdrawn since the IRQ exception was taken).
(2) ICC_IAR1_EL1 and ICC_RPR_EL1 indicate an NMI was acknowledged.
When an NMI is taken from a context where regular IRQs were masked,
there is no problem.
When CONFIG_ARM64_DEBUG_PRIORITY_MASKING=y, this can be detected with
perf, e.g.
| # ./perf record -a -g -e cycles:k ls -alR / > /dev/null 2>&1
| ------------[ cut here ]------------
| WARNING: CPU: 0 PID: 14 at arch/arm64/include/asm/irqflags.h:32 arch_local_irq_enable+0x4c/0x6c
| Modules linked in:
| CPU: 0 PID: 14 Comm: ksoftirqd/0 Not tainted 5.18.0-rc5-00004-g876c38e3d20b #12
| Hardware name: linux,dummy-virt (DT)
| pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : arch_local_irq_enable+0x4c/0x6c
| lr : __do_softirq+0x110/0x5d8
| sp : ffff8000080bbbc0
| pmr_save: 000000f0
| x29: ffff8000080bbbc0 x28: ffff316ac3a6ca40 x27: 0000000000000000
| x26: 0000000000000000 x25: ffffa04611c06008 x24: ffffa04611c06008
| x23: 0000000040400005 x22: 0000000000000200 x21: ffff8000080bbe20
| x20: ffffa0460fe10320 x19: 0000000000000009 x18: 0000000000000000
| x17: ffff91252dfa9000 x16: ffff800008004000 x15: 0000000000004000
| x14: 0000000000000028 x13: ffffa0460fe17578 x12: ffffa0460fed4294
| x11: ffffa0460fedc168 x10: ffffffffffffff80 x9 : ffffa0460fe10a70
| x8 : ffffa0460fedc168 x7 : 000000000000b762 x6 : 00000000057c3bdf
| x5 : ffff8000080bbb18 x4 : 0000000000000000 x3 : 0000000000000001
| x2 : ffff91252dfa9000 x1 : 0000000000000060 x0 : 00000000000000f0
| Call trace:
| arch_local_irq_enable+0x4c/0x6c
| __irq_exit_rcu+0x180/0x1ac
| irq_exit_rcu+0x1c/0x44
| el1_interrupt+0x4c/0xe4
| el1h_64_irq_handler+0x18/0x24
| el1h_64_irq+0x74/0x78
| smpboot_thread_fn+0x68/0x2c0
| kthread+0x124/0x130
| ret_from_fork+0x10/0x20
| irq event stamp: 193241
| hardirqs last enabled at (193240): [<ffffa0460fe10a9c>] __do_softirq+0x10c/0x5d8
| hardirqs last disabled at (193241): [<ffffa0461102ffe4>] el1_dbg+0x24/0x90
| softirqs last enabled at (193234): [<ffffa0460fe10e00>] __do_softirq+0x470/0x5d8
| softirqs last disabled at (193239): [<ffffa0460fea9944>] __irq_exit_rcu+0x180/0x1ac
| ---[ end trace 0000000000000000 ]---
The necessary manipulation of DAIF and ICC_PMR_EL1 depends on the
interrupted context, but the structure of gic_handle_irq() makes this
also depend on whether the GIC reports an IRQ, NMI, or special INTID:
* When the interrupted context had regular IRQs masked (and hence the
interrupt must be an NMI), the entry code performs the NMI
entry/exit and gic_handle_irq() should return with DAIF and
ICC_PMR_EL1 unchanged.
This is handled correctly today.
* When the interrupted context had regular IRQs unmasked, the entry code
performs IRQ entry/exit, but expects gic_handle_irq() to always update
ICC_PMR_EL1 and DAIF.IF to unmask NMIs (but not regular IRQs) prior to
returning (which it must do prior to invoking any regular IRQ
handler).
This unbalanced calling convention is necessary because we don't know
whether an NMI has been taken until acknowledged by a read from
ICC_IAR1_EL1, and so we need to perform the read with NMI masked in
case an NMI has been taken (and needs to be handled with NMIs masked).
Unfortunately, this is not handled consistently:
- When ICC_IAR1_EL1 reports a special INTID, gic_handle_irq() returns
immediately without manipulating ICC_PMR_EL1 and DAIF.
- When RPR_EL1 indicates an NMI, gic_handle_irq() calls
gic_handle_nmi() to invoke the NMI handler, then returns without
manipulating ICC_PMR_EL1 and DAIF.
- For regular IRQs, gic_handle_irq() manipulates ICC_PMR_EL1 and DAIF
prior to invoking the IRQ handler.
There were related problems with special INTID handling in the past,
where if an exception was taken from a context with regular IRQs masked
and ICC_IAR1_EL1 reported a special INTID, gic_handle_irq() would
erroneously unmask NMIs in NMI context, permitting an unexpected nested
NMI. That case specifically was fixed by commit:
a97709f563a078e2 ("irqchip/gic-v3: Do not enable irqs when handling spurious interrups")
... but unfortunately that commit added an inverse problem, where if an
exception was taken from a context with regular IRQs *unmasked* and
ICC_IAR1_EL1 reported a special INTID, gic_handle_irq() would erroneously
fail to unmask NMIs (and consequently regular IRQs could not be
unmasked during softirq processing). Before and after that commit, if an
NMI was taken from a context with regular IRQs unmasked gic_handle_irq()
would not unmask NMIs prior to returning, leading to the same problem
with softirq handling.
This patch fixes this by restructuring gic_handle_irq(), splitting it
into separate irqson/irqsoff helper functions which consistently perform
the DAIF + ICC_PMR_EL1 manipulation based upon the interrupted context,
regardless of the event indicated by ICC_IAR1_EL1.
The special INTID handling is moved into the low-level IRQ/NMI handler
invocation helper functions, so that early returns don't prevent the
required manipulation of DAIF + ICC_PMR_EL1.
Fixes: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220513133038.226182-4-mark.rutland@arm.com
2022-05-13 14:30:38 +01:00
if ( ! gic_supports_nmi ( ) )
return false ;
2019-01-31 14:58:58 +00:00
irqchip/gic-v3: Fix priority mask handling
When a kernel is built with CONFIG_ARM64_PSEUDO_NMI=y and pseudo-NMIs
are enabled at runtime, GICv3's gic_handle_irq() can leave DAIF and
ICC_PMR_EL1 in an unexpected state in some cases, breaking subsequent
usage of local_irq_enable() and resulting in softirqs being run with
IRQs erroneously masked (possibly resulting in deadlocks).
This can happen when an IRQ exception is taken from a context where
regular IRQs were unmasked, and either:
(1) ICC_IAR1_EL1 indicates a special INTID (e.g. as a result of an IRQ
being withdrawn since the IRQ exception was taken).
(2) ICC_IAR1_EL1 and ICC_RPR_EL1 indicate an NMI was acknowledged.
When an NMI is taken from a context where regular IRQs were masked,
there is no problem.
When CONFIG_ARM64_DEBUG_PRIORITY_MASKING=y, this can be detected with
perf, e.g.
| # ./perf record -a -g -e cycles:k ls -alR / > /dev/null 2>&1
| ------------[ cut here ]------------
| WARNING: CPU: 0 PID: 14 at arch/arm64/include/asm/irqflags.h:32 arch_local_irq_enable+0x4c/0x6c
| Modules linked in:
| CPU: 0 PID: 14 Comm: ksoftirqd/0 Not tainted 5.18.0-rc5-00004-g876c38e3d20b #12
| Hardware name: linux,dummy-virt (DT)
| pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : arch_local_irq_enable+0x4c/0x6c
| lr : __do_softirq+0x110/0x5d8
| sp : ffff8000080bbbc0
| pmr_save: 000000f0
| x29: ffff8000080bbbc0 x28: ffff316ac3a6ca40 x27: 0000000000000000
| x26: 0000000000000000 x25: ffffa04611c06008 x24: ffffa04611c06008
| x23: 0000000040400005 x22: 0000000000000200 x21: ffff8000080bbe20
| x20: ffffa0460fe10320 x19: 0000000000000009 x18: 0000000000000000
| x17: ffff91252dfa9000 x16: ffff800008004000 x15: 0000000000004000
| x14: 0000000000000028 x13: ffffa0460fe17578 x12: ffffa0460fed4294
| x11: ffffa0460fedc168 x10: ffffffffffffff80 x9 : ffffa0460fe10a70
| x8 : ffffa0460fedc168 x7 : 000000000000b762 x6 : 00000000057c3bdf
| x5 : ffff8000080bbb18 x4 : 0000000000000000 x3 : 0000000000000001
| x2 : ffff91252dfa9000 x1 : 0000000000000060 x0 : 00000000000000f0
| Call trace:
| arch_local_irq_enable+0x4c/0x6c
| __irq_exit_rcu+0x180/0x1ac
| irq_exit_rcu+0x1c/0x44
| el1_interrupt+0x4c/0xe4
| el1h_64_irq_handler+0x18/0x24
| el1h_64_irq+0x74/0x78
| smpboot_thread_fn+0x68/0x2c0
| kthread+0x124/0x130
| ret_from_fork+0x10/0x20
| irq event stamp: 193241
| hardirqs last enabled at (193240): [<ffffa0460fe10a9c>] __do_softirq+0x10c/0x5d8
| hardirqs last disabled at (193241): [<ffffa0461102ffe4>] el1_dbg+0x24/0x90
| softirqs last enabled at (193234): [<ffffa0460fe10e00>] __do_softirq+0x470/0x5d8
| softirqs last disabled at (193239): [<ffffa0460fea9944>] __irq_exit_rcu+0x180/0x1ac
| ---[ end trace 0000000000000000 ]---
The necessary manipulation of DAIF and ICC_PMR_EL1 depends on the
interrupted context, but the structure of gic_handle_irq() makes this
also depend on whether the GIC reports an IRQ, NMI, or special INTID:
* When the interrupted context had regular IRQs masked (and hence the
interrupt must be an NMI), the entry code performs the NMI
entry/exit and gic_handle_irq() should return with DAIF and
ICC_PMR_EL1 unchanged.
This is handled correctly today.
* When the interrupted context had regular IRQs unmasked, the entry code
performs IRQ entry/exit, but expects gic_handle_irq() to always update
ICC_PMR_EL1 and DAIF.IF to unmask NMIs (but not regular IRQs) prior to
returning (which it must do prior to invoking any regular IRQ
handler).
This unbalanced calling convention is necessary because we don't know
whether an NMI has been taken until acknowledged by a read from
ICC_IAR1_EL1, and so we need to perform the read with NMI masked in
case an NMI has been taken (and needs to be handled with NMIs masked).
Unfortunately, this is not handled consistently:
- When ICC_IAR1_EL1 reports a special INTID, gic_handle_irq() returns
immediately without manipulating ICC_PMR_EL1 and DAIF.
- When RPR_EL1 indicates an NMI, gic_handle_irq() calls
gic_handle_nmi() to invoke the NMI handler, then returns without
manipulating ICC_PMR_EL1 and DAIF.
- For regular IRQs, gic_handle_irq() manipulates ICC_PMR_EL1 and DAIF
prior to invoking the IRQ handler.
There were related problems with special INTID handling in the past,
where if an exception was taken from a context with regular IRQs masked
and ICC_IAR1_EL1 reported a special INTID, gic_handle_irq() would
erroneously unmask NMIs in NMI context, permitting an unexpected nested
NMI. That case specifically was fixed by commit:
a97709f563a078e2 ("irqchip/gic-v3: Do not enable irqs when handling spurious interrups")
... but unfortunately that commit added an inverse problem, where if an
exception was taken from a context with regular IRQs *unmasked* and
ICC_IAR1_EL1 reported a special INTID, gic_handle_irq() would erroneously
fail to unmask NMIs (and consequently regular IRQs could not be
unmasked during softirq processing). Before and after that commit, if an
NMI was taken from a context with regular IRQs unmasked gic_handle_irq()
would not unmask NMIs prior to returning, leading to the same problem
with softirq handling.
This patch fixes this by restructuring gic_handle_irq(), splitting it
into separate irqson/irqsoff helper functions which consistently perform
the DAIF + ICC_PMR_EL1 manipulation based upon the interrupted context,
regardless of the event indicated by ICC_IAR1_EL1.
The special INTID handling is moved into the low-level IRQ/NMI handler
invocation helper functions, so that early returns don't prevent the
required manipulation of DAIF + ICC_PMR_EL1.
Fixes: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220513133038.226182-4-mark.rutland@arm.com
2022-05-13 14:30:38 +01:00
return unlikely ( gic_read_rpr ( ) = = GICD_INT_RPR_PRI ( GICD_INT_NMI_PRI ) ) ;
}
2021-06-10 15:13:46 +01:00
irqchip/gic-v3: Fix priority mask handling
When a kernel is built with CONFIG_ARM64_PSEUDO_NMI=y and pseudo-NMIs
are enabled at runtime, GICv3's gic_handle_irq() can leave DAIF and
ICC_PMR_EL1 in an unexpected state in some cases, breaking subsequent
usage of local_irq_enable() and resulting in softirqs being run with
IRQs erroneously masked (possibly resulting in deadlocks).
This can happen when an IRQ exception is taken from a context where
regular IRQs were unmasked, and either:
(1) ICC_IAR1_EL1 indicates a special INTID (e.g. as a result of an IRQ
being withdrawn since the IRQ exception was taken).
(2) ICC_IAR1_EL1 and ICC_RPR_EL1 indicate an NMI was acknowledged.
When an NMI is taken from a context where regular IRQs were masked,
there is no problem.
When CONFIG_ARM64_DEBUG_PRIORITY_MASKING=y, this can be detected with
perf, e.g.
| # ./perf record -a -g -e cycles:k ls -alR / > /dev/null 2>&1
| ------------[ cut here ]------------
| WARNING: CPU: 0 PID: 14 at arch/arm64/include/asm/irqflags.h:32 arch_local_irq_enable+0x4c/0x6c
| Modules linked in:
| CPU: 0 PID: 14 Comm: ksoftirqd/0 Not tainted 5.18.0-rc5-00004-g876c38e3d20b #12
| Hardware name: linux,dummy-virt (DT)
| pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : arch_local_irq_enable+0x4c/0x6c
| lr : __do_softirq+0x110/0x5d8
| sp : ffff8000080bbbc0
| pmr_save: 000000f0
| x29: ffff8000080bbbc0 x28: ffff316ac3a6ca40 x27: 0000000000000000
| x26: 0000000000000000 x25: ffffa04611c06008 x24: ffffa04611c06008
| x23: 0000000040400005 x22: 0000000000000200 x21: ffff8000080bbe20
| x20: ffffa0460fe10320 x19: 0000000000000009 x18: 0000000000000000
| x17: ffff91252dfa9000 x16: ffff800008004000 x15: 0000000000004000
| x14: 0000000000000028 x13: ffffa0460fe17578 x12: ffffa0460fed4294
| x11: ffffa0460fedc168 x10: ffffffffffffff80 x9 : ffffa0460fe10a70
| x8 : ffffa0460fedc168 x7 : 000000000000b762 x6 : 00000000057c3bdf
| x5 : ffff8000080bbb18 x4 : 0000000000000000 x3 : 0000000000000001
| x2 : ffff91252dfa9000 x1 : 0000000000000060 x0 : 00000000000000f0
| Call trace:
| arch_local_irq_enable+0x4c/0x6c
| __irq_exit_rcu+0x180/0x1ac
| irq_exit_rcu+0x1c/0x44
| el1_interrupt+0x4c/0xe4
| el1h_64_irq_handler+0x18/0x24
| el1h_64_irq+0x74/0x78
| smpboot_thread_fn+0x68/0x2c0
| kthread+0x124/0x130
| ret_from_fork+0x10/0x20
| irq event stamp: 193241
| hardirqs last enabled at (193240): [<ffffa0460fe10a9c>] __do_softirq+0x10c/0x5d8
| hardirqs last disabled at (193241): [<ffffa0461102ffe4>] el1_dbg+0x24/0x90
| softirqs last enabled at (193234): [<ffffa0460fe10e00>] __do_softirq+0x470/0x5d8
| softirqs last disabled at (193239): [<ffffa0460fea9944>] __irq_exit_rcu+0x180/0x1ac
| ---[ end trace 0000000000000000 ]---
The necessary manipulation of DAIF and ICC_PMR_EL1 depends on the
interrupted context, but the structure of gic_handle_irq() makes this
also depend on whether the GIC reports an IRQ, NMI, or special INTID:
* When the interrupted context had regular IRQs masked (and hence the
interrupt must be an NMI), the entry code performs the NMI
entry/exit and gic_handle_irq() should return with DAIF and
ICC_PMR_EL1 unchanged.
This is handled correctly today.
* When the interrupted context had regular IRQs unmasked, the entry code
performs IRQ entry/exit, but expects gic_handle_irq() to always update
ICC_PMR_EL1 and DAIF.IF to unmask NMIs (but not regular IRQs) prior to
returning (which it must do prior to invoking any regular IRQ
handler).
This unbalanced calling convention is necessary because we don't know
whether an NMI has been taken until acknowledged by a read from
ICC_IAR1_EL1, and so we need to perform the read with NMI masked in
case an NMI has been taken (and needs to be handled with NMIs masked).
Unfortunately, this is not handled consistently:
- When ICC_IAR1_EL1 reports a special INTID, gic_handle_irq() returns
immediately without manipulating ICC_PMR_EL1 and DAIF.
- When RPR_EL1 indicates an NMI, gic_handle_irq() calls
gic_handle_nmi() to invoke the NMI handler, then returns without
manipulating ICC_PMR_EL1 and DAIF.
- For regular IRQs, gic_handle_irq() manipulates ICC_PMR_EL1 and DAIF
prior to invoking the IRQ handler.
There were related problems with special INTID handling in the past,
where if an exception was taken from a context with regular IRQs masked
and ICC_IAR1_EL1 reported a special INTID, gic_handle_irq() would
erroneously unmask NMIs in NMI context, permitting an unexpected nested
NMI. That case specifically was fixed by commit:
a97709f563a078e2 ("irqchip/gic-v3: Do not enable irqs when handling spurious interrups")
... but unfortunately that commit added an inverse problem, where if an
exception was taken from a context with regular IRQs *unmasked* and
ICC_IAR1_EL1 reported a special INTID, gic_handle_irq() would erroneously
fail to unmask NMIs (and consequently regular IRQs could not be
unmasked during softirq processing). Before and after that commit, if an
NMI was taken from a context with regular IRQs unmasked gic_handle_irq()
would not unmask NMIs prior to returning, leading to the same problem
with softirq handling.
This patch fixes this by restructuring gic_handle_irq(), splitting it
into separate irqson/irqsoff helper functions which consistently perform
the DAIF + ICC_PMR_EL1 manipulation based upon the interrupted context,
regardless of the event indicated by ICC_IAR1_EL1.
The special INTID handling is moved into the low-level IRQ/NMI handler
invocation helper functions, so that early returns don't prevent the
required manipulation of DAIF + ICC_PMR_EL1.
Fixes: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220513133038.226182-4-mark.rutland@arm.com
2022-05-13 14:30:38 +01:00
/*
 * Report whether @irqnr falls in the inclusive range 1020..1023.
 *
 * These values are treated as "special" INTIDs rather than as real
 * interrupt numbers: __gic_handle_irq() returns without acknowledging or
 * dispatching anything when this helper returns true.
 *
 * Returns true for 1020, 1021, 1022 and 1023, false for every other value.
 */
static bool gic_irqnr_is_special(u32 irqnr)
{
	return irqnr >= 1020 && irqnr <= 1023;
}
2021-06-10 15:13:46 +01:00
irqchip/gic-v3: Fix priority mask handling
When a kernel is built with CONFIG_ARM64_PSEUDO_NMI=y and pseudo-NMIs
are enabled at runtime, GICv3's gic_handle_irq() can leave DAIF and
ICC_PMR_EL1 in an unexpected state in some cases, breaking subsequent
usage of local_irq_enable() and resulting in softirqs being run with
IRQs erroneously masked (possibly resulting in deadlocks).
This can happen when an IRQ exception is taken from a context where
regular IRQs were unmasked, and either:
(1) ICC_IAR1_EL1 indicates a special INTID (e.g. as a result of an IRQ
being withdrawn since the IRQ exception was taken).
(2) ICC_IAR1_EL1 and ICC_RPR_EL1 indicate an NMI was acknowledged.
When an NMI is taken from a context where regular IRQs were masked,
there is no problem.
When CONFIG_ARM64_DEBUG_PRIORITY_MASKING=y, this can be detected with
perf, e.g.
| # ./perf record -a -g -e cycles:k ls -alR / > /dev/null 2>&1
| ------------[ cut here ]------------
| WARNING: CPU: 0 PID: 14 at arch/arm64/include/asm/irqflags.h:32 arch_local_irq_enable+0x4c/0x6c
| Modules linked in:
| CPU: 0 PID: 14 Comm: ksoftirqd/0 Not tainted 5.18.0-rc5-00004-g876c38e3d20b #12
| Hardware name: linux,dummy-virt (DT)
| pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : arch_local_irq_enable+0x4c/0x6c
| lr : __do_softirq+0x110/0x5d8
| sp : ffff8000080bbbc0
| pmr_save: 000000f0
| x29: ffff8000080bbbc0 x28: ffff316ac3a6ca40 x27: 0000000000000000
| x26: 0000000000000000 x25: ffffa04611c06008 x24: ffffa04611c06008
| x23: 0000000040400005 x22: 0000000000000200 x21: ffff8000080bbe20
| x20: ffffa0460fe10320 x19: 0000000000000009 x18: 0000000000000000
| x17: ffff91252dfa9000 x16: ffff800008004000 x15: 0000000000004000
| x14: 0000000000000028 x13: ffffa0460fe17578 x12: ffffa0460fed4294
| x11: ffffa0460fedc168 x10: ffffffffffffff80 x9 : ffffa0460fe10a70
| x8 : ffffa0460fedc168 x7 : 000000000000b762 x6 : 00000000057c3bdf
| x5 : ffff8000080bbb18 x4 : 0000000000000000 x3 : 0000000000000001
| x2 : ffff91252dfa9000 x1 : 0000000000000060 x0 : 00000000000000f0
| Call trace:
| arch_local_irq_enable+0x4c/0x6c
| __irq_exit_rcu+0x180/0x1ac
| irq_exit_rcu+0x1c/0x44
| el1_interrupt+0x4c/0xe4
| el1h_64_irq_handler+0x18/0x24
| el1h_64_irq+0x74/0x78
| smpboot_thread_fn+0x68/0x2c0
| kthread+0x124/0x130
| ret_from_fork+0x10/0x20
| irq event stamp: 193241
| hardirqs last enabled at (193240): [<ffffa0460fe10a9c>] __do_softirq+0x10c/0x5d8
| hardirqs last disabled at (193241): [<ffffa0461102ffe4>] el1_dbg+0x24/0x90
| softirqs last enabled at (193234): [<ffffa0460fe10e00>] __do_softirq+0x470/0x5d8
| softirqs last disabled at (193239): [<ffffa0460fea9944>] __irq_exit_rcu+0x180/0x1ac
| ---[ end trace 0000000000000000 ]---
The necessary manipulation of DAIF and ICC_PMR_EL1 depends on the
interrupted context, but the structure of gic_handle_irq() makes this
also depend on whether the GIC reports an IRQ, NMI, or special INTID:
* When the interrupted context had regular IRQs masked (and hence the
interrupt must be an NMI), the entry code performs the NMI
entry/exit and gic_handle_irq() should return with DAIF and
ICC_PMR_EL1 unchanged.
This is handled correctly today.
* When the interrupted context had regular IRQs unmasked, the entry code
performs IRQ entry/exit, but expects gic_handle_irq() to always update
ICC_PMR_EL1 and DAIF.IF to unmask NMIs (but not regular IRQs) prior to
returning (which it must do prior to invoking any regular IRQ
handler).
This unbalanced calling convention is necessary because we don't know
whether an NMI has been taken until acknowledged by a read from
ICC_IAR1_EL1, and so we need to perform the read with NMI masked in
case an NMI has been taken (and needs to be handled with NMIs masked).
Unfortunately, this is not handled consistently:
- When ICC_IAR1_EL1 reports a special INTID, gic_handle_irq() returns
immediately without manipulating ICC_PMR_EL1 and DAIF.
- When RPR_EL1 indicates an NMI, gic_handle_irq() calls
gic_handle_nmi() to invoke the NMI handler, then returns without
manipulating ICC_PMR_EL1 and DAIF.
- For regular IRQs, gic_handle_irq() manipulates ICC_PMR_EL1 and DAIF
prior to invoking the IRQ handler.
There were related problems with special INTID handling in the past,
where if an exception was taken from a context with regular IRQs masked
and ICC_IAR1_EL1 reported a special INTID, gic_handle_irq() would
erroneously unmask NMIs in NMI context, permitting an unexpected nested
NMI. That case specifically was fixed by commit:
a97709f563a078e2 ("irqchip/gic-v3: Do not enable irqs when handling spurious interrups")
... but unfortunately that commit added an inverse problem, where if an
exception was taken from a context with regular IRQs *unmasked* and
ICC_IAR1_EL1 reported a special INTID, gic_handle_irq() would erroneously
fail to unmask NMIs (and consequently regular IRQs could not be
unmasked during softirq processing). Before and after that commit, if an
NMI was taken from a context with regular IRQs unmasked gic_handle_irq()
would not unmask NMIs prior to returning, leading to the same problem
with softirq handling.
This patch fixes this by restructuring gic_handle_irq(), splitting it
into separate irqson/irqsoff helper functions which consistently perform
the DAIF + ICC_PMR_EL1 manipulation based upon the interrupted context,
regardless of the event indicated by ICC_IAR1_EL1.
The special INTID handling is moved into the low-level IRQ/NMI handler
invocation helper functions, so that early returns don't prevent the
required manipulation of DAIF + ICC_PMR_EL1.
Fixes: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220513133038.226182-4-mark.rutland@arm.com
2022-05-13 14:30:38 +01:00
static void __gic_handle_irq ( u32 irqnr , struct pt_regs * regs )
{
if ( gic_irqnr_is_special ( irqnr ) )
return ;
2021-06-10 15:13:46 +01:00
irqchip/gic-v3: Refactor ISB + EOIR at ack time
There are cases where a context synchronization event is necessary
between an IRQ being raised and being handled, and there are races such
that we cannot rely upon the exception entry being subsequent to the
interrupt being raised. To fix this, we place an ISB between a read of
IAR and the subsequent invocation of an IRQ handler.
When EOI mode 1 is in use, we need to EOI an interrupt prior to invoking
its handler, and we have a write to EOIR for this. As this write to EOIR
requires an ISB, and this is provided by the gic_write_eoir() helper, we
omit the usual ISB in this case, with the logic being:
| if (static_branch_likely(&supports_deactivate_key))
| gic_write_eoir(irqnr);
| else
| isb();
This is somewhat opaque, and it would be a little clearer if there were
an unconditional ISB, with only the write to EOIR being conditional,
e.g.
| if (static_branch_likely(&supports_deactivate_key))
| write_gicreg(irqnr, ICC_EOIR1_EL1);
|
| isb();
This patch rewrites the code that way, with this logic factored into a
new helper function with comments explaining what the ISB is for, as
were originally laid out in commit:
39a06b67c2c1256b ("irqchip/gic: Ensure we have an ISB between ack and ->handle_irq")
Note that since then, we removed the IAR polling in commit:
342677d70ab92142 ("irqchip/gic-v3: Remove acknowledge loop")
... which removed one of the two race conditions.
For consistency, other portions of the driver are made to manipulate
EOIR using write_gicreg() and explicit ISBs, and the gic_write_eoir()
helper function is removed.
There should be no functional change as a result of this patch.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will.deacon@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220513133038.226182-3-mark.rutland@arm.com
2022-05-13 14:30:37 +01:00
gic_complete_ack ( irqnr ) ;
2021-06-10 15:13:46 +01:00
irqchip/gic-v3: Fix priority mask handling
When a kernel is built with CONFIG_ARM64_PSEUDO_NMI=y and pseudo-NMIs
are enabled at runtime, GICv3's gic_handle_irq() can leave DAIF and
ICC_PMR_EL1 in an unexpected state in some cases, breaking subsequent
usage of local_irq_enable() and resulting in softirqs being run with
IRQs erroneously masked (possibly resulting in deadlocks).
This can happen when an IRQ exception is taken from a context where
regular IRQs were unmasked, and either:
(1) ICC_IAR1_EL1 indicates a special INTID (e.g. as a result of an IRQ
being withdrawn since the IRQ exception was taken).
(2) ICC_IAR1_EL1 and ICC_RPR_EL1 indicate an NMI was acknowledged.
When an NMI is taken from a context where regular IRQs were masked,
there is no problem.
When CONFIG_ARM64_DEBUG_PRIORITY_MASKING=y, this can be detected with
perf, e.g.
| # ./perf record -a -g -e cycles:k ls -alR / > /dev/null 2>&1
| ------------[ cut here ]------------
| WARNING: CPU: 0 PID: 14 at arch/arm64/include/asm/irqflags.h:32 arch_local_irq_enable+0x4c/0x6c
| Modules linked in:
| CPU: 0 PID: 14 Comm: ksoftirqd/0 Not tainted 5.18.0-rc5-00004-g876c38e3d20b #12
| Hardware name: linux,dummy-virt (DT)
| pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : arch_local_irq_enable+0x4c/0x6c
| lr : __do_softirq+0x110/0x5d8
| sp : ffff8000080bbbc0
| pmr_save: 000000f0
| x29: ffff8000080bbbc0 x28: ffff316ac3a6ca40 x27: 0000000000000000
| x26: 0000000000000000 x25: ffffa04611c06008 x24: ffffa04611c06008
| x23: 0000000040400005 x22: 0000000000000200 x21: ffff8000080bbe20
| x20: ffffa0460fe10320 x19: 0000000000000009 x18: 0000000000000000
| x17: ffff91252dfa9000 x16: ffff800008004000 x15: 0000000000004000
| x14: 0000000000000028 x13: ffffa0460fe17578 x12: ffffa0460fed4294
| x11: ffffa0460fedc168 x10: ffffffffffffff80 x9 : ffffa0460fe10a70
| x8 : ffffa0460fedc168 x7 : 000000000000b762 x6 : 00000000057c3bdf
| x5 : ffff8000080bbb18 x4 : 0000000000000000 x3 : 0000000000000001
| x2 : ffff91252dfa9000 x1 : 0000000000000060 x0 : 00000000000000f0
| Call trace:
| arch_local_irq_enable+0x4c/0x6c
| __irq_exit_rcu+0x180/0x1ac
| irq_exit_rcu+0x1c/0x44
| el1_interrupt+0x4c/0xe4
| el1h_64_irq_handler+0x18/0x24
| el1h_64_irq+0x74/0x78
| smpboot_thread_fn+0x68/0x2c0
| kthread+0x124/0x130
| ret_from_fork+0x10/0x20
| irq event stamp: 193241
| hardirqs last enabled at (193240): [<ffffa0460fe10a9c>] __do_softirq+0x10c/0x5d8
| hardirqs last disabled at (193241): [<ffffa0461102ffe4>] el1_dbg+0x24/0x90
| softirqs last enabled at (193234): [<ffffa0460fe10e00>] __do_softirq+0x470/0x5d8
| softirqs last disabled at (193239): [<ffffa0460fea9944>] __irq_exit_rcu+0x180/0x1ac
| ---[ end trace 0000000000000000 ]---
The necessary manipulation of DAIF and ICC_PMR_EL1 depends on the
interrupted context, but the structure of gic_handle_irq() makes this
also depend on whether the GIC reports an IRQ, NMI, or special INTID:
* When the interrupted context had regular IRQs masked (and hence the
interrupt must be an NMI), the entry code performs the NMI
entry/exit and gic_handle_irq() should return with DAIF and
ICC_PMR_EL1 unchanged.
This is handled correctly today.
* When the interrupted context had regular IRQs unmasked, the entry code
performs IRQ entry/exit, but expects gic_handle_irq() to always update
ICC_PMR_EL1 and DAIF.IF to unmask NMIs (but not regular IRQs) prior to
returning (which it must do prior to invoking any regular IRQ
handler).
This unbalanced calling convention is necessary because we don't know
whether an NMI has been taken until acknowledged by a read from
ICC_IAR1_EL1, and so we need to perform the read with NMI masked in
case an NMI has been taken (and needs to be handled with NMIs masked).
Unfortunately, this is not handled consistently:
- When ICC_IAR1_EL1 reports a special INTID, gic_handle_irq() returns
immediately without manipulating ICC_PMR_EL1 and DAIF.
- When RPR_EL1 indicates an NMI, gic_handle_irq() calls
gic_handle_nmi() to invoke the NMI handler, then returns without
manipulating ICC_PMR_EL1 and DAIF.
- For regular IRQs, gic_handle_irq() manipulates ICC_PMR_EL1 and DAIF
prior to invoking the IRQ handler.
There were related problems with special INTID handling in the past,
where if an exception was taken from a context with regular IRQs masked
and ICC_IAR1_EL1 reported a special INTID, gic_handle_irq() would
erroneously unmask NMIs in NMI context, permitting an unexpected nested
NMI. That case specifically was fixed by commit:
a97709f563a078e2 ("irqchip/gic-v3: Do not enable irqs when handling spurious interrups")
... but unfortunately that commit added an inverse problem, where if an
exception was taken from a context with regular IRQs *unmasked* and
ICC_IAR1_EL1 reported a special INTID, gic_handle_irq() would erroneously
fail to unmask NMIs (and consequently regular IRQs could not be
unmasked during softirq processing). Before and after that commit, if an
NMI was taken from a context with regular IRQs unmasked gic_handle_irq()
would not unmask NMIs prior to returning, leading to the same problem
with softirq handling.
This patch fixes this by restructuring gic_handle_irq(), splitting it
into separate irqson/irqsoff helper functions which consistently perform
the DAIF + ICC_PMR_EL1 manipulation based upon the interrupted context,
regardless of the event indicated by ICC_IAR1_EL1.
The special INTID handling is moved into the low-level IRQ/NMI handler
invocation helper functions, so that early returns don't prevent the
required manipulation of DAIF + ICC_PMR_EL1.
Fixes: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220513133038.226182-4-mark.rutland@arm.com
2022-05-13 14:30:38 +01:00
if ( generic_handle_domain_irq ( gic_data . domain , irqnr ) ) {
WARN_ONCE ( true , " Unexpected interrupt (irqnr %u) \n " , irqnr ) ;
2019-01-31 14:58:58 +00:00
gic_deactivate_unhandled ( irqnr ) ;
2021-06-10 15:13:46 +01:00
}
2019-01-31 14:58:58 +00:00
}
irqchip/gic-v3: Fix priority mask handling
When a kernel is built with CONFIG_ARM64_PSEUDO_NMI=y and pseudo-NMIs
are enabled at runtime, GICv3's gic_handle_irq() can leave DAIF and
ICC_PMR_EL1 in an unexpected state in some cases, breaking subsequent
usage of local_irq_enable() and resulting in softirqs being run with
IRQs erroneously masked (possibly resulting in deadlocks).
This can happen when an IRQ exception is taken from a context where
regular IRQs were unmasked, and either:
(1) ICC_IAR1_EL1 indicates a special INTID (e.g. as a result of an IRQ
being withdrawn since the IRQ exception was taken).
(2) ICC_IAR1_EL1 and ICC_RPR_EL1 indicate an NMI was acknowledged.
When an NMI is taken from a context where regular IRQs were masked,
there is no problem.
When CONFIG_ARM64_DEBUG_PRIORITY_MASKING=y, this can be detected with
perf, e.g.
| # ./perf record -a -g -e cycles:k ls -alR / > /dev/null 2>&1
| ------------[ cut here ]------------
| WARNING: CPU: 0 PID: 14 at arch/arm64/include/asm/irqflags.h:32 arch_local_irq_enable+0x4c/0x6c
| Modules linked in:
| CPU: 0 PID: 14 Comm: ksoftirqd/0 Not tainted 5.18.0-rc5-00004-g876c38e3d20b #12
| Hardware name: linux,dummy-virt (DT)
| pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : arch_local_irq_enable+0x4c/0x6c
| lr : __do_softirq+0x110/0x5d8
| sp : ffff8000080bbbc0
| pmr_save: 000000f0
| x29: ffff8000080bbbc0 x28: ffff316ac3a6ca40 x27: 0000000000000000
| x26: 0000000000000000 x25: ffffa04611c06008 x24: ffffa04611c06008
| x23: 0000000040400005 x22: 0000000000000200 x21: ffff8000080bbe20
| x20: ffffa0460fe10320 x19: 0000000000000009 x18: 0000000000000000
| x17: ffff91252dfa9000 x16: ffff800008004000 x15: 0000000000004000
| x14: 0000000000000028 x13: ffffa0460fe17578 x12: ffffa0460fed4294
| x11: ffffa0460fedc168 x10: ffffffffffffff80 x9 : ffffa0460fe10a70
| x8 : ffffa0460fedc168 x7 : 000000000000b762 x6 : 00000000057c3bdf
| x5 : ffff8000080bbb18 x4 : 0000000000000000 x3 : 0000000000000001
| x2 : ffff91252dfa9000 x1 : 0000000000000060 x0 : 00000000000000f0
| Call trace:
| arch_local_irq_enable+0x4c/0x6c
| __irq_exit_rcu+0x180/0x1ac
| irq_exit_rcu+0x1c/0x44
| el1_interrupt+0x4c/0xe4
| el1h_64_irq_handler+0x18/0x24
| el1h_64_irq+0x74/0x78
| smpboot_thread_fn+0x68/0x2c0
| kthread+0x124/0x130
| ret_from_fork+0x10/0x20
| irq event stamp: 193241
| hardirqs last enabled at (193240): [<ffffa0460fe10a9c>] __do_softirq+0x10c/0x5d8
| hardirqs last disabled at (193241): [<ffffa0461102ffe4>] el1_dbg+0x24/0x90
| softirqs last enabled at (193234): [<ffffa0460fe10e00>] __do_softirq+0x470/0x5d8
| softirqs last disabled at (193239): [<ffffa0460fea9944>] __irq_exit_rcu+0x180/0x1ac
| ---[ end trace 0000000000000000 ]---
The necessary manipulation of DAIF and ICC_PMR_EL1 depends on the
interrupted context, but the structure of gic_handle_irq() makes this
also depend on whether the GIC reports an IRQ, NMI, or special INTID:
* When the interrupted context had regular IRQs masked (and hence the
interrupt must be an NMI), the entry code performs the NMI
entry/exit and gic_handle_irq() should return with DAIF and
ICC_PMR_EL1 unchanged.
This is handled correctly today.
* When the interrupted context had regular IRQs unmasked, the entry code
performs IRQ entry/exit, but expects gic_handle_irq() to always update
ICC_PMR_EL1 and DAIF.IF to unmask NMIs (but not regular IRQs) prior to
returning (which it must do prior to invoking any regular IRQ
handler).
This unbalanced calling convention is necessary because we don't know
whether an NMI has been taken until acknowledged by a read from
ICC_IAR1_EL1, and so we need to perform the read with NMI masked in
case an NMI has been taken (and needs to be handled with NMIs masked).
Unfortunately, this is not handled consistently:
- When ICC_IAR1_EL1 reports a special INTID, gic_handle_irq() returns
immediately without manipulating ICC_PMR_EL1 and DAIF.
- When RPR_EL1 indicates an NMI, gic_handle_irq() calls
gic_handle_nmi() to invoke the NMI handler, then returns without
manipulating ICC_PMR_EL1 and DAIF.
- For regular IRQs, gic_handle_irq() manipulates ICC_PMR_EL1 and DAIF
prior to invoking the IRQ handler.
There were related problems with special INTID handling in the past,
where if an exception was taken from a context with regular IRQs masked
and ICC_IAR1_EL1 reported a special INTID, gic_handle_irq() would
erroneously unmask NMIs in NMI context, permitting an unexpected nested
NMI. That case specifically was fixed by commit:
a97709f563a078e2 ("irqchip/gic-v3: Do not enable irqs when handling spurious interrups")
... but unfortunately that commit added an inverse problem, where if an
exception was taken from a context with regular IRQs *unmasked* and
ICC_IAR1_EL1 reported a special INTID, gic_handle_irq() would erroneously
fail to unmask NMIs (and consequently regular IRQs could not be
unmasked during softirq processing). Before and after that commit, if an
NMI was taken from a context with regular IRQs unmasked, gic_handle_irq()
would not unmask NMIs prior to returning, leading to the same problem
with softirq handling.
This patch fixes this by restructuring gic_handle_irq(), splitting it
into separate irqson/irqsoff helper functions which consistently perform
the DAIF + ICC_PMR_EL1 manipulation based upon the interrupted context,
regardless of the event indicated by ICC_IAR1_EL1.
The special INTID handling is moved into the low-level IRQ/NMI handler
invocation helper functions, so that early returns don't prevent the
required manipulation of DAIF + ICC_PMR_EL1.
Fixes: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220513133038.226182-4-mark.rutland@arm.com
2022-05-13 14:30:38 +01:00
/*
 * Dispatch @irqnr through the pseudo-NMI path.
 *
 * A special INTID (as reported by gic_irqnr_is_special()) is ignored and the
 * function returns without touching the GIC any further. Otherwise the
 * acknowledge is completed with gic_complete_ack() and the interrupt is
 * handed to generic_handle_domain_nmi() on gic_data.domain. If that call
 * reports failure, a one-time warning is emitted and the interrupt is
 * deactivated via gic_deactivate_unhandled().
 *
 * @regs is not referenced by the statements of this function.
 */
static void __gic_handle_nmi ( u32 irqnr , struct pt_regs * regs )
2021-06-10 15:13:46 +01:00
{
irqchip/gic-v3: Fix priority mask handling
When a kernel is built with CONFIG_ARM64_PSEUDO_NMI=y and pseudo-NMIs
are enabled at runtime, GICv3's gic_handle_irq() can leave DAIF and
ICC_PMR_EL1 in an unexpected state in some cases, breaking subsequent
usage of local_irq_enable() and resulting in softirqs being run with
IRQs erroneously masked (possibly resulting in deadlocks).
This can happen when an IRQ exception is taken from a context where
regular IRQs were unmasked, and either:
(1) ICC_IAR1_EL1 indicates a special INTID (e.g. as a result of an IRQ
being withdrawn since the IRQ exception was taken).
(2) ICC_IAR1_EL1 and ICC_RPR_EL1 indicate an NMI was acknowledged.
When an NMI is taken from a context where regular IRQs were masked,
there is no problem.
When CONFIG_ARM64_DEBUG_PRIORITY_MASKING=y, this can be detected with
perf, e.g.
| # ./perf record -a -g -e cycles:k ls -alR / > /dev/null 2>&1
| ------------[ cut here ]------------
| WARNING: CPU: 0 PID: 14 at arch/arm64/include/asm/irqflags.h:32 arch_local_irq_enable+0x4c/0x6c
| Modules linked in:
| CPU: 0 PID: 14 Comm: ksoftirqd/0 Not tainted 5.18.0-rc5-00004-g876c38e3d20b #12
| Hardware name: linux,dummy-virt (DT)
| pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : arch_local_irq_enable+0x4c/0x6c
| lr : __do_softirq+0x110/0x5d8
| sp : ffff8000080bbbc0
| pmr_save: 000000f0
| x29: ffff8000080bbbc0 x28: ffff316ac3a6ca40 x27: 0000000000000000
| x26: 0000000000000000 x25: ffffa04611c06008 x24: ffffa04611c06008
| x23: 0000000040400005 x22: 0000000000000200 x21: ffff8000080bbe20
| x20: ffffa0460fe10320 x19: 0000000000000009 x18: 0000000000000000
| x17: ffff91252dfa9000 x16: ffff800008004000 x15: 0000000000004000
| x14: 0000000000000028 x13: ffffa0460fe17578 x12: ffffa0460fed4294
| x11: ffffa0460fedc168 x10: ffffffffffffff80 x9 : ffffa0460fe10a70
| x8 : ffffa0460fedc168 x7 : 000000000000b762 x6 : 00000000057c3bdf
| x5 : ffff8000080bbb18 x4 : 0000000000000000 x3 : 0000000000000001
| x2 : ffff91252dfa9000 x1 : 0000000000000060 x0 : 00000000000000f0
| Call trace:
| arch_local_irq_enable+0x4c/0x6c
| __irq_exit_rcu+0x180/0x1ac
| irq_exit_rcu+0x1c/0x44
| el1_interrupt+0x4c/0xe4
| el1h_64_irq_handler+0x18/0x24
| el1h_64_irq+0x74/0x78
| smpboot_thread_fn+0x68/0x2c0
| kthread+0x124/0x130
| ret_from_fork+0x10/0x20
| irq event stamp: 193241
| hardirqs last enabled at (193240): [<ffffa0460fe10a9c>] __do_softirq+0x10c/0x5d8
| hardirqs last disabled at (193241): [<ffffa0461102ffe4>] el1_dbg+0x24/0x90
| softirqs last enabled at (193234): [<ffffa0460fe10e00>] __do_softirq+0x470/0x5d8
| softirqs last disabled at (193239): [<ffffa0460fea9944>] __irq_exit_rcu+0x180/0x1ac
| ---[ end trace 0000000000000000 ]---
The necessary manipulation of DAIF and ICC_PMR_EL1 depends on the
interrupted context, but the structure of gic_handle_irq() makes this
also depend on whether the GIC reports an IRQ, NMI, or special INTID:
* When the interrupted context had regular IRQs masked (and hence the
interrupt must be an NMI), the entry code performs the NMI
entry/exit and gic_handle_irq() should return with DAIF and
ICC_PMR_EL1 unchanged.
This is handled correctly today.
* When the interrupted context had regular IRQs unmasked, the entry code
performs IRQ entry/exit, but expects gic_handle_irq() to always update
ICC_PMR_EL1 and DAIF.IF to unmask NMIs (but not regular IRQs) prior to
returning (which it must do prior to invoking any regular IRQ
handler).
This unbalanced calling convention is necessary because we don't know
whether an NMI has been taken until acknowledged by a read from
ICC_IAR1_EL1, and so we need to perform the read with NMI masked in
case an NMI has been taken (and needs to be handled with NMIs masked).
Unfortunately, this is not handled consistently:
- When ICC_IAR1_EL1 reports a special INTID, gic_handle_irq() returns
immediately without manipulating ICC_PMR_EL1 and DAIF.
- When RPR_EL1 indicates an NMI, gic_handle_irq() calls
gic_handle_nmi() to invoke the NMI handler, then returns without
manipulating ICC_PMR_EL1 and DAIF.
- For regular IRQs, gic_handle_irq() manipulates ICC_PMR_EL1 and DAIF
prior to invoking the IRQ handler.
There were related problems with special INTID handling in the past,
where if an exception was taken from a context with regular IRQs masked
and ICC_IAR1_EL1 reported a special INTID, gic_handle_irq() would
erroneously unmask NMIs in NMI context, permitting an unexpected nested
NMI. That case specifically was fixed by commit:
a97709f563a078e2 ("irqchip/gic-v3: Do not enable irqs when handling spurious interrups")
... but unfortunately that commit added an inverse problem, where if an
exception was taken from a context with regular IRQs *unmasked* and
ICC_IAR1_EL1 reported a special INTID, gic_handle_irq() would erroneously
fail to unmask NMIs (and consequently regular IRQs could not be
unmasked during softirq processing). Before and after that commit, if an
NMI was taken from a context with regular IRQs unmasked, gic_handle_irq()
would not unmask NMIs prior to returning, leading to the same problem
with softirq handling.
This patch fixes this by restructuring gic_handle_irq(), splitting it
into separate irqson/irqsoff helper functions which consistently perform
the DAIF + ICC_PMR_EL1 manipulation based upon the interrupted context,
regardless of the event indicated by ICC_IAR1_EL1.
The special INTID handling is moved into the low-level IRQ/NMI handler
invocation helper functions, so that early returns don't prevent the
required manipulation of DAIF + ICC_PMR_EL1.
Fixes: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220513133038.226182-4-mark.rutland@arm.com
2022-05-13 14:30:38 +01:00
if ( gic_irqnr_is_special ( irqnr ) )
return ;
2021-06-10 15:13:46 +01:00
irqchip/gic-v3: Fix priority mask handling
When a kernel is built with CONFIG_ARM64_PSEUDO_NMI=y and pseudo-NMIs
are enabled at runtime, GICv3's gic_handle_irq() can leave DAIF and
ICC_PMR_EL1 in an unexpected state in some cases, breaking subsequent
usage of local_irq_enable() and resulting in softirqs being run with
IRQs erroneously masked (possibly resulting in deadlocks).
This can happen when an IRQ exception is taken from a context where
regular IRQs were unmasked, and either:
(1) ICC_IAR1_EL1 indicates a special INTID (e.g. as a result of an IRQ
being withdrawn since the IRQ exception was taken).
(2) ICC_IAR1_EL1 and ICC_RPR_EL1 indicate an NMI was acknowledged.
When an NMI is taken from a context where regular IRQs were masked,
there is no problem.
When CONFIG_ARM64_DEBUG_PRIORITY_MASKING=y, this can be detected with
perf, e.g.
| # ./perf record -a -g -e cycles:k ls -alR / > /dev/null 2>&1
| ------------[ cut here ]------------
| WARNING: CPU: 0 PID: 14 at arch/arm64/include/asm/irqflags.h:32 arch_local_irq_enable+0x4c/0x6c
| Modules linked in:
| CPU: 0 PID: 14 Comm: ksoftirqd/0 Not tainted 5.18.0-rc5-00004-g876c38e3d20b #12
| Hardware name: linux,dummy-virt (DT)
| pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : arch_local_irq_enable+0x4c/0x6c
| lr : __do_softirq+0x110/0x5d8
| sp : ffff8000080bbbc0
| pmr_save: 000000f0
| x29: ffff8000080bbbc0 x28: ffff316ac3a6ca40 x27: 0000000000000000
| x26: 0000000000000000 x25: ffffa04611c06008 x24: ffffa04611c06008
| x23: 0000000040400005 x22: 0000000000000200 x21: ffff8000080bbe20
| x20: ffffa0460fe10320 x19: 0000000000000009 x18: 0000000000000000
| x17: ffff91252dfa9000 x16: ffff800008004000 x15: 0000000000004000
| x14: 0000000000000028 x13: ffffa0460fe17578 x12: ffffa0460fed4294
| x11: ffffa0460fedc168 x10: ffffffffffffff80 x9 : ffffa0460fe10a70
| x8 : ffffa0460fedc168 x7 : 000000000000b762 x6 : 00000000057c3bdf
| x5 : ffff8000080bbb18 x4 : 0000000000000000 x3 : 0000000000000001
| x2 : ffff91252dfa9000 x1 : 0000000000000060 x0 : 00000000000000f0
| Call trace:
| arch_local_irq_enable+0x4c/0x6c
| __irq_exit_rcu+0x180/0x1ac
| irq_exit_rcu+0x1c/0x44
| el1_interrupt+0x4c/0xe4
| el1h_64_irq_handler+0x18/0x24
| el1h_64_irq+0x74/0x78
| smpboot_thread_fn+0x68/0x2c0
| kthread+0x124/0x130
| ret_from_fork+0x10/0x20
| irq event stamp: 193241
| hardirqs last enabled at (193240): [<ffffa0460fe10a9c>] __do_softirq+0x10c/0x5d8
| hardirqs last disabled at (193241): [<ffffa0461102ffe4>] el1_dbg+0x24/0x90
| softirqs last enabled at (193234): [<ffffa0460fe10e00>] __do_softirq+0x470/0x5d8
| softirqs last disabled at (193239): [<ffffa0460fea9944>] __irq_exit_rcu+0x180/0x1ac
| ---[ end trace 0000000000000000 ]---
The necessary manipulation of DAIF and ICC_PMR_EL1 depends on the
interrupted context, but the structure of gic_handle_irq() makes this
also depend on whether the GIC reports an IRQ, NMI, or special INTID:
* When the interrupted context had regular IRQs masked (and hence the
interrupt must be an NMI), the entry code performs the NMI
entry/exit and gic_handle_irq() should return with DAIF and
ICC_PMR_EL1 unchanged.
This is handled correctly today.
* When the interrupted context had regular IRQs unmasked, the entry code
performs IRQ entry/exit, but expects gic_handle_irq() to always update
ICC_PMR_EL1 and DAIF.IF to unmask NMIs (but not regular IRQs) prior to
returning (which it must do prior to invoking any regular IRQ
handler).
This unbalanced calling convention is necessary because we don't know
whether an NMI has been taken until acknowledged by a read from
ICC_IAR1_EL1, and so we need to perform the read with NMI masked in
case an NMI has been taken (and needs to be handled with NMIs masked).
Unfortunately, this is not handled consistently:
- When ICC_IAR1_EL1 reports a special INTID, gic_handle_irq() returns
immediately without manipulating ICC_PMR_EL1 and DAIF.
- When RPR_EL1 indicates an NMI, gic_handle_irq() calls
gic_handle_nmi() to invoke the NMI handler, then returns without
manipulating ICC_PMR_EL1 and DAIF.
- For regular IRQs, gic_handle_irq() manipulates ICC_PMR_EL1 and DAIF
prior to invoking the IRQ handler.
There were related problems with special INTID handling in the past,
where if an exception was taken from a context with regular IRQs masked
and ICC_IAR1_EL1 reported a special INTID, gic_handle_irq() would
erroneously unmask NMIs in NMI context, permitting an unexpected nested
NMI. That case specifically was fixed by commit:
a97709f563a078e2 ("irqchip/gic-v3: Do not enable irqs when handling spurious interrups")
... but unfortunately that commit added an inverse problem, where if an
exception was taken from a context with regular IRQs *unmasked* and
ICC_IAR1_EL1 reported a special INTID, gic_handle_irq() would erroneously
fail to unmask NMIs (and consequently regular IRQs could not be
unmasked during softirq processing). Before and after that commit, if an
NMI was taken from a context with regular IRQs unmasked, gic_handle_irq()
would not unmask NMIs prior to returning, leading to the same problem
with softirq handling.
This patch fixes this by restructuring gic_handle_irq(), splitting it
into separate irqson/irqsoff helper functions which consistently perform
the DAIF + ICC_PMR_EL1 manipulation based upon the interrupted context,
regardless of the event indicated by ICC_IAR1_EL1.
The special INTID handling is moved into the low-level IRQ/NMI handler
invocation helper functions, so that early returns don't prevent the
required manipulation of DAIF + ICC_PMR_EL1.
Fixes: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220513133038.226182-4-mark.rutland@arm.com
2022-05-13 14:30:38 +01:00
gic_complete_ack ( irqnr ) ;
2021-06-10 15:13:46 +01:00
irqchip/gic-v3: Fix priority mask handling
When a kernel is built with CONFIG_ARM64_PSEUDO_NMI=y and pseudo-NMIs
are enabled at runtime, GICv3's gic_handle_irq() can leave DAIF and
ICC_PMR_EL1 in an unexpected state in some cases, breaking subsequent
usage of local_irq_enable() and resulting in softirqs being run with
IRQs erroneously masked (possibly resulting in deadlocks).
This can happen when an IRQ exception is taken from a context where
regular IRQs were unmasked, and either:
(1) ICC_IAR1_EL1 indicates a special INTID (e.g. as a result of an IRQ
being withdrawn since the IRQ exception was taken).
(2) ICC_IAR1_EL1 and ICC_RPR_EL1 indicate an NMI was acknowledged.
When an NMI is taken from a context where regular IRQs were masked,
there is no problem.
When CONFIG_ARM64_DEBUG_PRIORITY_MASKING=y, this can be detected with
perf, e.g.
| # ./perf record -a -g -e cycles:k ls -alR / > /dev/null 2>&1
| ------------[ cut here ]------------
| WARNING: CPU: 0 PID: 14 at arch/arm64/include/asm/irqflags.h:32 arch_local_irq_enable+0x4c/0x6c
| Modules linked in:
| CPU: 0 PID: 14 Comm: ksoftirqd/0 Not tainted 5.18.0-rc5-00004-g876c38e3d20b #12
| Hardware name: linux,dummy-virt (DT)
| pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : arch_local_irq_enable+0x4c/0x6c
| lr : __do_softirq+0x110/0x5d8
| sp : ffff8000080bbbc0
| pmr_save: 000000f0
| x29: ffff8000080bbbc0 x28: ffff316ac3a6ca40 x27: 0000000000000000
| x26: 0000000000000000 x25: ffffa04611c06008 x24: ffffa04611c06008
| x23: 0000000040400005 x22: 0000000000000200 x21: ffff8000080bbe20
| x20: ffffa0460fe10320 x19: 0000000000000009 x18: 0000000000000000
| x17: ffff91252dfa9000 x16: ffff800008004000 x15: 0000000000004000
| x14: 0000000000000028 x13: ffffa0460fe17578 x12: ffffa0460fed4294
| x11: ffffa0460fedc168 x10: ffffffffffffff80 x9 : ffffa0460fe10a70
| x8 : ffffa0460fedc168 x7 : 000000000000b762 x6 : 00000000057c3bdf
| x5 : ffff8000080bbb18 x4 : 0000000000000000 x3 : 0000000000000001
| x2 : ffff91252dfa9000 x1 : 0000000000000060 x0 : 00000000000000f0
| Call trace:
| arch_local_irq_enable+0x4c/0x6c
| __irq_exit_rcu+0x180/0x1ac
| irq_exit_rcu+0x1c/0x44
| el1_interrupt+0x4c/0xe4
| el1h_64_irq_handler+0x18/0x24
| el1h_64_irq+0x74/0x78
| smpboot_thread_fn+0x68/0x2c0
| kthread+0x124/0x130
| ret_from_fork+0x10/0x20
| irq event stamp: 193241
| hardirqs last enabled at (193240): [<ffffa0460fe10a9c>] __do_softirq+0x10c/0x5d8
| hardirqs last disabled at (193241): [<ffffa0461102ffe4>] el1_dbg+0x24/0x90
| softirqs last enabled at (193234): [<ffffa0460fe10e00>] __do_softirq+0x470/0x5d8
| softirqs last disabled at (193239): [<ffffa0460fea9944>] __irq_exit_rcu+0x180/0x1ac
| ---[ end trace 0000000000000000 ]---
The necessary manipulation of DAIF and ICC_PMR_EL1 depends on the
interrupted context, but the structure of gic_handle_irq() makes this
also depend on whether the GIC reports an IRQ, NMI, or special INTID:
* When the interrupted context had regular IRQs masked (and hence the
interrupt must be an NMI), the entry code performs the NMI
entry/exit and gic_handle_irq() should return with DAIF and
ICC_PMR_EL1 unchanged.
This is handled correctly today.
* When the interrupted context had regular IRQs unmasked, the entry code
performs IRQ entry/exit, but expects gic_handle_irq() to always update
ICC_PMR_EL1 and DAIF.IF to unmask NMIs (but not regular IRQs) prior to
returning (which it must do prior to invoking any regular IRQ
handler).
This unbalanced calling convention is necessary because we don't know
whether an NMI has been taken until acknowledged by a read from
ICC_IAR1_EL1, and so we need to perform the read with NMI masked in
case an NMI has been taken (and needs to be handled with NMIs masked).
Unfortunately, this is not handled consistently:
- When ICC_IAR1_EL1 reports a special INTID, gic_handle_irq() returns
immediately without manipulating ICC_PMR_EL1 and DAIF.
- When RPR_EL1 indicates an NMI, gic_handle_irq() calls
gic_handle_nmi() to invoke the NMI handler, then returns without
manipulating ICC_PMR_EL1 and DAIF.
- For regular IRQs, gic_handle_irq() manipulates ICC_PMR_EL1 and DAIF
prior to invoking the IRQ handler.
There were related problems with special INTID handling in the past,
where if an exception was taken from a context with regular IRQs masked
and ICC_IAR1_EL1 reported a special INTID, gic_handle_irq() would
erroneously unmask NMIs in NMI context, permitting an unexpected nested
NMI. That case specifically was fixed by commit:
a97709f563a078e2 ("irqchip/gic-v3: Do not enable irqs when handling spurious interrups")
... but unfortunately that commit added an inverse problem, where if an
exception was taken from a context with regular IRQs *unmasked* and
ICC_IAR1_EL1 reported a special INTID, gic_handle_irq() would erroneously
fail to unmask NMIs (and consequently regular IRQs could not be
unmasked during softirq processing). Before and after that commit, if an
NMI was taken from a context with regular IRQs unmasked, gic_handle_irq()
would not unmask NMIs prior to returning, leading to the same problem
with softirq handling.
This patch fixes this by restructuring gic_handle_irq(), splitting it
into separate irqson/irqsoff helper functions which consistently perform
the DAIF + ICC_PMR_EL1 manipulation based upon the interrupted context,
regardless of the event indicated by ICC_IAR1_EL1.
The special INTID handling is moved into the low-level IRQ/NMI handler
invocation helper functions, so that early returns don't prevent the
required manipulation of DAIF + ICC_PMR_EL1.
Fixes: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220513133038.226182-4-mark.rutland@arm.com
2022-05-13 14:30:38 +01:00
if ( generic_handle_domain_nmi ( gic_data . domain , irqnr ) ) {
WARN_ONCE ( true , " Unexpected pseudo-NMI (irqnr %u) \n " , irqnr ) ;
gic_deactivate_unhandled ( irqnr ) ;
2021-06-10 15:13:46 +01:00
}
}
irqchip/gic-v3: Fix priority mask handling
When a kernel is built with CONFIG_ARM64_PSEUDO_NMI=y and pseudo-NMIs
are enabled at runtime, GICv3's gic_handle_irq() can leave DAIF and
ICC_PMR_EL1 in an unexpected state in some cases, breaking subsequent
usage of local_irq_enable() and resulting in softirqs being run with
IRQs erroneously masked (possibly resulting in deadlocks).
This can happen when an IRQ exception is taken from a context where
regular IRQs were unmasked, and either:
(1) ICC_IAR1_EL1 indicates a special INTID (e.g. as a result of an IRQ
being withdrawn since the IRQ exception was taken).
(2) ICC_IAR1_EL1 and ICC_RPR_EL1 indicate an NMI was acknowledged.
When an NMI is taken from a context where regular IRQs were masked,
there is no problem.
When CONFIG_ARM64_DEBUG_PRIORITY_MASKING=y, this can be detected with
perf, e.g.
| # ./perf record -a -g -e cycles:k ls -alR / > /dev/null 2>&1
| ------------[ cut here ]------------
| WARNING: CPU: 0 PID: 14 at arch/arm64/include/asm/irqflags.h:32 arch_local_irq_enable+0x4c/0x6c
| Modules linked in:
| CPU: 0 PID: 14 Comm: ksoftirqd/0 Not tainted 5.18.0-rc5-00004-g876c38e3d20b #12
| Hardware name: linux,dummy-virt (DT)
| pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : arch_local_irq_enable+0x4c/0x6c
| lr : __do_softirq+0x110/0x5d8
| sp : ffff8000080bbbc0
| pmr_save: 000000f0
| x29: ffff8000080bbbc0 x28: ffff316ac3a6ca40 x27: 0000000000000000
| x26: 0000000000000000 x25: ffffa04611c06008 x24: ffffa04611c06008
| x23: 0000000040400005 x22: 0000000000000200 x21: ffff8000080bbe20
| x20: ffffa0460fe10320 x19: 0000000000000009 x18: 0000000000000000
| x17: ffff91252dfa9000 x16: ffff800008004000 x15: 0000000000004000
| x14: 0000000000000028 x13: ffffa0460fe17578 x12: ffffa0460fed4294
| x11: ffffa0460fedc168 x10: ffffffffffffff80 x9 : ffffa0460fe10a70
| x8 : ffffa0460fedc168 x7 : 000000000000b762 x6 : 00000000057c3bdf
| x5 : ffff8000080bbb18 x4 : 0000000000000000 x3 : 0000000000000001
| x2 : ffff91252dfa9000 x1 : 0000000000000060 x0 : 00000000000000f0
| Call trace:
| arch_local_irq_enable+0x4c/0x6c
| __irq_exit_rcu+0x180/0x1ac
| irq_exit_rcu+0x1c/0x44
| el1_interrupt+0x4c/0xe4
| el1h_64_irq_handler+0x18/0x24
| el1h_64_irq+0x74/0x78
| smpboot_thread_fn+0x68/0x2c0
| kthread+0x124/0x130
| ret_from_fork+0x10/0x20
| irq event stamp: 193241
| hardirqs last enabled at (193240): [<ffffa0460fe10a9c>] __do_softirq+0x10c/0x5d8
| hardirqs last disabled at (193241): [<ffffa0461102ffe4>] el1_dbg+0x24/0x90
| softirqs last enabled at (193234): [<ffffa0460fe10e00>] __do_softirq+0x470/0x5d8
| softirqs last disabled at (193239): [<ffffa0460fea9944>] __irq_exit_rcu+0x180/0x1ac
| ---[ end trace 0000000000000000 ]---
The necessary manipulation of DAIF and ICC_PMR_EL1 depends on the
interrupted context, but the structure of gic_handle_irq() makes this
also depend on whether the GIC reports an IRQ, NMI, or special INTID:
* When the interrupted context had regular IRQs masked (and hence the
interrupt must be an NMI), the entry code performs the NMI
entry/exit and gic_handle_irq() should return with DAIF and
ICC_PMR_EL1 unchanged.
This is handled correctly today.
* When the interrupted context had regular IRQs unmasked, the entry code
performs IRQ entry/exit, but expects gic_handle_irq() to always update
ICC_PMR_EL1 and DAIF.IF to unmask NMIs (but not regular IRQs) prior to
returning (which it must do prior to invoking any regular IRQ
handler).
This unbalanced calling convention is necessary because we don't know
whether an NMI has been taken until acknowledged by a read from
ICC_IAR1_EL1, and so we need to perform the read with NMI masked in
case an NMI has been taken (and needs to be handled with NMIs masked).
Unfortunately, this is not handled consistently:
- When ICC_IAR1_EL1 reports a special INTID, gic_handle_irq() returns
immediately without manipulating ICC_PMR_EL1 and DAIF.
- When RPR_EL1 indicates an NMI, gic_handle_irq() calls
gic_handle_nmi() to invoke the NMI handler, then returns without
manipulating ICC_PMR_EL1 and DAIF.
- For regular IRQs, gic_handle_irq() manipulates ICC_PMR_EL1 and DAIF
prior to invoking the IRQ handler.
There were related problems with special INTID handling in the past,
where if an exception was taken from a context with regular IRQs masked
and ICC_IAR1_EL1 reported a special INTID, gic_handle_irq() would
erroneously unmask NMIs in NMI context, permitting an unexpected nested
NMI. That case specifically was fixed by commit:
a97709f563a078e2 ("irqchip/gic-v3: Do not enable irqs when handling spurious interrups")
... but unfortunately that commit added an inverse problem, where if an
exception was taken from a context with regular IRQs *unmasked* and
ICC_IAR1_EL1 reported a special INTID, gic_handle_irq() would erroneously
fail to unmask NMIs (and consequently regular IRQs could not be
unmasked during softirq processing). Before and after that commit, if an
NMI was taken from a context with regular IRQs unmasked, gic_handle_irq()
would not unmask NMIs prior to returning, leading to the same problem
with softirq handling.
This patch fixes this by restructuring gic_handle_irq(), splitting it
into separate irqson/irqsoff helper functions which consistently perform
the DAIF + ICC_PMR_EL1 manipulation based upon the interrupted context,
regardless of the event indicated by ICC_IAR1_EL1.
The special INTID handling is moved into the low-level IRQ/NMI handler
invocation helper functions, so that early returns don't prevent the
required manipulation of DAIF + ICC_PMR_EL1.
Fixes: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220513133038.226182-4-mark.rutland@arm.com
2022-05-13 14:30:38 +01:00
/*
* An exception has been taken from a context with IRQs enabled , and this could
* be an IRQ or an NMI .
*
* The entry code called us with DAIF . IF set to keep NMIs masked . We must clear
* DAIF . IF ( and update ICC_PMR_EL1 to mask regular IRQs ) prior to returning ,
* after handling any NMI but before handling any IRQ .
*
* The entry code has performed IRQ entry , and if an NMI is detected we must
* perform NMI entry / exit around invoking the handler .
*/
static void __gic_handle_irq_from_irqson ( struct pt_regs * regs )
2014-06-30 16:01:31 +01:00
{
irqchip/gic-v3: Fix priority mask handling
When a kernel is built with CONFIG_ARM64_PSEUDO_NMI=y and pseudo-NMIs
are enabled at runtime, GICv3's gic_handle_irq() can leave DAIF and
ICC_PMR_EL1 in an unexpected state in some cases, breaking subsequent
usage of local_irq_enable() and resulting in softirqs being run with
IRQs erroneously masked (possibly resulting in deadlocks).
This can happen when an IRQ exception is taken from a context where
regular IRQs were unmasked, and either:
(1) ICC_IAR1_EL1 indicates a special INTID (e.g. as a result of an IRQ
being withdrawn since the IRQ exception was taken).
(2) ICC_IAR1_EL1 and ICC_RPR_EL1 indicate an NMI was acknowledged.
When an NMI is taken from a context where regular IRQs were masked,
there is no problem.
When CONFIG_ARM64_DEBUG_PRIORITY_MASKING=y, this can be detected with
perf, e.g.
| # ./perf record -a -g -e cycles:k ls -alR / > /dev/null 2>&1
| ------------[ cut here ]------------
| WARNING: CPU: 0 PID: 14 at arch/arm64/include/asm/irqflags.h:32 arch_local_irq_enable+0x4c/0x6c
| Modules linked in:
| CPU: 0 PID: 14 Comm: ksoftirqd/0 Not tainted 5.18.0-rc5-00004-g876c38e3d20b #12
| Hardware name: linux,dummy-virt (DT)
| pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : arch_local_irq_enable+0x4c/0x6c
| lr : __do_softirq+0x110/0x5d8
| sp : ffff8000080bbbc0
| pmr_save: 000000f0
| x29: ffff8000080bbbc0 x28: ffff316ac3a6ca40 x27: 0000000000000000
| x26: 0000000000000000 x25: ffffa04611c06008 x24: ffffa04611c06008
| x23: 0000000040400005 x22: 0000000000000200 x21: ffff8000080bbe20
| x20: ffffa0460fe10320 x19: 0000000000000009 x18: 0000000000000000
| x17: ffff91252dfa9000 x16: ffff800008004000 x15: 0000000000004000
| x14: 0000000000000028 x13: ffffa0460fe17578 x12: ffffa0460fed4294
| x11: ffffa0460fedc168 x10: ffffffffffffff80 x9 : ffffa0460fe10a70
| x8 : ffffa0460fedc168 x7 : 000000000000b762 x6 : 00000000057c3bdf
| x5 : ffff8000080bbb18 x4 : 0000000000000000 x3 : 0000000000000001
| x2 : ffff91252dfa9000 x1 : 0000000000000060 x0 : 00000000000000f0
| Call trace:
| arch_local_irq_enable+0x4c/0x6c
| __irq_exit_rcu+0x180/0x1ac
| irq_exit_rcu+0x1c/0x44
| el1_interrupt+0x4c/0xe4
| el1h_64_irq_handler+0x18/0x24
| el1h_64_irq+0x74/0x78
| smpboot_thread_fn+0x68/0x2c0
| kthread+0x124/0x130
| ret_from_fork+0x10/0x20
| irq event stamp: 193241
| hardirqs last enabled at (193240): [<ffffa0460fe10a9c>] __do_softirq+0x10c/0x5d8
| hardirqs last disabled at (193241): [<ffffa0461102ffe4>] el1_dbg+0x24/0x90
| softirqs last enabled at (193234): [<ffffa0460fe10e00>] __do_softirq+0x470/0x5d8
| softirqs last disabled at (193239): [<ffffa0460fea9944>] __irq_exit_rcu+0x180/0x1ac
| ---[ end trace 0000000000000000 ]---
The necessary manipulation of DAIF and ICC_PMR_EL1 depends on the
interrupted context, but the structure of gic_handle_irq() makes this
also depend on whether the GIC reports an IRQ, NMI, or special INTID:
* When the interrupted context had regular IRQs masked (and hence the
interrupt must be an NMI), the entry code performs the NMI
entry/exit and gic_handle_irq() should return with DAIF and
ICC_PMR_EL1 unchanged.
This is handled correctly today.
* When the interrupted context had regular IRQs unmasked, the entry code
performs IRQ entry/exit, but expects gic_handle_irq() to always update
ICC_PMR_EL1 and DAIF.IF to unmask NMIs (but not regular IRQs) prior to
returning (which it must do prior to invoking any regular IRQ
handler).
This unbalanced calling convention is necessary because we don't know
whether an NMI has been taken until acknowledged by a read from
ICC_IAR1_EL1, and so we need to perform the read with NMI masked in
case an NMI has been taken (and needs to be handled with NMIs masked).
Unfortunately, this is not handled consistently:
- When ICC_IAR1_EL1 reports a special INTID, gic_handle_irq() returns
immediately without manipulating ICC_PMR_EL1 and DAIF.
- When RPR_EL1 indicates an NMI, gic_handle_irq() calls
gic_handle_nmi() to invoke the NMI handler, then returns without
manipulating ICC_PMR_EL1 and DAIF.
- For regular IRQs, gic_handle_irq() manipulates ICC_PMR_EL1 and DAIF
prior to invoking the IRQ handler.
There were related problems with special INTID handling in the past,
where if an exception was taken from a context with regular IRQs masked
and ICC_IAR_EL1 reported a special INTID, gic_handle_irq() would
erroneously unmask NMIs in NMI context permitted an unexpected nested
NMI. That case specifically was fixed by commit:
a97709f563a078e2 ("irqchip/gic-v3: Do not enable irqs when handling spurious interrups")
... but unfortunately that commit added an inverse problem, where if an
exception was taken from a context with regular IRQs *unmasked* and
ICC_IAR_EL1 reported a special INTID, gic_handle_irq() would erroneously
fail to unmask NMIs (and consequently regular IRQs could not be
unmasked during softirq processing). Before and after that commit, if an
NMI was taken from a context with regular IRQs unmasked gic_handle_irq()
would not unmask NMIs prior to returning, leading to the same problem
with softirq handling.
This patch fixes this by restructuring gic_handle_irq(), splitting it
into separate irqson/irqsoff helper functions which consistently perform
the DAIF + ICC_PMR1_EL1 manipulation based upon the interrupted context,
regardless of the event indicated by ICC_IAR1_EL1.
The special INTID handling is moved into the low-level IRQ/NMI handler
invocation helper functions, so that early returns don't prevent the
required manipulation of DAIF + ICC_PMR_EL1.
Fixes: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220513133038.226182-4-mark.rutland@arm.com
2022-05-13 14:30:38 +01:00
bool is_nmi ;
2015-10-01 13:47:15 +01:00
u32 irqnr ;
2014-06-30 16:01:31 +01:00
irqchip/gic-v3: Fix priority mask handling
When a kernel is built with CONFIG_ARM64_PSEUDO_NMI=y and pseudo-NMIs
are enabled at runtime, GICv3's gic_handle_irq() can leave DAIF and
ICC_PMR_EL1 in an unexpected state in some cases, breaking subsequent
usage of local_irq_enable() and resulting in softirqs being run with
IRQs erroneously masked (possibly resulting in deadlocks).
This can happen when an IRQ exception is taken from a context where
regular IRQs were unmasked, and either:
(1) ICC_IAR1_EL1 indicates a special INTID (e.g. as a result of an IRQ
being withdrawn since the IRQ exception was taken).
(2) ICC_IAR1_EL1 and ICC_RPR_EL1 indicate an NMI was acknowledged.
When an NMI is taken from a context where regular IRQs were masked,
there is no problem.
When CONFIG_ARM64_DEBUG_PRIORITY_MASKING=y, this can be detected with
perf, e.g.
| # ./perf record -a -g -e cycles:k ls -alR / > /dev/null 2>&1
| ------------[ cut here ]------------
| WARNING: CPU: 0 PID: 14 at arch/arm64/include/asm/irqflags.h:32 arch_local_irq_enable+0x4c/0x6c
| Modules linked in:
| CPU: 0 PID: 14 Comm: ksoftirqd/0 Not tainted 5.18.0-rc5-00004-g876c38e3d20b #12
| Hardware name: linux,dummy-virt (DT)
| pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : arch_local_irq_enable+0x4c/0x6c
| lr : __do_softirq+0x110/0x5d8
| sp : ffff8000080bbbc0
| pmr_save: 000000f0
| x29: ffff8000080bbbc0 x28: ffff316ac3a6ca40 x27: 0000000000000000
| x26: 0000000000000000 x25: ffffa04611c06008 x24: ffffa04611c06008
| x23: 0000000040400005 x22: 0000000000000200 x21: ffff8000080bbe20
| x20: ffffa0460fe10320 x19: 0000000000000009 x18: 0000000000000000
| x17: ffff91252dfa9000 x16: ffff800008004000 x15: 0000000000004000
| x14: 0000000000000028 x13: ffffa0460fe17578 x12: ffffa0460fed4294
| x11: ffffa0460fedc168 x10: ffffffffffffff80 x9 : ffffa0460fe10a70
| x8 : ffffa0460fedc168 x7 : 000000000000b762 x6 : 00000000057c3bdf
| x5 : ffff8000080bbb18 x4 : 0000000000000000 x3 : 0000000000000001
| x2 : ffff91252dfa9000 x1 : 0000000000000060 x0 : 00000000000000f0
| Call trace:
| arch_local_irq_enable+0x4c/0x6c
| __irq_exit_rcu+0x180/0x1ac
| irq_exit_rcu+0x1c/0x44
| el1_interrupt+0x4c/0xe4
| el1h_64_irq_handler+0x18/0x24
| el1h_64_irq+0x74/0x78
| smpboot_thread_fn+0x68/0x2c0
| kthread+0x124/0x130
| ret_from_fork+0x10/0x20
| irq event stamp: 193241
| hardirqs last enabled at (193240): [<ffffa0460fe10a9c>] __do_softirq+0x10c/0x5d8
| hardirqs last disabled at (193241): [<ffffa0461102ffe4>] el1_dbg+0x24/0x90
| softirqs last enabled at (193234): [<ffffa0460fe10e00>] __do_softirq+0x470/0x5d8
| softirqs last disabled at (193239): [<ffffa0460fea9944>] __irq_exit_rcu+0x180/0x1ac
| ---[ end trace 0000000000000000 ]---
The necessary manipulation of DAIF and ICC_PMR_EL1 depends on the
interrupted context, but the structure of gic_handle_irq() makes this
also depend on whether the GIC reports an IRQ, NMI, or special INTID:
* When the interrupted context had regular IRQs masked (and hence the
interrupt must be an NMI), the entry code performs the NMI
entry/exit and gic_handle_irq() should return with DAIF and
ICC_PMR_EL1 unchanged.
This is handled correctly today.
* When the interrupted context had regular IRQs unmasked, the entry code
performs IRQ entry/exit, but expects gic_handle_irq() to always update
ICC_PMR_EL1 and DAIF.IF to unmask NMIs (but not regular IRQs) prior to
returning (which it must do prior to invoking any regular IRQ
handler).
This unbalanced calling convention is necessary because we don't know
whether an NMI has been taken until acknowledged by a read from
ICC_IAR1_EL1, and so we need to perform the read with NMI masked in
case an NMI has been taken (and needs to be handled with NMIs masked).
Unfortunately, this is not handled consistently:
- When ICC_IAR1_EL1 reports a special INTID, gic_handle_irq() returns
immediately without manipulating ICC_PMR_EL1 and DAIF.
- When RPR_EL1 indicates an NMI, gic_handle_irq() calls
gic_handle_nmi() to invoke the NMI handler, then returns without
manipulating ICC_PMR_EL1 and DAIF.
- For regular IRQs, gic_handle_irq() manipulates ICC_PMR_EL1 and DAIF
prior to invoking the IRQ handler.
There were related problems with special INTID handling in the past,
where if an exception was taken from a context with regular IRQs masked
and ICC_IAR_EL1 reported a special INTID, gic_handle_irq() would
erroneously unmask NMIs in NMI context permitted an unexpected nested
NMI. That case specifically was fixed by commit:
a97709f563a078e2 ("irqchip/gic-v3: Do not enable irqs when handling spurious interrups")
... but unfortunately that commit added an inverse problem, where if an
exception was taken from a context with regular IRQs *unmasked* and
ICC_IAR_EL1 reported a special INTID, gic_handle_irq() would erroneously
fail to unmask NMIs (and consequently regular IRQs could not be
unmasked during softirq processing). Before and after that commit, if an
NMI was taken from a context with regular IRQs unmasked gic_handle_irq()
would not unmask NMIs prior to returning, leading to the same problem
with softirq handling.
This patch fixes this by restructuring gic_handle_irq(), splitting it
into separate irqson/irqsoff helper functions which consistently perform
the DAIF + ICC_PMR1_EL1 manipulation based upon the interrupted context,
regardless of the event indicated by ICC_IAR1_EL1.
The special INTID handling is moved into the low-level IRQ/NMI handler
invocation helper functions, so that early returns don't prevent the
required manipulation of DAIF + ICC_PMR_EL1.
Fixes: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220513133038.226182-4-mark.rutland@arm.com
2022-05-13 14:30:38 +01:00
irqnr = gic_read_iar ( ) ;
2014-06-30 16:01:31 +01:00
irqchip/gic-v3: Fix priority mask handling
When a kernel is built with CONFIG_ARM64_PSEUDO_NMI=y and pseudo-NMIs
are enabled at runtime, GICv3's gic_handle_irq() can leave DAIF and
ICC_PMR_EL1 in an unexpected state in some cases, breaking subsequent
usage of local_irq_enable() and resulting in softirqs being run with
IRQs erroneously masked (possibly resulting in deadlocks).
This can happen when an IRQ exception is taken from a context where
regular IRQs were unmasked, and either:
(1) ICC_IAR1_EL1 indicates a special INTID (e.g. as a result of an IRQ
being withdrawn since the IRQ exception was taken).
(2) ICC_IAR1_EL1 and ICC_RPR_EL1 indicate an NMI was acknowledged.
When an NMI is taken from a context where regular IRQs were masked,
there is no problem.
When CONFIG_ARM64_DEBUG_PRIORITY_MASKING=y, this can be detected with
perf, e.g.
| # ./perf record -a -g -e cycles:k ls -alR / > /dev/null 2>&1
| ------------[ cut here ]------------
| WARNING: CPU: 0 PID: 14 at arch/arm64/include/asm/irqflags.h:32 arch_local_irq_enable+0x4c/0x6c
| Modules linked in:
| CPU: 0 PID: 14 Comm: ksoftirqd/0 Not tainted 5.18.0-rc5-00004-g876c38e3d20b #12
| Hardware name: linux,dummy-virt (DT)
| pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : arch_local_irq_enable+0x4c/0x6c
| lr : __do_softirq+0x110/0x5d8
| sp : ffff8000080bbbc0
| pmr_save: 000000f0
| x29: ffff8000080bbbc0 x28: ffff316ac3a6ca40 x27: 0000000000000000
| x26: 0000000000000000 x25: ffffa04611c06008 x24: ffffa04611c06008
| x23: 0000000040400005 x22: 0000000000000200 x21: ffff8000080bbe20
| x20: ffffa0460fe10320 x19: 0000000000000009 x18: 0000000000000000
| x17: ffff91252dfa9000 x16: ffff800008004000 x15: 0000000000004000
| x14: 0000000000000028 x13: ffffa0460fe17578 x12: ffffa0460fed4294
| x11: ffffa0460fedc168 x10: ffffffffffffff80 x9 : ffffa0460fe10a70
| x8 : ffffa0460fedc168 x7 : 000000000000b762 x6 : 00000000057c3bdf
| x5 : ffff8000080bbb18 x4 : 0000000000000000 x3 : 0000000000000001
| x2 : ffff91252dfa9000 x1 : 0000000000000060 x0 : 00000000000000f0
| Call trace:
| arch_local_irq_enable+0x4c/0x6c
| __irq_exit_rcu+0x180/0x1ac
| irq_exit_rcu+0x1c/0x44
| el1_interrupt+0x4c/0xe4
| el1h_64_irq_handler+0x18/0x24
| el1h_64_irq+0x74/0x78
| smpboot_thread_fn+0x68/0x2c0
| kthread+0x124/0x130
| ret_from_fork+0x10/0x20
| irq event stamp: 193241
| hardirqs last enabled at (193240): [<ffffa0460fe10a9c>] __do_softirq+0x10c/0x5d8
| hardirqs last disabled at (193241): [<ffffa0461102ffe4>] el1_dbg+0x24/0x90
| softirqs last enabled at (193234): [<ffffa0460fe10e00>] __do_softirq+0x470/0x5d8
| softirqs last disabled at (193239): [<ffffa0460fea9944>] __irq_exit_rcu+0x180/0x1ac
| ---[ end trace 0000000000000000 ]---
The necessary manipulation of DAIF and ICC_PMR_EL1 depends on the
interrupted context, but the structure of gic_handle_irq() makes this
also depend on whether the GIC reports an IRQ, NMI, or special INTID:
* When the interrupted context had regular IRQs masked (and hence the
interrupt must be an NMI), the entry code performs the NMI
entry/exit and gic_handle_irq() should return with DAIF and
ICC_PMR_EL1 unchanged.
This is handled correctly today.
* When the interrupted context had regular IRQs unmasked, the entry code
performs IRQ entry/exit, but expects gic_handle_irq() to always update
ICC_PMR_EL1 and DAIF.IF to unmask NMIs (but not regular IRQs) prior to
returning (which it must do prior to invoking any regular IRQ
handler).
This unbalanced calling convention is necessary because we don't know
whether an NMI has been taken until acknowledged by a read from
ICC_IAR1_EL1, and so we need to perform the read with NMI masked in
case an NMI has been taken (and needs to be handled with NMIs masked).
Unfortunately, this is not handled consistently:
- When ICC_IAR1_EL1 reports a special INTID, gic_handle_irq() returns
immediately without manipulating ICC_PMR_EL1 and DAIF.
- When RPR_EL1 indicates an NMI, gic_handle_irq() calls
gic_handle_nmi() to invoke the NMI handler, then returns without
manipulating ICC_PMR_EL1 and DAIF.
- For regular IRQs, gic_handle_irq() manipulates ICC_PMR_EL1 and DAIF
prior to invoking the IRQ handler.
There were related problems with special INTID handling in the past,
where if an exception was taken from a context with regular IRQs masked
and ICC_IAR_EL1 reported a special INTID, gic_handle_irq() would
erroneously unmask NMIs in NMI context permitted an unexpected nested
NMI. That case specifically was fixed by commit:
a97709f563a078e2 ("irqchip/gic-v3: Do not enable irqs when handling spurious interrups")
... but unfortunately that commit added an inverse problem, where if an
exception was taken from a context with regular IRQs *unmasked* and
ICC_IAR_EL1 reported a special INTID, gic_handle_irq() would erroneously
fail to unmask NMIs (and consequently regular IRQs could not be
unmasked during softirq processing). Before and after that commit, if an
NMI was taken from a context with regular IRQs unmasked gic_handle_irq()
would not unmask NMIs prior to returning, leading to the same problem
with softirq handling.
This patch fixes this by restructuring gic_handle_irq(), splitting it
into separate irqson/irqsoff helper functions which consistently perform
the DAIF + ICC_PMR1_EL1 manipulation based upon the interrupted context,
regardless of the event indicated by ICC_IAR1_EL1.
The special INTID handling is moved into the low-level IRQ/NMI handler
invocation helper functions, so that early returns don't prevent the
required manipulation of DAIF + ICC_PMR_EL1.
Fixes: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220513133038.226182-4-mark.rutland@arm.com
2022-05-13 14:30:38 +01:00
is_nmi = gic_rpr_is_nmi_prio ( ) ;
irqchip/gic-v3: Do not enable irqs when handling spurious interrups
We triggered the following error while running our 4.19 kernel
with the pseudo-NMI patches backported to it:
[ 14.816231] ------------[ cut here ]------------
[ 14.816231] kernel BUG at irq.c:99!
[ 14.816232] Internal error: Oops - BUG: 0 [#1] SMP
[ 14.816232] Process swapper/0 (pid: 0, stack limit = 0x(____ptrval____))
[ 14.816233] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G O 4.19.95.aarch64 #14
[ 14.816233] Hardware name: evb (DT)
[ 14.816234] pstate: 80400085 (Nzcv daIf +PAN -UAO)
[ 14.816234] pc : asm_nmi_enter+0x94/0x98
[ 14.816235] lr : asm_nmi_enter+0x18/0x98
[ 14.816235] sp : ffff000008003c50
[ 14.816235] pmr_save: 00000070
[ 14.816237] x29: ffff000008003c50 x28: ffff0000095f56c0
[ 14.816238] x27: 0000000000000000 x26: ffff000008004000
[ 14.816239] x25: 00000000015e0000 x24: ffff8008fb916000
[ 14.816240] x23: 0000000020400005 x22: ffff0000080817cc
[ 14.816241] x21: ffff000008003da0 x20: 0000000000000060
[ 14.816242] x19: 00000000000003ff x18: ffffffffffffffff
[ 14.816243] x17: 0000000000000008 x16: 003d090000000000
[ 14.816244] x15: ffff0000095ea6c8 x14: ffff8008fff5ab40
[ 14.816244] x13: ffff8008fff58b9d x12: 0000000000000000
[ 14.816245] x11: ffff000008c8a200 x10: 000000008e31fca5
[ 14.816246] x9 : ffff000008c8a208 x8 : 000000000000000f
[ 14.816247] x7 : 0000000000000004 x6 : ffff8008fff58b9e
[ 14.816248] x5 : 0000000000000000 x4 : 0000000080000000
[ 14.816249] x3 : 0000000000000000 x2 : 0000000080000000
[ 14.816250] x1 : 0000000000120000 x0 : ffff0000095f56c0
[ 14.816251] Call trace:
[ 14.816251] asm_nmi_enter+0x94/0x98
[ 14.816251] el1_irq+0x8c/0x180 (IRQ C)
[ 14.816252] gic_handle_irq+0xbc/0x2e4
[ 14.816252] el1_irq+0xcc/0x180 (IRQ B)
[ 14.816253] arch_timer_handler_virt+0x38/0x58
[ 14.816253] handle_percpu_devid_irq+0x90/0x240
[ 14.816253] generic_handle_irq+0x34/0x50
[ 14.816254] __handle_domain_irq+0x68/0xc0
[ 14.816254] gic_handle_irq+0xf8/0x2e4
[ 14.816255] el1_irq+0xcc/0x180 (IRQ A)
[ 14.816255] arch_cpu_idle+0x34/0x1c8
[ 14.816255] default_idle_call+0x24/0x44
[ 14.816256] do_idle+0x1d0/0x2c8
[ 14.816256] cpu_startup_entry+0x28/0x30
[ 14.816256] rest_init+0xb8/0xc8
[ 14.816257] start_kernel+0x4c8/0x4f4
[ 14.816257] Code: 940587f1 d5384100 b9401001 36a7fd01 (d4210000)
[ 14.816258] Modules linked in: start_dp(O) smeth(O)
[ 15.103092] ---[ end trace 701753956cb14aa8 ]---
[ 15.103093] Kernel panic - not syncing: Fatal exception in interrupt
[ 15.103099] SMP: stopping secondary CPUs
[ 15.103100] Kernel Offset: disabled
[ 15.103100] CPU features: 0x36,a2400218
[ 15.103100] Memory Limit: none
which is cause by a 'BUG_ON(in_nmi())' in nmi_enter().
From the call trace, we can find three interrupts (noted A, B, C above):
interrupt (A) is preempted by (B), which is further interrupted by (C).
Subsequent investigations show that (B) results in nmi_enter() being
called, but that it actually is a spurious interrupt. Furthermore,
interrupts are reenabled in the context of (B), and (C) fires with
NMI priority. We end-up with a nested NMI situation, something
we definitely do not want to (and cannot) handle.
The bug here is that spurious interrupts should never result in any
state change, and we should just return to the interrupted context.
Moving the handling of spurious interrupts as early as possible in
the GICv3 handler fixes this issue.
Fixes: 3f1f3234bc2d ("irqchip/gic-v3: Switch to PMR masking before calling IRQ handler")
Acked-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: He Ying <heying24@huawei.com>
[maz: rewrote commit message, corrected Fixes: tag]
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20210423083516.170111-1-heying24@huawei.com
Cc: stable@vger.kernel.org
2021-04-23 04:35:16 -04:00
irqchip/gic-v3: Fix priority mask handling
When a kernel is built with CONFIG_ARM64_PSEUDO_NMI=y and pseudo-NMIs
are enabled at runtime, GICv3's gic_handle_irq() can leave DAIF and
ICC_PMR_EL1 in an unexpected state in some cases, breaking subsequent
usage of local_irq_enable() and resulting in softirqs being run with
IRQs erroneously masked (possibly resulting in deadlocks).
This can happen when an IRQ exception is taken from a context where
regular IRQs were unmasked, and either:
(1) ICC_IAR1_EL1 indicates a special INTID (e.g. as a result of an IRQ
being withdrawn since the IRQ exception was taken).
(2) ICC_IAR1_EL1 and ICC_RPR_EL1 indicate an NMI was acknowledged.
When an NMI is taken from a context where regular IRQs were masked,
there is no problem.
When CONFIG_ARM64_DEBUG_PRIORITY_MASKING=y, this can be detected with
perf, e.g.
| # ./perf record -a -g -e cycles:k ls -alR / > /dev/null 2>&1
| ------------[ cut here ]------------
| WARNING: CPU: 0 PID: 14 at arch/arm64/include/asm/irqflags.h:32 arch_local_irq_enable+0x4c/0x6c
| Modules linked in:
| CPU: 0 PID: 14 Comm: ksoftirqd/0 Not tainted 5.18.0-rc5-00004-g876c38e3d20b #12
| Hardware name: linux,dummy-virt (DT)
| pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : arch_local_irq_enable+0x4c/0x6c
| lr : __do_softirq+0x110/0x5d8
| sp : ffff8000080bbbc0
| pmr_save: 000000f0
| x29: ffff8000080bbbc0 x28: ffff316ac3a6ca40 x27: 0000000000000000
| x26: 0000000000000000 x25: ffffa04611c06008 x24: ffffa04611c06008
| x23: 0000000040400005 x22: 0000000000000200 x21: ffff8000080bbe20
| x20: ffffa0460fe10320 x19: 0000000000000009 x18: 0000000000000000
| x17: ffff91252dfa9000 x16: ffff800008004000 x15: 0000000000004000
| x14: 0000000000000028 x13: ffffa0460fe17578 x12: ffffa0460fed4294
| x11: ffffa0460fedc168 x10: ffffffffffffff80 x9 : ffffa0460fe10a70
| x8 : ffffa0460fedc168 x7 : 000000000000b762 x6 : 00000000057c3bdf
| x5 : ffff8000080bbb18 x4 : 0000000000000000 x3 : 0000000000000001
| x2 : ffff91252dfa9000 x1 : 0000000000000060 x0 : 00000000000000f0
| Call trace:
| arch_local_irq_enable+0x4c/0x6c
| __irq_exit_rcu+0x180/0x1ac
| irq_exit_rcu+0x1c/0x44
| el1_interrupt+0x4c/0xe4
| el1h_64_irq_handler+0x18/0x24
| el1h_64_irq+0x74/0x78
| smpboot_thread_fn+0x68/0x2c0
| kthread+0x124/0x130
| ret_from_fork+0x10/0x20
| irq event stamp: 193241
| hardirqs last enabled at (193240): [<ffffa0460fe10a9c>] __do_softirq+0x10c/0x5d8
| hardirqs last disabled at (193241): [<ffffa0461102ffe4>] el1_dbg+0x24/0x90
| softirqs last enabled at (193234): [<ffffa0460fe10e00>] __do_softirq+0x470/0x5d8
| softirqs last disabled at (193239): [<ffffa0460fea9944>] __irq_exit_rcu+0x180/0x1ac
| ---[ end trace 0000000000000000 ]---
The necessary manipulation of DAIF and ICC_PMR_EL1 depends on the
interrupted context, but the structure of gic_handle_irq() makes this
also depend on whether the GIC reports an IRQ, NMI, or special INTID:
* When the interrupted context had regular IRQs masked (and hence the
interrupt must be an NMI), the entry code performs the NMI
entry/exit and gic_handle_irq() should return with DAIF and
ICC_PMR_EL1 unchanged.
This is handled correctly today.
* When the interrupted context had regular IRQs unmasked, the entry code
performs IRQ entry/exit, but expects gic_handle_irq() to always update
ICC_PMR_EL1 and DAIF.IF to unmask NMIs (but not regular IRQs) prior to
returning (which it must do prior to invoking any regular IRQ
handler).
This unbalanced calling convention is necessary because we don't know
whether an NMI has been taken until acknowledged by a read from
ICC_IAR1_EL1, and so we need to perform the read with NMI masked in
case an NMI has been taken (and needs to be handled with NMIs masked).
Unfortunately, this is not handled consistently:
- When ICC_IAR1_EL1 reports a special INTID, gic_handle_irq() returns
immediately without manipulating ICC_PMR_EL1 and DAIF.
- When RPR_EL1 indicates an NMI, gic_handle_irq() calls
gic_handle_nmi() to invoke the NMI handler, then returns without
manipulating ICC_PMR_EL1 and DAIF.
- For regular IRQs, gic_handle_irq() manipulates ICC_PMR_EL1 and DAIF
prior to invoking the IRQ handler.
There were related problems with special INTID handling in the past,
where if an exception was taken from a context with regular IRQs masked
and ICC_IAR_EL1 reported a special INTID, gic_handle_irq() would
erroneously unmask NMIs in NMI context permitted an unexpected nested
NMI. That case specifically was fixed by commit:
a97709f563a078e2 ("irqchip/gic-v3: Do not enable irqs when handling spurious interrups")
... but unfortunately that commit added an inverse problem, where if an
exception was taken from a context with regular IRQs *unmasked* and
ICC_IAR_EL1 reported a special INTID, gic_handle_irq() would erroneously
fail to unmask NMIs (and consequently regular IRQs could not be
unmasked during softirq processing). Before and after that commit, if an
NMI was taken from a context with regular IRQs unmasked gic_handle_irq()
would not unmask NMIs prior to returning, leading to the same problem
with softirq handling.
This patch fixes this by restructuring gic_handle_irq(), splitting it
into separate irqson/irqsoff helper functions which consistently perform
the DAIF + ICC_PMR1_EL1 manipulation based upon the interrupted context,
regardless of the event indicated by ICC_IAR1_EL1.
The special INTID handling is moved into the low-level IRQ/NMI handler
invocation helper functions, so that early returns don't prevent the
required manipulation of DAIF + ICC_PMR_EL1.
Fixes: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220513133038.226182-4-mark.rutland@arm.com
2022-05-13 14:30:38 +01:00
if ( is_nmi ) {
nmi_enter ( ) ;
__gic_handle_nmi ( irqnr , regs ) ;
nmi_exit ( ) ;
2019-01-31 14:58:58 +00:00
}
2019-01-31 14:58:44 +00:00
if ( gic_prio_masking_enabled ( ) ) {
gic_pmr_mask_irqs ( ) ;
gic_arch_enable_irqs ( ) ;
}
irqchip/gic-v3: Fix priority mask handling
When a kernel is built with CONFIG_ARM64_PSEUDO_NMI=y and pseudo-NMIs
are enabled at runtime, GICv3's gic_handle_irq() can leave DAIF and
ICC_PMR_EL1 in an unexpected state in some cases, breaking subsequent
usage of local_irq_enable() and resulting in softirqs being run with
IRQs erroneously masked (possibly resulting in deadlocks).
This can happen when an IRQ exception is taken from a context where
regular IRQs were unmasked, and either:
(1) ICC_IAR1_EL1 indicates a special INTID (e.g. as a result of an IRQ
being withdrawn since the IRQ exception was taken).
(2) ICC_IAR1_EL1 and ICC_RPR_EL1 indicate an NMI was acknowledged.
When an NMI is taken from a context where regular IRQs were masked,
there is no problem.
When CONFIG_ARM64_DEBUG_PRIORITY_MASKING=y, this can be detected with
perf, e.g.
| # ./perf record -a -g -e cycles:k ls -alR / > /dev/null 2>&1
| ------------[ cut here ]------------
| WARNING: CPU: 0 PID: 14 at arch/arm64/include/asm/irqflags.h:32 arch_local_irq_enable+0x4c/0x6c
| Modules linked in:
| CPU: 0 PID: 14 Comm: ksoftirqd/0 Not tainted 5.18.0-rc5-00004-g876c38e3d20b #12
| Hardware name: linux,dummy-virt (DT)
| pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : arch_local_irq_enable+0x4c/0x6c
| lr : __do_softirq+0x110/0x5d8
| sp : ffff8000080bbbc0
| pmr_save: 000000f0
| x29: ffff8000080bbbc0 x28: ffff316ac3a6ca40 x27: 0000000000000000
| x26: 0000000000000000 x25: ffffa04611c06008 x24: ffffa04611c06008
| x23: 0000000040400005 x22: 0000000000000200 x21: ffff8000080bbe20
| x20: ffffa0460fe10320 x19: 0000000000000009 x18: 0000000000000000
| x17: ffff91252dfa9000 x16: ffff800008004000 x15: 0000000000004000
| x14: 0000000000000028 x13: ffffa0460fe17578 x12: ffffa0460fed4294
| x11: ffffa0460fedc168 x10: ffffffffffffff80 x9 : ffffa0460fe10a70
| x8 : ffffa0460fedc168 x7 : 000000000000b762 x6 : 00000000057c3bdf
| x5 : ffff8000080bbb18 x4 : 0000000000000000 x3 : 0000000000000001
| x2 : ffff91252dfa9000 x1 : 0000000000000060 x0 : 00000000000000f0
| Call trace:
| arch_local_irq_enable+0x4c/0x6c
| __irq_exit_rcu+0x180/0x1ac
| irq_exit_rcu+0x1c/0x44
| el1_interrupt+0x4c/0xe4
| el1h_64_irq_handler+0x18/0x24
| el1h_64_irq+0x74/0x78
| smpboot_thread_fn+0x68/0x2c0
| kthread+0x124/0x130
| ret_from_fork+0x10/0x20
| irq event stamp: 193241
| hardirqs last enabled at (193240): [<ffffa0460fe10a9c>] __do_softirq+0x10c/0x5d8
| hardirqs last disabled at (193241): [<ffffa0461102ffe4>] el1_dbg+0x24/0x90
| softirqs last enabled at (193234): [<ffffa0460fe10e00>] __do_softirq+0x470/0x5d8
| softirqs last disabled at (193239): [<ffffa0460fea9944>] __irq_exit_rcu+0x180/0x1ac
| ---[ end trace 0000000000000000 ]---
The necessary manipulation of DAIF and ICC_PMR_EL1 depends on the
interrupted context, but the structure of gic_handle_irq() makes this
also depend on whether the GIC reports an IRQ, NMI, or special INTID:
* When the interrupted context had regular IRQs masked (and hence the
interrupt must be an NMI), the entry code performs the NMI
entry/exit and gic_handle_irq() should return with DAIF and
ICC_PMR_EL1 unchanged.
This is handled correctly today.
* When the interrupted context had regular IRQs unmasked, the entry code
performs IRQ entry/exit, but expects gic_handle_irq() to always update
ICC_PMR_EL1 and DAIF.IF to unmask NMIs (but not regular IRQs) prior to
returning (which it must do prior to invoking any regular IRQ
handler).
This unbalanced calling convention is necessary because we don't know
whether an NMI has been taken until acknowledged by a read from
ICC_IAR1_EL1, and so we need to perform the read with NMI masked in
case an NMI has been taken (and needs to be handled with NMIs masked).
Unfortunately, this is not handled consistently:
- When ICC_IAR1_EL1 reports a special INTID, gic_handle_irq() returns
immediately without manipulating ICC_PMR_EL1 and DAIF.
- When RPR_EL1 indicates an NMI, gic_handle_irq() calls
gic_handle_nmi() to invoke the NMI handler, then returns without
manipulating ICC_PMR_EL1 and DAIF.
- For regular IRQs, gic_handle_irq() manipulates ICC_PMR_EL1 and DAIF
prior to invoking the IRQ handler.
There were related problems with special INTID handling in the past,
where if an exception was taken from a context with regular IRQs masked
and ICC_IAR_EL1 reported a special INTID, gic_handle_irq() would
erroneously unmask NMIs in NMI context, permitting an unexpected nested
NMI. That case specifically was fixed by commit:
a97709f563a078e2 ("irqchip/gic-v3: Do not enable irqs when handling spurious interrups")
... but unfortunately that commit added an inverse problem, where if an
exception was taken from a context with regular IRQs *unmasked* and
ICC_IAR_EL1 reported a special INTID, gic_handle_irq() would erroneously
fail to unmask NMIs (and consequently regular IRQs could not be
unmasked during softirq processing). Before and after that commit, if an
NMI was taken from a context with regular IRQs unmasked gic_handle_irq()
would not unmask NMIs prior to returning, leading to the same problem
with softirq handling.
This patch fixes this by restructuring gic_handle_irq(), splitting it
into separate irqson/irqsoff helper functions which consistently perform
the DAIF + ICC_PMR_EL1 manipulation based upon the interrupted context,
regardless of the event indicated by ICC_IAR1_EL1.
The special INTID handling is moved into the low-level IRQ/NMI handler
invocation helper functions, so that early returns don't prevent the
required manipulation of DAIF + ICC_PMR_EL1.
Fixes: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220513133038.226182-4-mark.rutland@arm.com
2022-05-13 14:30:38 +01:00
if ( ! is_nmi )
__gic_handle_irq ( irqnr , regs ) ;
}
2020-04-25 15:24:01 +01:00
irqchip/gic-v3: Fix priority mask handling
When a kernel is built with CONFIG_ARM64_PSEUDO_NMI=y and pseudo-NMIs
are enabled at runtime, GICv3's gic_handle_irq() can leave DAIF and
ICC_PMR_EL1 in an unexpected state in some cases, breaking subsequent
usage of local_irq_enable() and resulting in softirqs being run with
IRQs erroneously masked (possibly resulting in deadlocks).
This can happen when an IRQ exception is taken from a context where
regular IRQs were unmasked, and either:
(1) ICC_IAR1_EL1 indicates a special INTID (e.g. as a result of an IRQ
being withdrawn since the IRQ exception was taken).
(2) ICC_IAR1_EL1 and ICC_RPR_EL1 indicate an NMI was acknowledged.
When an NMI is taken from a context where regular IRQs were masked,
there is no problem.
When CONFIG_ARM64_DEBUG_PRIORITY_MASKING=y, this can be detected with
perf, e.g.
| # ./perf record -a -g -e cycles:k ls -alR / > /dev/null 2>&1
| ------------[ cut here ]------------
| WARNING: CPU: 0 PID: 14 at arch/arm64/include/asm/irqflags.h:32 arch_local_irq_enable+0x4c/0x6c
| Modules linked in:
| CPU: 0 PID: 14 Comm: ksoftirqd/0 Not tainted 5.18.0-rc5-00004-g876c38e3d20b #12
| Hardware name: linux,dummy-virt (DT)
| pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
| pc : arch_local_irq_enable+0x4c/0x6c
| lr : __do_softirq+0x110/0x5d8
| sp : ffff8000080bbbc0
| pmr_save: 000000f0
| x29: ffff8000080bbbc0 x28: ffff316ac3a6ca40 x27: 0000000000000000
| x26: 0000000000000000 x25: ffffa04611c06008 x24: ffffa04611c06008
| x23: 0000000040400005 x22: 0000000000000200 x21: ffff8000080bbe20
| x20: ffffa0460fe10320 x19: 0000000000000009 x18: 0000000000000000
| x17: ffff91252dfa9000 x16: ffff800008004000 x15: 0000000000004000
| x14: 0000000000000028 x13: ffffa0460fe17578 x12: ffffa0460fed4294
| x11: ffffa0460fedc168 x10: ffffffffffffff80 x9 : ffffa0460fe10a70
| x8 : ffffa0460fedc168 x7 : 000000000000b762 x6 : 00000000057c3bdf
| x5 : ffff8000080bbb18 x4 : 0000000000000000 x3 : 0000000000000001
| x2 : ffff91252dfa9000 x1 : 0000000000000060 x0 : 00000000000000f0
| Call trace:
| arch_local_irq_enable+0x4c/0x6c
| __irq_exit_rcu+0x180/0x1ac
| irq_exit_rcu+0x1c/0x44
| el1_interrupt+0x4c/0xe4
| el1h_64_irq_handler+0x18/0x24
| el1h_64_irq+0x74/0x78
| smpboot_thread_fn+0x68/0x2c0
| kthread+0x124/0x130
| ret_from_fork+0x10/0x20
| irq event stamp: 193241
| hardirqs last enabled at (193240): [<ffffa0460fe10a9c>] __do_softirq+0x10c/0x5d8
| hardirqs last disabled at (193241): [<ffffa0461102ffe4>] el1_dbg+0x24/0x90
| softirqs last enabled at (193234): [<ffffa0460fe10e00>] __do_softirq+0x470/0x5d8
| softirqs last disabled at (193239): [<ffffa0460fea9944>] __irq_exit_rcu+0x180/0x1ac
| ---[ end trace 0000000000000000 ]---
The necessary manipulation of DAIF and ICC_PMR_EL1 depends on the
interrupted context, but the structure of gic_handle_irq() makes this
also depend on whether the GIC reports an IRQ, NMI, or special INTID:
* When the interrupted context had regular IRQs masked (and hence the
interrupt must be an NMI), the entry code performs the NMI
entry/exit and gic_handle_irq() should return with DAIF and
ICC_PMR_EL1 unchanged.
This is handled correctly today.
* When the interrupted context had regular IRQs unmasked, the entry code
performs IRQ entry/exit, but expects gic_handle_irq() to always update
ICC_PMR_EL1 and DAIF.IF to unmask NMIs (but not regular IRQs) prior to
returning (which it must do prior to invoking any regular IRQ
handler).
This unbalanced calling convention is necessary because we don't know
whether an NMI has been taken until acknowledged by a read from
ICC_IAR1_EL1, and so we need to perform the read with NMI masked in
case an NMI has been taken (and needs to be handled with NMIs masked).
Unfortunately, this is not handled consistently:
- When ICC_IAR1_EL1 reports a special INTID, gic_handle_irq() returns
immediately without manipulating ICC_PMR_EL1 and DAIF.
- When ICC_RPR_EL1 indicates an NMI, gic_handle_irq() calls
gic_handle_nmi() to invoke the NMI handler, then returns without
manipulating ICC_PMR_EL1 and DAIF.
- For regular IRQs, gic_handle_irq() manipulates ICC_PMR_EL1 and DAIF
prior to invoking the IRQ handler.
There were related problems with special INTID handling in the past,
where if an exception was taken from a context with regular IRQs masked
and ICC_IAR_EL1 reported a special INTID, gic_handle_irq() would
erroneously unmask NMIs in NMI context, permitting an unexpected nested
NMI. That case specifically was fixed by commit:
a97709f563a078e2 ("irqchip/gic-v3: Do not enable irqs when handling spurious interrups")
... but unfortunately that commit added an inverse problem, where if an
exception was taken from a context with regular IRQs *unmasked* and
ICC_IAR_EL1 reported a special INTID, gic_handle_irq() would erroneously
fail to unmask NMIs (and consequently regular IRQs could not be
unmasked during softirq processing). Before and after that commit, if an
NMI was taken from a context with regular IRQs unmasked gic_handle_irq()
would not unmask NMIs prior to returning, leading to the same problem
with softirq handling.
This patch fixes this by restructuring gic_handle_irq(), splitting it
into separate irqson/irqsoff helper functions which consistently perform
the DAIF + ICC_PMR_EL1 manipulation based upon the interrupted context,
regardless of the event indicated by ICC_IAR1_EL1.
The special INTID handling is moved into the low-level IRQ/NMI handler
invocation helper functions, so that early returns don't prevent the
required manipulation of DAIF + ICC_PMR_EL1.
Fixes: f32c926651dcd168 ("irqchip/gic-v3: Handle pseudo-NMIs")
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220513133038.226182-4-mark.rutland@arm.com
2022-05-13 14:30:38 +01:00
/*
 * Handle an exception taken while the interrupted context had IRQs
 * disabled. In that situation the only thing that can have fired is an NMI.
 *
 * The entry code has already performed NMI entry and has called us with
 * DAIF.IF set so that NMIs stay masked. On return, DAIF.IF and ICC_PMR_EL1
 * must be exactly as we found them.
 */
static void __gic_handle_irq_from_irqsoff(struct pt_regs *regs)
{
	u64 saved_pmr;
	u32 intid;

	/*
	 * Even though IRQs were disabled in the interrupted context, the
	 * entry code has programmed PMR with a value that lets *any*
	 * interrupt be acknowledged, not only NMIs. Should the NMI have been
	 * retired in the meantime while a regular IRQ is pending, that IRQ
	 * would be acknowledged and handled in NMI context.
	 *
	 * As a stop-gap, tighten PMR so that only NMIs can be acknowledged
	 * for the duration of the IAR read, then put the previous value back.
	 */
	saved_pmr = gic_read_pmr();
	gic_pmr_mask_irqs();
	isb();

	intid = gic_read_iar();

	gic_write_pmr(saved_pmr);

	__gic_handle_nmi(intid, regs);
}
/*
 * Top-level interrupt entry point: pick the irqsoff or irqson flavour of
 * the handler depending on whether NMIs are supported and whether the
 * interrupted context (described by @regs) had interrupts enabled.
 */
static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs)
{
	if (unlikely(gic_supports_nmi() && !interrupts_enabled(regs))) {
		__gic_handle_irq_from_irqsoff(regs);
		return;
	}

	__gic_handle_irq_from_irqson(regs);
}
2019-01-31 14:58:54 +00:00
static u32 gic_get_pribits ( void )
{
u32 pribits ;
pribits = gic_read_ctlr ( ) ;
pribits & = ICC_CTLR_EL1_PRI_BITS_MASK ;
pribits > > = ICC_CTLR_EL1_PRI_BITS_SHIFT ;
pribits + + ;
return pribits ;
}
/*
 * Probe whether Group0 is accessible to us. PMR is modified temporarily
 * and restored to its previous value before returning.
 */
static bool gic_has_group0(void)
{
	u32 saved_pmr = gic_read_pmr();
	u32 readback;

	/*
	 * Find out whether Group0 is under the control of EL3 by writing
	 * the highest possible non-zero priority to PMR.
	 *
	 * If SCR_EL3.FIQ is set, the priority gets shifted down so that the
	 * CPU interface can set bit 7 and keep the actual priority in the
	 * non-secure range. In doing so it loses the least significant bit
	 * and the actual priority becomes 0x80. Reading it back then returns
	 * 0, which tells us that we don't have access to Group0.
	 */
	gic_write_pmr(BIT(8 - gic_get_pribits()));
	readback = gic_read_pmr();

	gic_write_pmr(saved_pmr);

	return !!readback;
}
2014-06-30 16:01:31 +01:00
/*
 * Initialise the distributor: disable it, put every SPI and extended SPI
 * in Group-1, write the extended SPI enable-clear/active-clear, config and
 * priority registers, run the common gic_dist_config() setup, re-enable the
 * distributor and finally route every SPI/ESPI to the CPU running this code.
 */
static void __init gic_dist_init ( void )
{
unsigned int i ;
u64 affinity ;
void __iomem * base = gic_data . dist_base ;
2020-03-04 20:33:08 +00:00
u32 val ;
2014-06-30 16:01:31 +01:00
/* Disable the distributor */
writel_relaxed ( 0 , base + GICD_CTLR ) ;
gic_dist_wait_for_rwp ( ) ;
2016-05-06 19:41:56 +01:00
/*
* Configure SPIs as non - secure Group - 1. This will only matter
* if the GIC only has a single security state . This will not
* do the right thing if the kernel is running in secure mode ,
* but that ' s not the intended use case anyway .
*/
2019-07-16 15:17:31 +01:00
/* One 32-bit register per 32 interrupts, hence the i / 8 byte offset */
for ( i = 32 ; i < GIC_LINE_NR ; i + = 32 )
2016-05-06 19:41:56 +01:00
writel_relaxed ( ~ 0 , base + GICD_IGROUPR + i / 8 ) ;
2019-07-16 15:17:31 +01:00
/* Extended SPI range, not handled by the GICv2/GICv3 common code */
for ( i = 0 ; i < GIC_ESPI_NR ; i + = 32 ) {
writel_relaxed ( ~ 0U , base + GICD_ICENABLERnE + i / 8 ) ;
writel_relaxed ( ~ 0U , base + GICD_ICACTIVERnE + i / 8 ) ;
}
for ( i = 0 ; i < GIC_ESPI_NR ; i + = 32 )
writel_relaxed ( ~ 0U , base + GICD_IGROUPRnE + i / 8 ) ;
/* One 32-bit config register per 16 interrupts (i / 4 byte offset) */
for ( i = 0 ; i < GIC_ESPI_NR ; i + = 16 )
writel_relaxed ( 0 , base + GICD_ICFGRnE + i / 4 ) ;
/* One 32-bit priority register per 4 interrupts (byte offset i) */
for ( i = 0 ; i < GIC_ESPI_NR ; i + = 4 )
writel_relaxed ( GICD_INT_DEF_PRI_X4 , base + GICD_IPRIORITYRnE + i ) ;
2022-04-05 19:38:57 +01:00
/* Now do the common stuff */
gic_dist_config ( base , GIC_LINE_NR , NULL ) ;
2014-06-30 16:01:31 +01:00
2020-03-04 20:33:08 +00:00
val = GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | GICD_CTLR_ENABLE_G1 ;
/* Also request nASSGIreq when GICD_TYPER2 advertises nASSGIcap */
if ( gic_data . rdists . gicd_typer2 & GICD_TYPER2_nASSGIcap ) {
pr_info ( " Enabling SGIs without active state \n " ) ;
val | = GICD_CTLR_nASSGIreq ;
}
2022-04-05 19:38:57 +01:00
/* Enable distributor with ARE, Group1, and wait for it to drain */
2020-03-04 20:33:08 +00:00
writel_relaxed ( val , base + GICD_CTLR ) ;
2022-04-05 19:38:57 +01:00
gic_dist_wait_for_rwp ( ) ;
2014-06-30 16:01:31 +01:00
/*
* Set all global interrupts to the boot CPU only . ARE must be
* enabled .
*/
2023-05-29 16:21:42 +01:00
affinity = gic_cpu_to_affinity ( smp_processor_id ( ) ) ;
2019-07-16 15:17:31 +01:00
/* One 64-bit router register per interrupt, hence the i * 8 offset */
for ( i = 32 ; i < GIC_LINE_NR ; i + + )
2015-10-01 13:47:16 +01:00
gic_write_irouter ( affinity , base + GICD_IROUTER + i * 8 ) ;
2019-07-16 15:17:31 +01:00
for ( i = 0 ; i < GIC_ESPI_NR ; i + + )
gic_write_irouter ( affinity , base + GICD_IROUTERnE + i * 8 ) ;
2014-06-30 16:01:31 +01:00
}
2016-12-19 17:00:38 +00:00
/*
 * Walk the redistributor frames of every known region, calling @fn on each
 * frame in turn.
 *
 * @fn returns 0 to stop the walk, which is reported to the caller as
 * success, and non-zero to move on to the next frame.
 *
 * Returns 0 if some call to @fn returned 0, -ENODEV otherwise (including the
 * case where no frame was visited at all).
 */
static int gic_iterate_rdists ( int ( * fn ) ( struct redist_region * , void __iomem * ) )
2014-06-30 16:01:31 +01:00
{
2016-12-19 17:00:38 +00:00
int ret = - ENODEV ;
2014-06-30 16:01:31 +01:00
int i ;
2014-11-24 14:35:10 +00:00
for ( i = 0 ; i < gic_data . nr_redist_regions ; i + + ) {
void __iomem * ptr = gic_data . redist_regions [ i ] . redist_base ;
2016-12-19 17:00:38 +00:00
u64 typer ;
2014-06-30 16:01:31 +01:00
u32 reg ;
/* Sanity-check the architecture revision; give up on all regions if it is unknown */
reg = readl_relaxed ( ptr + GICR_PIDR2 ) & GIC_PIDR2_ARCH_MASK ;
if ( reg ! = GIC_PIDR2_ARCH_GICv3 & &
reg ! = GIC_PIDR2_ARCH_GICv4 ) { /* We're in trouble... */
pr_warn ( " No redistributor present @%p \n " , ptr ) ;
break ;
}
do {
2015-10-01 13:47:16 +01:00
typer = gic_read_typer ( ptr + GICR_TYPER ) ;
2016-12-19 17:00:38 +00:00
ret = fn ( gic_data . redist_regions + i , ptr ) ;
if ( ! ret )
2014-06-30 16:01:31 +01:00
return 0 ;
2016-01-19 14:11:16 +01:00
/* A region flagged single_redist holds exactly one frame */
if ( gic_data . redist_regions [ i ] . single_redist )
break ;
2014-06-30 16:01:31 +01:00
/* Step to the next frame: explicit stride if one is set, else computed from TYPER */
if ( gic_data . redist_stride ) {
ptr + = gic_data . redist_stride ;
} else {
ptr + = SZ_64K * 2 ; /* Skip RD_base + SGI_base */
if ( typer & GICR_TYPER_VLPIS )
ptr + = SZ_64K * 2 ; /* Skip VLPI_base + reserved page */
}
} while ( ! ( typer & GICR_TYPER_LAST ) ) ;
}
2016-12-19 17:00:38 +00:00
return ret ? - ENODEV : 0 ;
}
/*
 * gic_iterate_rdists() callback: check whether the redistributor frame at
 * @ptr (inside @region) belongs to the current CPU, by comparing the upper
 * 32 bits of GICR_TYPER with this CPU's packed affinity.
 *
 * On a match, record the frame's virtual and physical base for this CPU and
 * return 0 (which stops the walk); otherwise return 1 to try the next frame.
 */
static int __gic_populate_rdist ( struct redist_region * region , void __iomem * ptr )
{
2023-05-29 16:21:42 +01:00
unsigned long mpidr ;
2016-12-19 17:00:38 +00:00
u64 typer ;
u32 aff ;
/*
* Convert affinity to a 32 bit value that can be matched to
* GICR_TYPER bits [ 63 : 32 ] .
*/
2023-05-29 16:21:42 +01:00
mpidr = gic_cpu_to_affinity ( smp_processor_id ( ) ) ;
2016-12-19 17:00:38 +00:00
aff = ( MPIDR_AFFINITY_LEVEL ( mpidr , 3 ) < < 24 |
MPIDR_AFFINITY_LEVEL ( mpidr , 2 ) < < 16 |
MPIDR_AFFINITY_LEVEL ( mpidr , 1 ) < < 8 |
MPIDR_AFFINITY_LEVEL ( mpidr , 0 ) ) ;
typer = gic_read_typer ( ptr + GICR_TYPER ) ;
if ( ( typer > > 32 ) = = aff ) {
/* Offset of this frame within its region, used to derive its physical address */
u64 offset = ptr - region - > redist_base ;
2020-03-04 20:33:12 +00:00
raw_spin_lock_init ( & gic_data_rdist ( ) - > rd_lock ) ;
2016-12-19 17:00:38 +00:00
gic_data_rdist_rd_base ( ) = ptr ;
gic_data_rdist ( ) - > phys_base = region - > phys_base + offset ;
pr_info ( " CPU%d: found redistributor %lx region %d:%pa \n " ,
smp_processor_id ( ) , mpidr ,
( int ) ( region - gic_data . redist_regions ) ,
& gic_data_rdist ( ) - > phys_base ) ;
return 0 ;
}
/* Try next one */
return 1 ;
}
/*
 * Locate the redistributor frame belonging to the current CPU.
 *
 * Returns 0 when the frame was found, or warns and returns -ENODEV when the
 * walk over all regions found no match.
 */
static int gic_populate_rdist ( void )
{
if ( gic_iterate_rdists ( __gic_populate_rdist ) = = 0 )
return 0 ;
2014-06-30 16:01:31 +01:00
/* We couldn't even deal with ourselves... */
2015-10-01 13:47:15 +01:00
WARN ( true , " CPU%d: mpidr %lx has no re-distributor! \n " ,
2016-12-19 17:00:38 +00:00
smp_processor_id ( ) ,
( unsigned long ) cpu_logical_map ( smp_processor_id ( ) ) ) ;
2014-06-30 16:01:31 +01:00
return - ENODEV ;
}
2019-07-18 11:15:14 +01:00
/*
 * gic_iterate_rdists() callback: fold the capabilities advertised by the
 * redistributor frame at @ptr into the global gic_data.rdists feature flags
 * and gic_data.ppi_nr. Each flag is AND-ed with what this frame reports, and
 * ppi_nr is lowered to this frame's PPI count if that is smaller.
 *
 * Always returns 1, so the walk never stops early on this callback.
 */
static int __gic_update_rdist_properties(struct redist_region *region,
					 void __iomem *ptr)
{
	u64 typer = gic_read_typer(ptr + GICR_TYPER);
	u32 ctlr = readl_relaxed(ptr + GICR_CTLR);
	bool direct_lpi;

	/* Boot-time cleanup */
	if ((typer & GICR_TYPER_VLPIS) && (typer & GICR_TYPER_RVPEID)) {
		void __iomem *vlpi_base = ptr + SZ_128K;
		u64 val;

		/* Deactivate any present vPE */
		val = gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER);
		if (val & GICR_VPENDBASER_Valid)
			gicr_write_vpendbaser(GICR_VPENDBASER_PendingLast,
					      vlpi_base + GICR_VPENDBASER);

		/* Mark the VPE table as invalid */
		val = gicr_read_vpropbaser(vlpi_base + GICR_VPROPBASER);
		val &= ~GICR_VPROPBASER_4_1_VALID;
		gicr_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER);
	}

	gic_data.rdists.has_vlpis &= !!(typer & GICR_TYPER_VLPIS);
	gic_data.rdists.has_rvpeid &= !!(typer & GICR_TYPER_RVPEID);

	/*
	 * Whatever the documentation says, TYPER.RVPEID implies some form of
	 * DirectLPI, and CTLR.IR implies another subset of DirectLPI which
	 * the ITS driver can use for LPIs (but not VLPIs).
	 *
	 * These are three ways of expressing the same thing across
	 * revisions and relaxations of the architecture, so they are all
	 * gathered under the single 'direct_lpi' flag.
	 */
	direct_lpi = (typer & GICR_TYPER_DirectLPIS) ||
		     (ctlr & GICR_CTLR_IR) ||
		     gic_data.rdists.has_rvpeid;
	gic_data.rdists.has_direct_lpi &= direct_lpi;

	gic_data.rdists.has_vpend_valid_dirty &= !!(typer & GICR_TYPER_DIRTY);

	/* Detect non-sensical configurations */
	if (WARN_ON_ONCE(gic_data.rdists.has_rvpeid && !gic_data.rdists.has_vlpis)) {
		gic_data.rdists.has_direct_lpi = false;
		gic_data.rdists.has_vlpis = false;
		gic_data.rdists.has_rvpeid = false;
	}

	gic_data.ppi_nr = min(GICR_TYPER_NR_PPIS(typer), gic_data.ppi_nr);

	return 1;
}
2019-07-18 11:15:14 +01:00
/*
 * Recompute the redistributor-derived properties by visiting every frame,
 * then log the resulting feature set.
 *
 * ppi_nr starts at UINT_MAX so that the per-frame min() can only lower it;
 * if it is still UINT_MAX afterwards no frame was visited, so warn and fall
 * back to 0.
 */
static void gic_update_rdist_properties ( void )
2016-12-19 17:01:52 +00:00
{
2019-07-18 11:15:14 +01:00
gic_data . ppi_nr = UINT_MAX ;
gic_iterate_rdists ( __gic_update_rdist_properties ) ;
if ( WARN_ON ( gic_data . ppi_nr = = UINT_MAX ) )
gic_data . ppi_nr = 0 ;
2022-04-05 19:38:56 +01:00
pr_info ( " GICv3 features: %d PPIs%s%s \n " ,
gic_data . ppi_nr ,
gic_data . has_rss ? " , RSS " : " " ,
gic_data . rdists . has_direct_lpi ? " , DirectLPI " : " " ) ;
2020-04-10 11:13:26 +01:00
/* The GICv4 line is only printed when VLPIs are supported */
if ( gic_data . rdists . has_vlpis )
pr_info ( " GICv4 features: %s%s%s \n " ,
gic_data . rdists . has_direct_lpi ? " DirectLPI " : " " ,
gic_data . rdists . has_rvpeid ? " RVPEID " : " " ,
gic_data . rdists . has_vpend_valid_dirty ? " Valid+Dirty " : " " ) ;
2016-12-19 17:01:52 +00:00
}
2019-01-31 14:58:57 +00:00
/* Check whether it's single security state view */
static inline bool gic_dist_security_disabled ( void )
{
return readl_relaxed ( gic_data . dist_base + GICD_CTLR ) & GICD_CTLR_DS ;
}
2014-08-26 16:03:35 +01:00
/*
 * Program the GIC CPU interface system registers of the current CPU:
 * enable SRE, set the priority mask (or sanity-check the NMI priority
 * configuration), reset BPR1, select the EOI mode, clear the active
 * priority registers and enable Group1. Finally record this CPU's RSS
 * capability and complain if any CPU pair cannot exchange SGIs.
 */
static void gic_cpu_sys_reg_init ( void )
{
2017-10-06 10:24:00 -05:00
int i , cpu = smp_processor_id ( ) ;
2023-05-29 16:21:42 +01:00
u64 mpidr = gic_cpu_to_affinity ( cpu ) ;
2017-10-06 10:24:00 -05:00
u64 need_rss = MPIDR_RS ( mpidr ) ;
2018-03-20 09:46:42 +00:00
bool group0 ;
2019-01-31 14:58:54 +00:00
u32 pribits ;
2017-10-06 10:24:00 -05:00
2015-09-30 11:48:01 +01:00
/*
* Need to check that the SRE bit has actually been set . If
* not , it means that SRE is disabled at EL2 . We ' re going to
* die painfully , and there is nothing we can do about it .
*
* Kindly inform the luser .
*/
if ( ! gic_enable_sre ( ) )
pr_err ( " GIC: unable to set SRE (disabled at EL2), panic ahead \n " ) ;
2014-08-26 16:03:35 +01:00
2019-01-31 14:58:54 +00:00
pribits = gic_get_pribits ( ) ;
2018-03-20 09:46:42 +00:00
2019-01-31 14:58:54 +00:00
group0 = gic_has_group0 ( ) ;
2018-03-20 09:46:42 +00:00
2014-08-26 16:03:35 +01:00
/* Set priority mask register */
2019-01-31 14:58:57 +00:00
if ( ! gic_prio_masking_enabled ( ) ) {
2019-01-31 14:58:55 +00:00
write_gicreg ( DEFAULT_PMR_VALUE , ICC_PMR_EL1 ) ;
2020-09-12 16:37:07 +01:00
} else if ( gic_supports_nmi ( ) ) {
2019-01-31 14:58:57 +00:00
/*
* Mismatch configuration with boot CPU , the system is likely
* to die as interrupt masking will not work properly on all
* CPUs
2020-09-12 16:37:07 +01:00
*
* The boot CPU calls this function before enabling NMI support ,
* and as a result we ' ll never see this warning in the boot path
* for that CPU .
2019-01-31 14:58:57 +00:00
*/
2020-09-12 16:37:07 +01:00
if ( static_branch_unlikely ( & gic_nonsecure_priorities ) )
WARN_ON ( ! group0 | | gic_dist_security_disabled ( ) ) ;
else
WARN_ON ( group0 & & ! gic_dist_security_disabled ( ) ) ;
2019-01-31 14:58:57 +00:00
}
2014-08-26 16:03:35 +01:00
2016-08-19 17:13:09 +01:00
/*
* Some firmwares hand over to the kernel with the BPR changed from
* its reset value ( and with a value large enough to prevent
* any pre - emptive interrupts from working at all ) . Writing a zero
* to BPR restores its reset value .
*/
gic_write_bpr1 ( 0 ) ;
2018-03-26 14:09:25 -07:00
if ( static_branch_likely ( & supports_deactivate_key ) ) {
2015-08-26 17:00:42 +01:00
/* EOI drops priority only (mode 1) */
gic_write_ctlr ( ICC_CTLR_EL1_EOImode_drop ) ;
} else {
/* EOI deactivates interrupt too (mode 0) */
gic_write_ctlr ( ICC_CTLR_EL1_EOImode_drop_dir ) ;
}
2014-08-26 16:03:35 +01:00
2018-03-20 09:46:42 +00:00
/* Always whack Group0 before Group1 */
if ( group0 ) {
/* The cases fall through, so more priority bits clear more AP0Rn registers */
switch ( pribits ) {
case 8 :
case 7 :
write_gicreg ( 0 , ICC_AP0R3_EL1 ) ;
write_gicreg ( 0 , ICC_AP0R2_EL1 ) ;
2020-08-23 17:36:59 -05:00
fallthrough ;
2018-03-20 09:46:42 +00:00
case 6 :
write_gicreg ( 0 , ICC_AP0R1_EL1 ) ;
2020-08-23 17:36:59 -05:00
fallthrough ;
2018-03-20 09:46:42 +00:00
case 5 :
case 4 :
write_gicreg ( 0 , ICC_AP0R0_EL1 ) ;
}
isb ( ) ;
}
2018-03-09 14:53:19 +00:00
2018-03-20 09:46:42 +00:00
/* Same fall-through scheme for the Group1 AP1Rn registers */
switch ( pribits ) {
2018-03-09 14:53:19 +00:00
case 8 :
case 7 :
write_gicreg ( 0 , ICC_AP1R3_EL1 ) ;
write_gicreg ( 0 , ICC_AP1R2_EL1 ) ;
2020-08-23 17:36:59 -05:00
fallthrough ;
2018-03-09 14:53:19 +00:00
case 6 :
write_gicreg ( 0 , ICC_AP1R1_EL1 ) ;
2020-08-23 17:36:59 -05:00
fallthrough ;
2018-03-09 14:53:19 +00:00
case 5 :
case 4 :
write_gicreg ( 0 , ICC_AP1R0_EL1 ) ;
}
isb ( ) ;
2014-08-26 16:03:35 +01:00
/* ... and let's hit the road... */
gic_write_grpen1 ( 1 ) ;
2017-10-06 10:24:00 -05:00
/* Keep the RSS capability status in per_cpu variable */
per_cpu ( has_rss , cpu ) = ! ! ( gic_read_ctlr ( ) & ICC_CTLR_EL1_RSS ) ;
/* Check that all the CPUs are capable of sending SGIs to other CPUs */
for_each_online_cpu ( i ) {
bool have_rss = per_cpu ( has_rss , i ) & & per_cpu ( has_rss , cpu ) ;
2023-05-29 16:21:42 +01:00
/* need_rss accumulates: once any visited CPU has a non-zero RS, it stays set */
need_rss | = MPIDR_RS ( gic_cpu_to_affinity ( i ) ) ;
2017-10-06 10:24:00 -05:00
if ( need_rss & & ( ! have_rss ) )
pr_crit ( " CPU%d (%lx) can't SGI CPU%d (%lx), no RSS \n " ,
cpu , ( unsigned long ) mpidr ,
2023-05-29 16:21:42 +01:00
i , ( unsigned long ) gic_cpu_to_affinity ( i ) ) ;
2017-10-06 10:24:00 -05:00
}
/*
* GIC spec says , when ICC_CTLR_EL1 . RSS = = 1 and GICD_TYPER . RSS = = 0 ,
* writing ICC_ASGI1R_EL1 register with RS ! = 0 is a CONSTRAINED
* UNPREDICTABLE choice of :
* - The write is ignored .
* - The RS field is treated as 0.
*/
if ( need_rss & & ( ! gic_data . has_rss ) )
pr_crit_once ( " RSS is required but GICD doesn't support it \n " ) ;
2014-08-26 16:03:35 +01:00
}
2018-02-25 11:27:04 +00:00
/* Set from the "irqchip.gicv3_nolpi" early parameter; when true, LPI support is reported as absent */
static bool gicv3_nolpi ;
/* Parse the boolean early parameter into gicv3_nolpi; returns kstrtobool()'s result */
static int __init gicv3_nolpi_cfg ( char * buf )
{
2022-11-01 22:13:51 +01:00
return kstrtobool ( buf , & gicv3_nolpi ) ;
2018-02-25 11:27:04 +00:00
}
early_param ( " irqchip.gicv3_nolpi " , gicv3_nolpi_cfg ) ;
2014-11-24 14:35:18 +00:00
static int gic_dist_supports_lpis ( void )
{
2018-07-27 14:51:04 +01:00
return ( IS_ENABLED ( CONFIG_ARM_GIC_V3_ITS ) & &
! ! ( readl_relaxed ( gic_data . dist_base + GICD_TYPER ) & GICD_TYPER_LPIS ) & &
! gicv3_nolpi ) ;
2014-11-24 14:35:18 +00:00
}
2014-06-30 16:01:31 +01:00
/*
 * Per-CPU GIC bring-up for the current CPU: find and enable its
 * redistributor, put its SGIs/PPIs in Group-1, apply the common per-CPU
 * configuration and initialise the CPU interface system registers.
 * Returns early, doing nothing else, if no redistributor is found.
 */
static void gic_cpu_init ( void )
{
void __iomem * rbase ;
2019-07-18 11:15:14 +01:00
int i ;
2014-06-30 16:01:31 +01:00
/* Register ourselves with the rest of the world */
if ( gic_populate_rdist ( ) )
return ;
2014-08-26 16:03:34 +01:00
gic_enable_redist ( true ) ;
2014-06-30 16:01:31 +01:00
2019-07-25 15:30:51 +01:00
/* More than 16 PPIs or any ESPI requires ICC_CTLR_EL1.ExtRange on this CPU */
WARN ( ( gic_data . ppi_nr > 16 | | GIC_ESPI_NR ! = 0 ) & &
! ( gic_read_ctlr ( ) & ICC_CTLR_EL1_ExtRange ) ,
" Distributor has extended ranges, but CPU%d doesn't \n " ,
smp_processor_id ( ) ) ;
2014-06-30 16:01:31 +01:00
rbase = gic_data_rdist_sgi_base ( ) ;
2016-05-06 19:41:56 +01:00
/* Configure SGIs/PPIs as non-secure Group-1 */
2019-07-18 11:15:14 +01:00
/* ppi_nr + 16 counts the 16 SGIs plus the PPIs; one 32-bit register per 32 interrupts */
for ( i = 0 ; i < gic_data . ppi_nr + 16 ; i + = 32 )
writel_relaxed ( ~ 0 , rbase + GICR_IGROUPR0 + i / 8 ) ;
2016-05-06 19:41:56 +01:00
2019-07-18 11:15:14 +01:00
gic_cpu_config ( rbase , gic_data . ppi_nr + 16 , gic_redist_wait_for_rwp ) ;
2014-06-30 16:01:31 +01:00
2014-08-26 16:03:35 +01:00
/* initialise system registers */
gic_cpu_sys_reg_init ( ) ;
2014-06-30 16:01:31 +01:00
}
# ifdef CONFIG_SMP
2016-07-13 17:16:05 +00:00
2017-10-06 10:24:00 -05:00
/* MPIDR_RS() of the affinity, shifted into the RS field position of ICC_SGI1R */
# define MPIDR_TO_SGI_RS(mpidr) (MPIDR_RS(mpidr) << ICC_SGI1R_RS_SHIFT)
/* Affinity with its low four bits cleared; those four bits select a bit in the SGI target list */
# define MPIDR_TO_SGI_CLUSTER_ID(mpidr) ((mpidr) & ~0xFUL)
2016-07-13 17:16:05 +00:00
/*
 * CPU hotplug "starting" callback: initialise the GIC for the CPU coming
 * up and, when the distributor supports LPIs, its ITS state as well.
 * Always returns 0; @cpu is unused.
 */
static int gic_starting_cpu(unsigned int cpu)
{
	gic_cpu_init();

	if (!gic_dist_supports_lpis())
		return 0;

	its_cpu_init();
	return 0;
}
static u16 gic_compute_target_list ( int * base_cpu , const struct cpumask * mask ,
2015-10-01 13:47:15 +01:00
unsigned long cluster_id )
2014-06-30 16:01:31 +01:00
{
2016-09-19 18:29:15 +01:00
int next_cpu , cpu = * base_cpu ;
2023-05-29 16:21:42 +01:00
unsigned long mpidr ;
2014-06-30 16:01:31 +01:00
u16 tlist = 0 ;
2023-05-29 16:21:42 +01:00
mpidr = gic_cpu_to_affinity ( cpu ) ;
2014-06-30 16:01:31 +01:00
while ( cpu < nr_cpu_ids ) {
tlist | = 1 < < ( mpidr & 0xf ) ;
2016-09-19 18:29:15 +01:00
next_cpu = cpumask_next ( cpu , mask ) ;
if ( next_cpu > = nr_cpu_ids )
2014-06-30 16:01:31 +01:00
goto out ;
2016-09-19 18:29:15 +01:00
cpu = next_cpu ;
2014-06-30 16:01:31 +01:00
2023-05-29 16:21:42 +01:00
mpidr = gic_cpu_to_affinity ( cpu ) ;
2014-06-30 16:01:31 +01:00
2017-10-06 10:24:00 -05:00
if ( cluster_id ! = MPIDR_TO_SGI_CLUSTER_ID ( mpidr ) ) {
2014-06-30 16:01:31 +01:00
cpu - - ;
goto out ;
}
}
out :
* base_cpu = cpu ;
return tlist ;
}
2014-11-12 13:46:06 +00:00
/* Affinity level @level of @cluster_id, shifted to the matching ICC_SGI1R_AFFINITY_<level>_SHIFT position */
# define MPIDR_TO_SGI_AFFINITY(cluster_id, level) \
( MPIDR_AFFINITY_LEVEL ( cluster_id , level ) \
< < ICC_SGI1R_AFFINITY_ # # level # # _SHIFT )
2014-06-30 16:01:31 +01:00
/*
 * Compose an SGI1R value from the affinity levels 3..1 and RS of
 * @cluster_id, the SGI number @irq and the target list @tlist, then write
 * it with gic_write_sgi1r().
 */
static void gic_send_sgi ( u64 cluster_id , u16 tlist , unsigned int irq )
{
u64 val ;
2014-11-12 13:46:06 +00:00
val = ( MPIDR_TO_SGI_AFFINITY ( cluster_id , 3 ) |
MPIDR_TO_SGI_AFFINITY ( cluster_id , 2 ) |
irq < < ICC_SGI1R_SGI_ID_SHIFT |
MPIDR_TO_SGI_AFFINITY ( cluster_id , 1 ) |
2017-10-06 10:24:00 -05:00
MPIDR_TO_SGI_RS ( cluster_id ) |
2014-11-12 13:46:06 +00:00
tlist < < ICC_SGI1R_TARGET_LIST_SHIFT ) ;
2014-06-30 16:01:31 +01:00
2018-02-02 09:20:29 -05:00
pr_devel ( " CPU%d: ICC_SGI1R_EL1 %llx \n " , smp_processor_id ( ) , val ) ;
2014-06-30 16:01:31 +01:00
gic_write_sgi1r ( val ) ;
}
2020-04-25 15:24:01 +01:00
/*
 * Send the SGI identified by @d->hwirq to every CPU in @mask.
 *
 * CPUs are grouped by cluster: for each cluster met while walking @mask, a
 * target list is built and a single SGI write is issued. hwirq values of
 * 16 and above are rejected with a warning.
 */
static void gic_ipi_send_mask ( struct irq_data * d , const struct cpumask * mask )
2014-06-30 16:01:31 +01:00
{
int cpu ;
2020-04-25 15:24:01 +01:00
if ( WARN_ON ( d - > hwirq > = 16 ) )
2014-06-30 16:01:31 +01:00
return ;
/*
* Ensure that stores to Normal memory are visible to the
* other CPUs before issuing the IPI .
*/
irqchip/gic-v3: Use dsb(ishst) to order writes with ICC_SGI1R_EL1 accesses
A dsb(ishst) barrier should be enough to order previous writes with
the system register generating the SGI, as we only need to guarantee
the visibility of data to other CPUs in the inner shareable domain
before we send the SGI.
A micro-benchmark is written to verify the performance impact on
kunpeng920 machine with 2 sockets, each socket has 2 dies, and
each die has 24 CPUs, so totally the system has 2 * 2 * 24 = 96
CPUs. ~2% performance improvement can be seen by this benchmark.
The code of benchmark module:
#include <linux/module.h>
#include <linux/timekeeping.h>
volatile int data0 ____cacheline_aligned;
volatile int data1 ____cacheline_aligned;
volatile int data2 ____cacheline_aligned;
volatile int data3 ____cacheline_aligned;
volatile int data4 ____cacheline_aligned;
volatile int data5 ____cacheline_aligned;
volatile int data6 ____cacheline_aligned;
static void ipi_latency_func(void *val)
{
}
static int __init ipi_latency_init(void)
{
ktime_t stime, etime, delta;
int cpu, i;
int start = smp_processor_id();
stime = ktime_get();
for ( i = 0; i < 1000; i++)
for (cpu = 0; cpu < 96; cpu++) {
data0 = data1 = data2 = data3 = data4 = data5 = data6 = cpu;
smp_call_function_single(cpu, ipi_latency_func, NULL, 1);
}
etime = ktime_get();
delta = ktime_sub(etime, stime);
printk("%s ipi from cpu%d to cpu0-95 delta of 1000times:%lld\n",
__func__, start, delta);
return 0;
}
module_init(ipi_latency_init);
static void ipi_latency_exit(void)
{
}
module_exit(ipi_latency_exit);
MODULE_DESCRIPTION("IPI benchmark");
MODULE_LICENSE("GPL");
run the below commands 10 times on both Vanilla and the kernel with this
patch:
# taskset -c 0 insmod test.ko
# rmmod test
The result on vanilla:
ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126757449
ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126784249
ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126177703
ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:127022281
ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126184883
ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:127374585
ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:125778089
ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126974441
ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:127357625
ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:126228184
The result on the kernel with this patch:
ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:124467401
ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123474209
ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123558497
ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:122993951
ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:122984223
ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123323609
ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:124507583
ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123386963
ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123340664
ipi_latency_init ipi from cpu0 to cpu0-95 delta of 1000times:123285324
Signed-off-by: Barry Song <song.bao.hua@hisilicon.com>
[maz: tidied up commit message]
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20220220061910.6155-1-21cnbao@gmail.com
2022-02-20 19:19:10 +13:00
dsb ( ishst ) ;
2014-06-30 16:01:31 +01:00
2015-03-05 10:49:16 +10:30
/*
 * gic_compute_target_list() moves 'cpu' forward past the CPUs it has
 * consumed, so each iteration of this loop handles one whole cluster.
 */
for_each_cpu ( cpu , mask ) {
2023-05-29 16:21:42 +01:00
u64 cluster_id = MPIDR_TO_SGI_CLUSTER_ID ( gic_cpu_to_affinity ( cpu ) ) ;
2014-06-30 16:01:31 +01:00
u16 tlist ;
tlist = gic_compute_target_list ( & cpu , mask , cluster_id ) ;
2020-04-25 15:24:01 +01:00
gic_send_sgi ( cluster_id , tlist , d - > hwirq ) ;
2014-06-30 16:01:31 +01:00
}
/* Force the above writes to ICC_SGI1R_EL1 to be executed */
isb ( ) ;
}
2020-04-22 13:28:57 +02:00
/*
 * SMP setup: register the CPU hotplug "starting" callback, then allocate
 * eight interrupts from the GIC domain for the SGIs and hand that range to
 * set_smp_ipi_range(). Warns and bails out if the allocation fails.
 */
static void __init gic_smp_init ( void )
2014-06-30 16:01:31 +01:00
{
2020-04-25 15:24:01 +01:00
struct irq_fwspec sgi_fwspec = {
. fwnode = gic_data . fwnode ,
. param_count = 1 ,
} ;
int base_sgi ;
2016-12-21 20:19:56 +01:00
/* The _nocalls variant only registers the callback; it is not invoked here */
cpuhp_setup_state_nocalls ( CPUHP_AP_IRQ_GIC_STARTING ,
2016-12-21 20:19:54 +01:00
" irqchip/arm/gicv3:starting " ,
gic_starting_cpu , NULL ) ;
2020-04-25 15:24:01 +01:00
/* Register all 8 non-secure SGIs */
2022-12-13 15:08:43 +01:00
base_sgi = irq_domain_alloc_irqs ( gic_data . domain , 8 , NUMA_NO_NODE , & sgi_fwspec ) ;
2020-04-25 15:24:01 +01:00
if ( WARN_ON ( base_sgi < = 0 ) )
return ;
set_smp_ipi_range ( base_sgi , 8 ) ;
2014-06-30 16:01:31 +01:00
}
/*
 * Route the interrupt described by @d to a single CPU taken from
 * @mask_val.
 *
 * With @force the first CPU of the mask is used; otherwise a CPU is picked
 * from the intersection of the mask with the online CPUs. The interrupt is
 * masked around the router update if it was enabled.
 *
 * Returns IRQ_SET_MASK_OK_DONE on success, -EINVAL when no usable CPU
 * exists in the mask or when the interrupt lives in the redistributor.
 */
static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
			    bool force)
{
	unsigned int target;
	u32 reg_offset, reg_index;
	void __iomem *irouter;
	int was_enabled;
	u64 route;

	target = force ? cpumask_first(mask_val)
		       : cpumask_any_and(mask_val, cpu_online_mask);
	if (target >= nr_cpu_ids)
		return -EINVAL;

	if (gic_irq_in_rdist(d))
		return -EINVAL;

	/* If interrupt was enabled, disable it first */
	was_enabled = gic_peek_irq(d, GICD_ISENABLER);
	if (was_enabled)
		gic_mask_irq(d);

	/* One 64-bit router register per interrupt */
	reg_offset = convert_offset_index(d, GICD_IROUTER, &reg_index);
	irouter = gic_dist_base(d) + reg_offset + (reg_index * 8);
	route = gic_cpu_to_affinity(target);

	gic_write_irouter(route, irouter);

	/* Re-enable the interrupt if it had to be masked above */
	if (was_enabled)
		gic_unmask_irq(d);

	irq_data_update_effective_affinity(d, cpumask_of(target));

	return IRQ_SET_MASK_OK_DONE;
}
# else
# define gic_set_affinity NULL
2020-04-25 15:24:01 +01:00
# define gic_ipi_send_mask NULL
2014-06-30 16:01:31 +01:00
# define gic_smp_init() do { } while(0)
# endif
2020-07-30 18:03:20 +01:00
/*
 * Retrigger @data by marking it pending through
 * gic_irq_set_irqchip_state(). Returns 1 if that call returned 0,
 * and 0 otherwise.
 */
static int gic_retrigger(struct irq_data *data)
{
	int err;

	err = gic_irq_set_irqchip_state(data, IRQCHIP_STATE_PENDING, true);

	return !err;
}
2014-08-26 16:03:35 +01:00
# ifdef CONFIG_CPU_PM
static int gic_cpu_pm_notifier ( struct notifier_block * self ,
unsigned long cmd , void * v )
{
if ( cmd = = CPU_PM_EXIT ) {
2016-08-17 13:49:19 +01:00
if ( gic_dist_security_disabled ( ) )
gic_enable_redist ( true ) ;
2014-08-26 16:03:35 +01:00
gic_cpu_sys_reg_init ( ) ;
2016-08-17 13:49:19 +01:00
} else if ( cmd = = CPU_PM_ENTER & & gic_dist_security_disabled ( ) ) {
2014-08-26 16:03:35 +01:00
gic_write_grpen1 ( 0 ) ;
gic_enable_redist ( false ) ;
}
return NOTIFY_OK ;
}
/* Notifier block wrapping gic_cpu_pm_notifier(); see gic_cpu_pm_init() */
static struct notifier_block gic_cpu_pm_notifier_block = {
	.notifier_call	= gic_cpu_pm_notifier,
};
/* Register the GIC's CPU PM notifier block */
static void gic_cpu_pm_init(void)
{
	cpu_pm_register_notifier(&gic_cpu_pm_notifier_block);
}
# else
/* CONFIG_CPU_PM is not set: there is no notifier to register */
static inline void gic_cpu_pm_init(void)
{
}
# endif /* CONFIG_CPU_PM */
2014-06-30 16:01:31 +01:00
static struct irq_chip gic_chip = {
. name = " GICv3 " ,
. irq_mask = gic_mask_irq ,
. irq_unmask = gic_unmask_irq ,
. irq_eoi = gic_eoi_irq ,
. irq_set_type = gic_set_type ,
. irq_set_affinity = gic_set_affinity ,
2020-07-30 18:03:20 +01:00
. irq_retrigger = gic_retrigger ,
2015-03-18 11:01:24 +00:00
. irq_get_irqchip_state = gic_irq_get_irqchip_state ,
. irq_set_irqchip_state = gic_irq_set_irqchip_state ,
2019-01-31 14:58:59 +00:00
. irq_nmi_setup = gic_irq_nmi_setup ,
. irq_nmi_teardown = gic_irq_nmi_teardown ,
2020-04-25 15:24:01 +01:00
. ipi_send_mask = gic_ipi_send_mask ,
2018-08-17 09:18:01 +01:00
. flags = IRQCHIP_SET_TYPE_MASKED |
IRQCHIP_SKIP_SET_WAKE |
IRQCHIP_MASK_ON_SUSPEND ,
2014-06-30 16:01:31 +01:00
} ;
2015-08-26 17:00:42 +01:00
static struct irq_chip gic_eoimode1_chip = {
. name = " GICv3 " ,
. irq_mask = gic_eoimode1_mask_irq ,
. irq_unmask = gic_unmask_irq ,
. irq_eoi = gic_eoimode1_eoi_irq ,
. irq_set_type = gic_set_type ,
. irq_set_affinity = gic_set_affinity ,
2020-07-30 18:03:20 +01:00
. irq_retrigger = gic_retrigger ,
2015-08-26 17:00:42 +01:00
. irq_get_irqchip_state = gic_irq_get_irqchip_state ,
. irq_set_irqchip_state = gic_irq_set_irqchip_state ,
2015-08-26 17:00:43 +01:00
. irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity ,
2019-01-31 14:58:59 +00:00
. irq_nmi_setup = gic_irq_nmi_setup ,
. irq_nmi_teardown = gic_irq_nmi_teardown ,
2020-04-25 15:24:01 +01:00
. ipi_send_mask = gic_ipi_send_mask ,
2018-08-17 09:18:01 +01:00
. flags = IRQCHIP_SET_TYPE_MASKED |
IRQCHIP_SKIP_SET_WAKE |
IRQCHIP_MASK_ON_SUSPEND ,
2015-08-26 17:00:42 +01:00
} ;
2014-06-30 16:01:31 +01:00
/*
 * Bind Linux interrupt @irq to hardware INTID @hw in domain @d.
 *
 * The irq_chip depends on supports_deactivate_key; the flow handler
 * depends on the INTID range:
 *   - SGI/PPI/EPPI: per-CPU devid interrupt, handle_percpu_devid_irq
 *   - SPI/ESPI:     handle_fasteoi_irq, probe-able, single target
 *   - LPI:          handle_fasteoi_irq, only if the distributor
 *                   supports LPIs
 *
 * Returns 0 on success, -EPERM for an LPI without LPI support or for an
 * INTID outside the ranges above.
 */
static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq,
			      irq_hw_number_t hw)
{
	struct irq_data *irqd = irq_desc_get_irq_data(irq_to_desc(irq));
	struct irq_chip *chip;

	if (static_branch_likely(&supports_deactivate_key))
		chip = &gic_eoimode1_chip;
	else
		chip = &gic_chip;

	switch (__get_intid_range(hw)) {
	case SGI_RANGE:
	case PPI_RANGE:
	case EPPI_RANGE:
		irq_set_percpu_devid(irq);
		irq_domain_set_info(d, irq, hw, chip, d->host_data,
				    handle_percpu_devid_irq, NULL, NULL);
		break;

	case SPI_RANGE:
	case ESPI_RANGE:
		irq_domain_set_info(d, irq, hw, chip, d->host_data,
				    handle_fasteoi_irq, NULL, NULL);
		irq_set_probe(irq);
		irqd_set_single_target(irqd);
		break;

	case LPI_RANGE:
		if (!gic_dist_supports_lpis())
			return -EPERM;
		irq_domain_set_info(d, irq, hw, chip, d->host_data,
				    handle_fasteoi_irq, NULL, NULL);
		break;

	default:
		return -EPERM;
	}

	/* Prevents SW retriggers which mess up the ACK/EOI ordering */
	irqd_set_handle_enforce_irqctx(irqd);

	return 0;
}
2015-10-13 12:51:33 +01:00
static int gic_irq_domain_translate ( struct irq_domain * d ,
struct irq_fwspec * fwspec ,
unsigned long * hwirq ,
unsigned int * type )
2014-06-30 16:01:31 +01:00
{
2020-04-25 15:24:01 +01:00
if ( fwspec - > param_count = = 1 & & fwspec - > param [ 0 ] < 16 ) {
* hwirq = fwspec - > param [ 0 ] ;
* type = IRQ_TYPE_EDGE_RISING ;
return 0 ;
}
2015-10-13 12:51:33 +01:00
if ( is_of_node ( fwspec - > fwnode ) ) {
if ( fwspec - > param_count < 3 )
return - EINVAL ;
2014-06-30 16:01:31 +01:00
2015-10-14 12:27:16 +01:00
switch ( fwspec - > param [ 0 ] ) {
case 0 : /* SPI */
* hwirq = fwspec - > param [ 1 ] + 32 ;
break ;
case 1 : /* PPI */
* hwirq = fwspec - > param [ 1 ] + 16 ;
break ;
2019-07-16 15:17:31 +01:00
case 2 : /* ESPI */
* hwirq = fwspec - > param [ 1 ] + ESPI_BASE_INTID ;
break ;
2019-07-18 13:19:25 +01:00
case 3 : /* EPPI */
* hwirq = fwspec - > param [ 1 ] + EPPI_BASE_INTID ;
break ;
2015-10-14 12:27:16 +01:00
case GIC_IRQ_TYPE_LPI : /* LPI */
* hwirq = fwspec - > param [ 1 ] ;
break ;
2019-07-18 13:19:25 +01:00
case GIC_IRQ_TYPE_PARTITION :
* hwirq = fwspec - > param [ 1 ] ;
if ( fwspec - > param [ 1 ] > = 16 )
* hwirq + = EPPI_BASE_INTID - 16 ;
else
* hwirq + = 16 ;
break ;
2015-10-14 12:27:16 +01:00
default :
return - EINVAL ;
}
2015-10-13 12:51:33 +01:00
* type = fwspec - > param [ 2 ] & IRQ_TYPE_SENSE_MASK ;
2018-03-16 14:35:17 +00:00
2018-03-20 13:44:09 +00:00
/*
* Make it clear that broken DTs are . . . broken .
2021-03-22 04:21:30 +01:00
* Partitioned PPIs are an unfortunate exception .
2018-03-20 13:44:09 +00:00
*/
WARN_ON ( * type = = IRQ_TYPE_NONE & &
fwspec - > param [ 0 ] ! = GIC_IRQ_TYPE_PARTITION ) ;
2015-10-13 12:51:33 +01:00
return 0 ;
2014-06-30 16:01:31 +01:00
}
2016-01-19 14:11:15 +01:00
if ( is_fwnode_irqchip ( fwspec - > fwnode ) ) {
if ( fwspec - > param_count ! = 2 )
return - EINVAL ;
2022-04-04 12:08:42 +01:00
if ( fwspec - > param [ 0 ] < 16 ) {
pr_err ( FW_BUG " Illegal GSI%d translation request \n " ,
fwspec - > param [ 0 ] ) ;
return - EINVAL ;
}
2016-01-19 14:11:15 +01:00
* hwirq = fwspec - > param [ 0 ] ;
* type = fwspec - > param [ 1 ] ;
2018-03-16 14:35:17 +00:00
WARN_ON ( * type = = IRQ_TYPE_NONE ) ;
2016-01-19 14:11:15 +01:00
return 0 ;
}
2015-10-13 12:51:33 +01:00
return - EINVAL ;
2014-06-30 16:01:31 +01:00
}
2014-11-24 14:35:09 +00:00
/*
 * Allocate @nr_irqs consecutive interrupts starting at @virq.
 *
 * @arg is the struct irq_fwspec describing the first interrupt; it is
 * translated once, and each following virq is mapped to the next
 * hardware INTID. Stops at, and returns, the first error from the
 * translation or from gic_irq_domain_map(); returns 0 otherwise.
 */
static int gic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
				unsigned int nr_irqs, void *arg)
{
	struct irq_fwspec *fwspec = arg;
	unsigned int type = IRQ_TYPE_NONE;
	irq_hw_number_t hwirq;
	int i, err;

	err = gic_irq_domain_translate(domain, fwspec, &hwirq, &type);

	for (i = 0; !err && i < nr_irqs; i++)
		err = gic_irq_domain_map(domain, virq + i, hwirq + i);

	return err;
}
/*
 * Release @nr_irqs interrupts starting at @virq: clear each one's flow
 * handler and reset its irq_data in @domain.
 */
static void gic_irq_domain_free(struct irq_domain *domain, unsigned int virq,
				unsigned int nr_irqs)
{
	int i;

	for (i = 0; i < nr_irqs; i++) {
		unsigned int irq = virq + i;
		struct irq_data *d = irq_domain_get_irq_data(domain, irq);

		irq_set_handler(irq, NULL);
		irq_domain_reset_irq_data(d);
	}
}
2021-07-29 17:27:48 +00:00
/*
 * Tell whether @fwspec, already translated to @hwirq, names a
 * partitioned PPI. All of the following must hold:
 *   - PPI partition descriptors exist (gic_data.ppi_descs is set),
 *   - the specifier comes from a device-tree node,
 *   - it has a non-zero fourth cell,
 *   - @hwirq lies in the PPI or EPPI range.
 */
static bool fwspec_is_partitioned_ppi(struct irq_fwspec *fwspec,
				      irq_hw_number_t hwirq)
{
	enum gic_intid_range range;

	if (!gic_data.ppi_descs || !is_of_node(fwspec->fwnode))
		return false;

	if (fwspec->param_count < 4 || !fwspec->param[3])
		return false;

	range = __get_intid_range(hwirq);

	return range == PPI_RANGE || range == EPPI_RANGE;
}
2016-04-11 09:57:54 +01:00
static int gic_irq_domain_select ( struct irq_domain * d ,
struct irq_fwspec * fwspec ,
enum irq_domain_bus_token bus_token )
{
2021-07-29 17:27:48 +00:00
unsigned int type , ret , ppi_idx ;
irq_hw_number_t hwirq ;
2016-04-11 09:57:54 +01:00
/* Not for us */
if ( fwspec - > fwnode ! = d - > fwnode )
return 0 ;
/* If this is not DT, then we have a single domain */
if ( ! is_of_node ( fwspec - > fwnode ) )
return 1 ;
2021-07-29 17:27:48 +00:00
ret = gic_irq_domain_translate ( d , fwspec , & hwirq , & type ) ;
if ( WARN_ON_ONCE ( ret ) )
return 0 ;
if ( ! fwspec_is_partitioned_ppi ( fwspec , hwirq ) )
return d = = gic_data . domain ;
2016-04-11 09:57:54 +01:00
/*
* If this is a PPI and we have a 4 th ( non - null ) parameter ,
* then we need to match the partition domain .
*/
2021-07-29 17:27:48 +00:00
ppi_idx = __gic_get_ppi_index ( hwirq ) ;
return d = = partition_get_domain ( gic_data . ppi_descs [ ppi_idx ] ) ;
2016-04-11 09:57:54 +01:00
}
2014-06-30 16:01:31 +01:00
static const struct irq_domain_ops gic_irq_domain_ops = {
2015-10-13 12:51:33 +01:00
. translate = gic_irq_domain_translate ,
2014-11-24 14:35:09 +00:00
. alloc = gic_irq_domain_alloc ,
. free = gic_irq_domain_free ,
2016-04-11 09:57:54 +01:00
. select = gic_irq_domain_select ,
} ;
static int partition_domain_translate ( struct irq_domain * d ,
struct irq_fwspec * fwspec ,
unsigned long * hwirq ,
unsigned int * type )
{
2021-07-29 17:27:48 +00:00
unsigned long ppi_intid ;
2016-04-11 09:57:54 +01:00
struct device_node * np ;
2021-07-29 17:27:48 +00:00
unsigned int ppi_idx ;
2016-04-11 09:57:54 +01:00
int ret ;
2019-07-18 13:05:17 +01:00
if ( ! gic_data . ppi_descs )
return - ENOMEM ;
2016-04-11 09:57:54 +01:00
np = of_find_node_by_phandle ( fwspec - > param [ 3 ] ) ;
if ( WARN_ON ( ! np ) )
return - EINVAL ;
2021-07-29 17:27:48 +00:00
ret = gic_irq_domain_translate ( d , fwspec , & ppi_intid , type ) ;
if ( WARN_ON_ONCE ( ret ) )
return 0 ;
ppi_idx = __gic_get_ppi_index ( ppi_intid ) ;
ret = partition_translate_id ( gic_data . ppi_descs [ ppi_idx ] ,
2016-04-11 09:57:54 +01:00
of_node_to_fwnode ( np ) ) ;
if ( ret < 0 )
return ret ;
* hwirq = ret ;
* type = fwspec - > param [ 2 ] & IRQ_TYPE_SENSE_MASK ;
return 0 ;
}
/*
 * Domain operations for the PPI partition domains. Selection is shared
 * with the main GIC domain; translation is partition-specific.
 */
static const struct irq_domain_ops partition_domain_ops = {
	.select		= gic_irq_domain_select,
	.translate	= partition_domain_translate,
};
2018-12-10 13:56:32 +00:00
static bool gic_enable_quirk_msm8996 ( void * data )
{
struct gic_chip_data * d = data ;
d - > flags | = FLAGS_WORKAROUND_GICR_WAKER_MSM8996 ;
return true ;
}
2023-05-15 13:13:51 -07:00
static bool gic_enable_quirk_mtk_gicr ( void * data )
{
struct gic_chip_data * d = data ;
d - > flags | = FLAGS_WORKAROUND_MTK_GICR_SAVE ;
return true ;
}
2020-03-11 11:56:49 +00:00
static bool gic_enable_quirk_cavium_38539 ( void * data )
{
struct gic_chip_data * d = data ;
d - > flags | = FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539 ;
return true ;
}
2019-07-31 17:29:33 +01:00
static bool gic_enable_quirk_hip06_07 ( void * data )
{
struct gic_chip_data * d = data ;
/*
* HIP06 GICD_IIDR clashes with GIC - 600 product number ( despite
* not being an actual ARM implementation ) . The saving grace is
* that GIC - 600 doesn ' t have ESPI , so nothing to do in that case .
* HIP07 doesn ' t even have a proper IIDR , and still pretends to
* have ESPI . In both cases , put them right .
*/
if ( d - > rdists . gicd_typer & GICD_TYPER_ESPI ) {
/* Zero both ESPI and the RES0 field next to it... */
d - > rdists . gicd_typer & = ~ GENMASK ( 9 , 8 ) ;
return true ;
}
return false ;
}
irqchip/gicv3: Workaround for NVIDIA erratum T241-FABRIC-4
The T241 platform suffers from the T241-FABRIC-4 erratum which causes
unexpected behavior in the GIC when multiple transactions are received
simultaneously from different sources. This hardware issue impacts
NVIDIA server platforms that use more than two T241 chips
interconnected. Each chip has support for 320 {E}SPIs.
This issue occurs when multiple packets from different GICs are
incorrectly interleaved at the target chip. The erratum text below
specifies exactly what can cause multiple transfer packets susceptible
to interleaving and GIC state corruption. GIC state corruption can
lead to a range of problems, including kernel panics, and unexpected
behavior.
From the erratum text:
"In some cases, inter-socket AXI4 Stream packets with multiple
transfers, may be interleaved by the fabric when presented to ARM
Generic Interrupt Controller. GIC expects all transfers of a packet
to be delivered without any interleaving.
The following GICv3 commands may result in multiple transfer packets
over inter-socket AXI4 Stream interface:
- Register reads from GICD_I* and GICD_N*
- Register writes to 64-bit GICD registers other than GICD_IROUTERn*
- ITS command MOVALL
Multiple commands in GICv4+ utilize multiple transfer packets,
including VMOVP, VMOVI, VMAPP, and 64-bit register accesses."
This issue impacts system configurations with more than 2 sockets,
that require multi-transfer packets to be sent over inter-socket
AXI4 Stream interface between GIC instances on different sockets.
GICv4 cannot be supported. GICv3 SW model can only be supported
with the workaround. Single and Dual socket configurations are not
impacted by this issue and support GICv3 and GICv4."
Link: https://developer.nvidia.com/docs/t241-fabric-4/nvidia-t241-fabric-4-errata.pdf
Writing to the chip alias region of the GICD_In{E} registers except
GICD_ICENABLERn has an equivalent effect as writing to the global
distributor. The SPI interrupt deactivate path is not impacted by
the erratum.
To fix this problem, implement a workaround that ensures read accesses
to the GICD_In{E} registers are directed to the chip that owns the
SPI, and disable GICv4.x features. To simplify code changes, the
gic_configure_irq() function uses the same alias region for both read
and write operations to GICD_ICFGR.
Co-developed-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com> (for SMCCC/SOC ID bits)
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230319024314.3540573-2-sdonthineni@nvidia.com
2023-03-18 21:43:14 -05:00
# define T241_CHIPN_MASK GENMASK_ULL(45, 44)
# define T241_CHIP_GICDA_OFFSET 0x1580000
# define SMCCC_SOC_ID_T241 0x036b0241
static bool gic_enable_quirk_nvidia_t241 ( void * data )
{
s32 soc_id = arm_smccc_get_soc_id_version ( ) ;
unsigned long chip_bmask = 0 ;
phys_addr_t phys ;
u32 i ;
/* Check JEP106 code for NVIDIA T241 chip (036b:0241) */
if ( ( soc_id < 0 ) | | ( soc_id ! = SMCCC_SOC_ID_T241 ) )
return false ;
/* Find the chips based on GICR regions PHYS addr */
for ( i = 0 ; i < gic_data . nr_redist_regions ; i + + ) {
chip_bmask | = BIT ( FIELD_GET ( T241_CHIPN_MASK ,
( u64 ) gic_data . redist_regions [ i ] . phys_base ) ) ;
}
if ( hweight32 ( chip_bmask ) < 3 )
return false ;
/* Setup GICD alias regions */
for ( i = 0 ; i < ARRAY_SIZE ( t241_dist_base_alias ) ; i + + ) {
if ( chip_bmask & BIT ( i ) ) {
phys = gic_data . dist_phys_base + T241_CHIP_GICDA_OFFSET ;
phys | = FIELD_PREP ( T241_CHIPN_MASK , i ) ;
t241_dist_base_alias [ i ] = ioremap ( phys , SZ_64K ) ;
WARN_ON_ONCE ( ! t241_dist_base_alias [ i ] ) ;
}
}
static_branch_enable ( & gic_nvidia_t241_erratum ) ;
return true ;
}
2023-05-22 19:06:43 +08:00
static bool gic_enable_quirk_asr8601 ( void * data )
{
struct gic_chip_data * d = data ;
d - > flags | = FLAGS_WORKAROUND_ASR_ERRATUM_8601001 ;
return true ;
}
2023-07-04 17:50:34 +02:00
/*
 * Quirk init hook: enable the gic_arm64_2941627_erratum static key.
 * @data is not used. Always returns true.
 */
static bool gic_enable_quirk_arm64_2941627(void *data)
{
	static_branch_enable(&gic_arm64_2941627_erratum);

	return true;
}
2019-07-31 17:29:33 +01:00
/*
 * Table of known GICv3 implementation quirks.
 *
 * Each entry carries a description, one match key (a compatible string,
 * a firmware property name, or an IIDR value with its mask) and the init
 * hook associated with it. How the keys are matched is decided by the
 * code that walks this table, which is not visible in this part of the
 * file.
 *
 * Note that the NVIDIA T241 entry and the GIC-700 "IP variant [2]" entry
 * share the same IIDR value (0x0402043b) and differ only in their masks.
 *
 * The final empty entry presumably serves as the end-of-table sentinel
 * -- TODO confirm against the table walker.
 */
static const struct gic_quirk gic_quirks [ ] = {
{
. desc = " GICv3: Qualcomm MSM8996 broken firmware " ,
. compatible = " qcom,msm8996-gic-v3 " ,
. init = gic_enable_quirk_msm8996 ,
} ,
2023-05-22 19:06:43 +08:00
{
. desc = " GICv3: ASR erratum 8601001 " ,
. compatible = " asr,asr8601-gic-v3 " ,
. init = gic_enable_quirk_asr8601 ,
} ,
2023-05-15 13:13:51 -07:00
{
. desc = " GICv3: Mediatek Chromebook GICR save problem " ,
. property = " mediatek,broken-save-restore-fw " ,
. init = gic_enable_quirk_mtk_gicr ,
} ,
2019-07-31 17:29:33 +01:00
{
. desc = " GICv3: HIP06 erratum 161010803 " ,
. iidr = 0x0204043b ,
. mask = 0xffffffff ,
. init = gic_enable_quirk_hip06_07 ,
} ,
{
. desc = " GICv3: HIP07 erratum 161010803 " ,
. iidr = 0x00000000 ,
. mask = 0xffffffff ,
. init = gic_enable_quirk_hip06_07 ,
} ,
2020-03-11 11:56:49 +00:00
{
/*
* Reserved register accesses generate a Synchronous
* External Abort . This erratum applies to :
* - ThunderX : CN88xx
* - OCTEON TX : CN83xx , CN81xx
* - OCTEON TX2 : CN93xx , CN96xx , CN98xx , CNF95xx *
*/
. desc = " GICv3: Cavium erratum 38539 " ,
. iidr = 0xa000034c ,
. mask = 0xe8f00fff ,
. init = gic_enable_quirk_cavium_38539 ,
} ,
irqchip/gicv3: Workaround for NVIDIA erratum T241-FABRIC-4
The T241 platform suffers from the T241-FABRIC-4 erratum which causes
unexpected behavior in the GIC when multiple transactions are received
simultaneously from different sources. This hardware issue impacts
NVIDIA server platforms that use more than two T241 chips
interconnected. Each chip has support for 320 {E}SPIs.
This issue occurs when multiple packets from different GICs are
incorrectly interleaved at the target chip. The erratum text below
specifies exactly what can cause multiple transfer packets susceptible
to interleaving and GIC state corruption. GIC state corruption can
lead to a range of problems, including kernel panics, and unexpected
behavior.
>From the erratum text:
"In some cases, inter-socket AXI4 Stream packets with multiple
transfers, may be interleaved by the fabric when presented to ARM
Generic Interrupt Controller. GIC expects all transfers of a packet
to be delivered without any interleaving.
The following GICv3 commands may result in multiple transfer packets
over inter-socket AXI4 Stream interface:
- Register reads from GICD_I* and GICD_N*
- Register writes to 64-bit GICD registers other than GICD_IROUTERn*
- ITS command MOVALL
Multiple commands in GICv4+ utilize multiple transfer packets,
including VMOVP, VMOVI, VMAPP, and 64-bit register accesses."
This issue impacts system configurations with more than 2 sockets,
that require multi-transfer packets to be sent over inter-socket
AXI4 Stream interface between GIC instances on different sockets.
GICv4 cannot be supported. GICv3 SW model can only be supported
with the workaround. Single and Dual socket configurations are not
impacted by this issue and support GICv3 and GICv4."
Link: https://developer.nvidia.com/docs/t241-fabric-4/nvidia-t241-fabric-4-errata.pdf
Writing to the chip alias region of the GICD_In{E} registers except
GICD_ICENABLERn has an equivalent effect as writing to the global
distributor. The SPI interrupt deactivate path is not impacted by
the erratum.
To fix this problem, implement a workaround that ensures read accesses
to the GICD_In{E} registers are directed to the chip that owns the
SPI, and disable GICv4.x features. To simplify code changes, the
gic_configure_irq() function uses the same alias region for both read
and write operations to GICD_ICFGR.
Co-developed-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com> (for SMCCC/SOC ID bits)
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230319024314.3540573-2-sdonthineni@nvidia.com
2023-03-18 21:43:14 -05:00
{
. desc = " GICv3: NVIDIA erratum T241-FABRIC-4 " ,
. iidr = 0x0402043b ,
. mask = 0xffffffff ,
. init = gic_enable_quirk_nvidia_t241 ,
} ,
2023-07-04 17:50:34 +02:00
{
/*
* GIC - 700 : 2941627 workaround - IP variant [ 0 , 1 ]
*
*/
. desc = " GICv3: ARM64 erratum 2941627 " ,
. iidr = 0x0400043b ,
. mask = 0xff0e0fff ,
. init = gic_enable_quirk_arm64_2941627 ,
} ,
{
/*
* GIC - 700 : 2941627 workaround - IP variant [ 2 ]
*/
. desc = " GICv3: ARM64 erratum 2941627 " ,
. iidr = 0x0402043b ,
. mask = 0xff0f0fff ,
. init = gic_enable_quirk_arm64_2941627 ,
} ,
2019-07-31 17:29:33 +01:00
{
}
} ;
2019-01-31 14:58:57 +00:00
/*
 * Turn on pseudo-NMI support.
 *
 * Nothing is done when GIC priority masking is not enabled, or when the
 * FLAGS_WORKAROUND_MTK_GICR_SAVE workaround flag is set (a warning is
 * printed in that case). Otherwise the function:
 *   - allocates one refcount per PPI (gic_data.ppi_nr entries) and sets
 *     each to 0; an allocation failure returns silently, leaving
 *     pseudo-NMIs disabled,
 *   - logs whether ICC_PMR_EL1 synchronisation is relaxed or forced,
 *   - enables gic_nonsecure_priorities when group 0 is available and
 *     distributor security is not disabled,
 *   - enables the supports_pseudo_nmis static key,
 *   - sets IRQCHIP_SUPPORTS_NMI on whichever irq_chip matches the state
 *     of supports_deactivate_key.
 */
static void gic_enable_nmi_support ( void )
{
2019-01-31 14:58:59 +00:00
int i ;
2019-07-18 12:53:05 +01:00
if ( ! gic_prio_masking_enabled ( ) )
return ;
2023-05-15 13:13:51 -07:00
if ( gic_data . flags & FLAGS_WORKAROUND_MTK_GICR_SAVE ) {
pr_warn ( " Skipping NMI enable due to firmware issues \n " ) ;
return ;
}
2019-07-18 12:53:05 +01:00
ppi_nmi_refs = kcalloc ( gic_data . ppi_nr , sizeof ( * ppi_nmi_refs ) , GFP_KERNEL ) ;
if ( ! ppi_nmi_refs )
return ;
for ( i = 0 ; i < gic_data . ppi_nr ; i + + )
2019-01-31 14:58:59 +00:00
refcount_set ( & ppi_nmi_refs [ i ] , 0 ) ;
2020-09-12 16:37:06 +01:00
pr_info ( " Pseudo-NMIs enabled using %s ICC_PMR_EL1 synchronisation \n " ,
arm64: add ARM64_HAS_GIC_PRIO_RELAXED_SYNC cpucap
When Priority Mask Hint Enable (PMHE) == 0b1, the GIC may use the PMR
value to determine whether to signal an IRQ to a PE, and consequently
after a change to the PMR value, a DSB SY may be required to ensure that
interrupts are signalled to a CPU in finite time. When PMHE == 0b0,
interrupts are always signalled to the relevant PE, and all masking
occurs locally, without requiring a DSB SY.
Since commit:
f226650494c6aa87 ("arm64: Relax ICC_PMR_EL1 accesses when ICC_CTLR_EL1.PMHE is clear")
... we handle this dynamically: in most cases a static key is used to
determine whether to issue a DSB SY, but the entry code must read from
ICC_CTLR_EL1 as static keys aren't accessible from plain assembly.
It would be much nicer to use an alternative instruction sequence for
the DSB, as this would avoid the need to read from ICC_CTLR_EL1 in the
entry code, and for most other code this will result in simpler code
generation with fewer instructions and fewer branches.
This patch adds a new ARM64_HAS_GIC_PRIO_RELAXED_SYNC cpucap which is
only set when ICC_CTLR_EL1.PMHE == 0b0 (and GIC priority masking is in
use). This allows us to replace the existing users of the
`gic_pmr_sync` static key with alternative sequences which default to a
DSB SY and are relaxed to a NOP when PMHE is not in use.
The entry assembly management of the PMR is slightly restructured to use
a branch (rather than multiple NOPs) when priority masking is not in
use. This is more in keeping with other alternatives in the entry
assembly, and permits the use of a separate alternatives for the
PMHE-dependent DSB SY (and removal of the conditional branch this
currently requires). For consistency I've adjusted both the save and
restore paths.
According to bloat-o-meter, when building defconfig +
CONFIG_ARM64_PSEUDO_NMI=y this shrinks the kernel text by ~4KiB:
| add/remove: 4/2 grow/shrink: 42/310 up/down: 332/-5032 (-4700)
The resulting vmlinux is ~66KiB smaller, though the resulting Image size
is unchanged due to padding and alignment:
| [mark@lakrids:~/src/linux]% ls -al vmlinux-*
| -rwxr-xr-x 1 mark mark 137508344 Jan 17 14:11 vmlinux-after
| -rwxr-xr-x 1 mark mark 137575440 Jan 17 13:49 vmlinux-before
| [mark@lakrids:~/src/linux]% ls -al Image-*
| -rw-r--r-- 1 mark mark 38777344 Jan 17 14:11 Image-after
| -rw-r--r-- 1 mark mark 38777344 Jan 17 13:49 Image-before
Prior to this patch we did not verify the state of ICC_CTLR_EL1.PMHE on
secondary CPUs. As of this patch this is verified by the cpufeature code
when using GIC priority masking (i.e. when using pseudo-NMIs).
Note that since commit:
7e3a57fa6ca831fa ("arm64: Document ICC_CTLR_EL3.PMHE setting requirements")
... Documentation/arm64/booting.rst specifies:
| - ICC_CTLR_EL3.PMHE (bit 6) must be set to the same value across
| all CPUs the kernel is executing on, and must stay constant
| for the lifetime of the kernel.
... so that should not adversely affect any compliant systems, and as
we'll only check for the absense of PMHE when using pseudo-NMIs, this
will only fire when such mismatch will adversely affect the system.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Cc: Mark Brown <broonie@kernel.org>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20230130145429.903791-5-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2023-01-30 14:54:28 +00:00
gic_has_relaxed_pmr_sync ( ) ? " relaxed " : " forced " ) ;
2019-10-02 10:06:12 +01:00
2020-09-12 16:37:07 +01:00
/*
* How priority values are used by the GIC depends on two things :
* the security state of the GIC ( controlled by the GICD_CTRL . DS bit )
* and if Group 0 interrupts can be delivered to Linux in the non - secure
* world as FIQs ( controlled by the SCR_EL3 . FIQ bit ) . These affect the
2022-07-15 13:12:58 +08:00
* ICC_PMR_EL1 register and the priority that software assigns to
2020-09-12 16:37:07 +01:00
* interrupts :
*
* GICD_CTRL . DS | SCR_EL3 . FIQ | ICC_PMR_EL1 | Group 1 priority
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
* 1 | - | unchanged | unchanged
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
* 0 | 1 | non - secure | non - secure
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
* 0 | 0 | unchanged | non - secure
*
* where non - secure means that the value is right - shifted by one and the
* MSB bit set , to make it fit in the non - secure priority range .
*
* In the first two cases , where ICC_PMR_EL1 and the interrupt priority
* are both either modified or unchanged , we can use the same set of
* priorities .
*
* In the last case , where only the interrupt priorities are modified to
* be in the non - secure range , we use a different PMR value to mask IRQs
* and the rest of the values that we use remain unchanged .
*/
if ( gic_has_group0 ( ) & & ! gic_dist_security_disabled ( ) )
static_branch_enable ( & gic_nonsecure_priorities ) ;
2019-01-31 14:58:57 +00:00
static_branch_enable ( & supports_pseudo_nmis ) ;
2019-01-31 14:58:59 +00:00
if ( static_branch_likely ( & supports_deactivate_key ) )
gic_eoimode1_chip . flags | = IRQCHIP_SUPPORTS_NMI ;
else
gic_chip . flags | = IRQCHIP_SUPPORTS_NMI ;
2019-01-31 14:58:57 +00:00
}
irqchip/gicv3: Workaround for NVIDIA erratum T241-FABRIC-4
The T241 platform suffers from the T241-FABRIC-4 erratum which causes
unexpected behavior in the GIC when multiple transactions are received
simultaneously from different sources. This hardware issue impacts
NVIDIA server platforms that use more than two T241 chips
interconnected. Each chip has support for 320 {E}SPIs.
This issue occurs when multiple packets from different GICs are
incorrectly interleaved at the target chip. The erratum text below
specifies exactly what can cause multiple transfer packets susceptible
to interleaving and GIC state corruption. GIC state corruption can
lead to a range of problems, including kernel panics, and unexpected
behavior.
From the erratum text:
"In some cases, inter-socket AXI4 Stream packets with multiple
transfers, may be interleaved by the fabric when presented to ARM
Generic Interrupt Controller. GIC expects all transfers of a packet
to be delivered without any interleaving.
The following GICv3 commands may result in multiple transfer packets
over inter-socket AXI4 Stream interface:
- Register reads from GICD_I* and GICD_N*
- Register writes to 64-bit GICD registers other than GICD_IROUTERn*
- ITS command MOVALL
Multiple commands in GICv4+ utilize multiple transfer packets,
including VMOVP, VMOVI, VMAPP, and 64-bit register accesses."
This issue impacts system configurations with more than 2 sockets,
that require multi-transfer packets to be sent over inter-socket
AXI4 Stream interface between GIC instances on different sockets.
GICv4 cannot be supported. GICv3 SW model can only be supported
with the workaround. Single and Dual socket configurations are not
impacted by this issue and support GICv3 and GICv4."
Link: https://developer.nvidia.com/docs/t241-fabric-4/nvidia-t241-fabric-4-errata.pdf
Writing to the chip alias region of the GICD_In{E} registers except
GICD_ICENABLERn has an equivalent effect as writing to the global
distributor. The SPI interrupt deactivate path is not impacted by
the erratum.
To fix this problem, implement a workaround that ensures read accesses
to the GICD_In{E} registers are directed to the chip that owns the
SPI, and disable GICv4.x features. To simplify code changes, the
gic_configure_irq() function uses the same alias region for both read
and write operations to GICD_ICFGR.
Co-developed-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com> (for SMCCC/SOC ID bits)
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230319024314.3540573-2-sdonthineni@nvidia.com
2023-03-18 21:43:14 -05:00
/*
 * Common GICv3 initialisation, run once the distributor and redistributor
 * regions have been located and mapped by the caller.
 *
 * @dist_phys_base:    physical base address of the distributor
 * @dist_base:         mapped distributor registers
 * @rdist_regs:        array describing the redistributor regions
 * @nr_redist_regions: number of entries in @rdist_regs
 * @redist_stride:     redistributor stride, recorded as-is in gic_data
 * @handle:            firmware node identifying this interrupt controller
 *
 * Records the parameters in gic_data, reads GICD_TYPER/GICD_TYPER2, applies
 * quirks, creates the IRQ domain and the per-CPU redistributor data, then
 * brings up the distributor, CPU interface, SMP, PM and ITS (or GICv2m)
 * support in that order.
 *
 * Returns 0 on success, or -ENOMEM when the IRQ domain or the per-CPU
 * redistributor data could not be allocated. A failing mbi_init() is only
 * logged and does not abort the initialisation.
 */
static int __init gic_init_bases(phys_addr_t dist_phys_base,
				 void __iomem *dist_base,
				 struct redist_region *rdist_regs,
				 u32 nr_redist_regions,
				 u64 redist_stride,
				 struct fwnode_handle *handle)
{
	u32 typer;
	int err;

	if (!is_hyp_mode_available())
		static_branch_disable(&supports_deactivate_key);

	if (static_branch_likely(&supports_deactivate_key))
		pr_info("GIC: Using split EOI/Deactivate mode\n");

	gic_data.fwnode = handle;
	gic_data.dist_phys_base = dist_phys_base;
	gic_data.dist_base = dist_base;
	gic_data.redist_regions = rdist_regs;
	gic_data.nr_redist_regions = nr_redist_regions;
	gic_data.redist_stride = redist_stride;

	/*
	 * Find out how many interrupts are supported.
	 */
	typer = readl_relaxed(gic_data.dist_base + GICD_TYPER);
	gic_data.rdists.gicd_typer = typer;

	/* Quirks may set gic_data.flags, which is tested just below. */
	gic_enable_quirks(readl_relaxed(gic_data.dist_base + GICD_IIDR),
			  gic_quirks, &gic_data);

	pr_info("%d SPIs implemented\n", GIC_LINE_NR - 32);
	pr_info("%d Extended SPIs implemented\n", GIC_ESPI_NR);

	/*
	 * ThunderX1 explodes on reading GICD_TYPER2, in violation of the
	 * architecture spec (which says that reserved registers are RES0).
	 */
	if (!(gic_data.flags & FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539))
		gic_data.rdists.gicd_typer2 = readl_relaxed(gic_data.dist_base + GICD_TYPER2);

	/* Both allocations are checked together further down. */
	gic_data.domain = irq_domain_create_tree(handle, &gic_irq_domain_ops,
						 &gic_data);
	gic_data.rdists.rdist = alloc_percpu(typeof(*gic_data.rdists.rdist));

	/*
	 * The GICv4.x feature flags are only set when the system is NOT
	 * affected by erratum T241-FABRIC-4. Skipping this on affected
	 * systems leaves the flags unset, which is how the GICv4.x features
	 * get disabled for that erratum.
	 */
	if (!static_branch_unlikely(&gic_nvidia_t241_erratum)) {
		gic_data.rdists.has_rvpeid = true;
		gic_data.rdists.has_vlpis = true;
		gic_data.rdists.has_direct_lpi = true;
		gic_data.rdists.has_vpend_valid_dirty = true;
	}

	if (WARN_ON(!gic_data.domain) || WARN_ON(!gic_data.rdists.rdist)) {
		err = -ENOMEM;
		goto out_free;
	}

	irq_domain_update_bus_token(gic_data.domain, DOMAIN_BUS_WIRED);

	gic_data.has_rss = !!(typer & GICD_TYPER_RSS);

	if (typer & GICD_TYPER_MBIS) {
		/* Best effort: carry on without MBIs if this fails. */
		err = mbi_init(handle, gic_data.domain);
		if (err)
			pr_err("Failed to initialize MBIs\n");
	}

	set_handle_irq(gic_handle_irq);

	gic_update_rdist_properties();

	gic_dist_init();
	gic_cpu_init();
	gic_smp_init();
	gic_cpu_pm_init();

	if (gic_dist_supports_lpis()) {
		its_init(handle, &gic_data.rdists, gic_data.domain);
		its_cpu_init();
		its_lpi_memreserve_init();
	} else {
		if (IS_ENABLED(CONFIG_ARM_GIC_V2M))
			gicv2m_init(handle, gic_data.domain);
	}

	gic_enable_nmi_support();

	return 0;

out_free:
	/* Only one of the two allocations may have succeeded. */
	if (gic_data.domain)
		irq_domain_remove(gic_data.domain);
	free_percpu(gic_data.rdists.rdist);
	return err;
}
/*
 * Check that the distributor mapped at @dist_base identifies itself as a
 * GICv3 or GICv4 in the architecture field of GICD_PIDR2.
 *
 * Returns 0 when the architecture revision matches, -ENODEV otherwise.
 */
static int __init gic_validate_dist_version(void __iomem *dist_base)
{
	u32 arch_rev;

	arch_rev = readl_relaxed(dist_base + GICD_PIDR2);
	arch_rev &= GIC_PIDR2_ARCH_MASK;

	if (arch_rev == GIC_PIDR2_ARCH_GICv3 || arch_rev == GIC_PIDR2_ARCH_GICv4)
		return 0;

	return -ENODEV;
}
2016-04-11 09:57:54 +01:00
/* Create all possible partitions at boot time */
Small release overall.
- x86: miscellaneous fixes, AVIC support (local APIC virtualization,
AMD version)
- s390: polling for interrupts after a VCPU goes to halted state is
now enabled for s390; use hardware provided information about facility
bits that do not need any hypervisor activity, and other fixes for
cpu models and facilities; improve perf output; floating interrupt
controller improvements.
- MIPS: miscellaneous fixes
- PPC: bugfixes only
- ARM: 16K page size support, generic firmware probing layer for
timer and GIC
Christoffer Dall (KVM-ARM maintainer) says:
"There are a few changes in this pull request touching things outside
KVM, but they should all carry the necessary acks and it made the
merge process much easier to do it this way."
though actually the irqchip maintainers' acks didn't make it into the
patches. Marc Zyngier, who is both irqchip and KVM-ARM maintainer,
later acked at http://mid.gmane.org/573351D1.4060303@arm.com
"more formally and for documentation purposes".
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2.0.22 (GNU/Linux)
iQEcBAABAgAGBQJXPJjyAAoJEL/70l94x66DhioH/j4fwQ0FmfPSM9PArzaFHQdx
LNE3tU4+bobbsy1BJr4DiAaOUQn3DAgwUvGLWXdeLiOXtoWXBiFHKaxlqEsCA6iQ
xcTH1TgfxsVoqGQ6bT9X/2GCx70heYpcWG3f+zqBy7ZfFmQykLAC/HwOr52VQL8f
hUFi3YmTHcnorp0n5Xg+9r3+RBS4D/kTbtdn6+KCLnPJ0RcgNkI3/NcafTemoofw
Tkv8+YYFNvKV13qlIfVqxMa0GwWI3pP6YaNKhaS5XO8Pu16HuuF1JthJsUBDzwBa
RInp8R9MoXgsBYhLpz3jc9vWG7G9yDl5LehsD9KOUGOaFYJ7sQN+QZOusa6jFgA=
=llO5
-----END PGP SIGNATURE-----
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
"Small release overall.
x86:
- miscellaneous fixes
- AVIC support (local APIC virtualization, AMD version)
s390:
- polling for interrupts after a VCPU goes to halted state is now
enabled for s390
- use hardware provided information about facility bits that do not
need any hypervisor activity, and other fixes for cpu models and
facilities
- improve perf output
- floating interrupt controller improvements.
MIPS:
- miscellaneous fixes
PPC:
- bugfixes only
ARM:
- 16K page size support
- generic firmware probing layer for timer and GIC
Christoffer Dall (KVM-ARM maintainer) says:
"There are a few changes in this pull request touching things
outside KVM, but they should all carry the necessary acks and it
made the merge process much easier to do it this way."
though actually the irqchip maintainers' acks didn't make it into the
patches. Marc Zyngier, who is both irqchip and KVM-ARM maintainer,
later acked at http://mid.gmane.org/573351D1.4060303@arm.com ('more
formally and for documentation purposes')"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (82 commits)
KVM: MTRR: remove MSR 0x2f8
KVM: x86: make hwapic_isr_update and hwapic_irr_update look the same
svm: Manage vcpu load/unload when enable AVIC
svm: Do not intercept CR8 when enable AVIC
svm: Do not expose x2APIC when enable AVIC
KVM: x86: Introducing kvm_x86_ops.apicv_post_state_restore
svm: Add VMEXIT handlers for AVIC
svm: Add interrupt injection via AVIC
KVM: x86: Detect and Initialize AVIC support
svm: Introduce new AVIC VMCB registers
KVM: split kvm_vcpu_wake_up from kvm_vcpu_kick
KVM: x86: Introducing kvm_x86_ops VCPU blocking/unblocking hooks
KVM: x86: Introducing kvm_x86_ops VM init/destroy hooks
KVM: x86: Rename kvm_apic_get_reg to kvm_lapic_get_reg
KVM: x86: Misc LAPIC changes to expose helper functions
KVM: shrink halt polling even more for invalid wakeups
KVM: s390: set halt polling to 80 microseconds
KVM: halt_polling: provide a way to qualify wakeups during poll
KVM: PPC: Book3S HV: Re-enable XICS fast path for irqfd-generated interrupts
kvm: Conditionally register IRQ bypass consumer
...
2016-05-19 11:27:09 -07:00
/*
 * Build the PPI partition descriptors described by the "ppi-partitions"
 * child of @gic_node.
 *
 * Each child of "ppi-partitions" is one partition; its "affinity" property
 * is a list of CPU phandles, which is turned into the partition's CPU mask.
 * A partition descriptor is then created for each of the gic_data.ppi_nr
 * PPIs and stored in gic_data.ppi_descs[].
 *
 * Returns nothing. The function bails out silently when the node is absent,
 * has no children, or an allocation fails; individual malformed entries are
 * reported with WARN_ON() and skipped.
 */
static void __init gic_populate_ppi_partitions(struct device_node *gic_node)
{
	struct device_node *parts_node, *child_part;
	int part_idx = 0, i;
	int nr_parts;
	struct partition_affinity *parts;

	parts_node = of_get_child_by_name(gic_node, "ppi-partitions");
	if (!parts_node)
		return;

	gic_data.ppi_descs = kcalloc(gic_data.ppi_nr, sizeof(*gic_data.ppi_descs), GFP_KERNEL);
	if (!gic_data.ppi_descs)
		goto out_put_node;

	nr_parts = of_get_child_count(parts_node);

	if (!nr_parts)
		goto out_put_node;

	parts = kcalloc(nr_parts, sizeof(*parts), GFP_KERNEL);
	if (WARN_ON(!parts))
		goto out_put_node;

	for_each_child_of_node(parts_node, child_part) {
		struct partition_affinity *part;
		int n;

		part = &parts[part_idx];

		part->partition_id = of_node_to_fwnode(child_part);

		/* Start of a single log line, continued with pr_cont(). */
		pr_info("GIC: PPI partition %pOFn[%d] { ",
			child_part, part_idx);

		n = of_property_count_elems_of_size(child_part, "affinity",
						    sizeof(u32));
		/* A non-positive count simply skips the loop below. */
		WARN_ON(n <= 0);

		for (i = 0; i < n; i++) {
			int err, cpu;
			u32 cpu_phandle;
			struct device_node *cpu_node;

			err = of_property_read_u32_index(child_part, "affinity",
							 i, &cpu_phandle);
			if (WARN_ON(err))
				continue;

			cpu_node = of_find_node_by_phandle(cpu_phandle);
			if (WARN_ON(!cpu_node))
				continue;

			cpu = of_cpu_node_to_id(cpu_node);
			if (WARN_ON(cpu < 0)) {
				/* Drop the reference taken by the lookup. */
				of_node_put(cpu_node);
				continue;
			}

			pr_cont("%pOF[%d] ", cpu_node, cpu);

			cpumask_set_cpu(cpu, &part->mask);
			of_node_put(cpu_node);
		}

		pr_cont("}\n");
		part_idx++;
	}

	for (i = 0; i < gic_data.ppi_nr; i++) {
		unsigned int irq;
		struct partition_desc *desc;
		struct irq_fwspec ppi_fwspec = {
			.fwnode		= gic_data.fwnode,
			.param_count	= 3,
			.param		= {
				[0]	= GIC_IRQ_TYPE_PARTITION,
				[1]	= i,
				[2]	= IRQ_TYPE_NONE,
			},
		};

		irq = irq_create_fwspec_mapping(&ppi_fwspec);
		if (WARN_ON(!irq))
			continue;

		/* Every PPI gets its own descriptor over the same partitions. */
		desc = partition_create_desc(gic_data.fwnode, parts, nr_parts,
					     irq, &partition_domain_ops);
		if (WARN_ON(!desc))
			continue;

		gic_data.ppi_descs[i] = desc;
	}

out_put_node:
	of_node_put(parts_node);
}
2016-04-11 16:32:57 +01:00
/*
 * Fill in gic_v3_kvm_info from the device tree @node and hand it to
 * vgic_set_kvm_info().
 *
 * The maintenance interrupt is the first interrupt of @node; if it cannot
 * be mapped, the function returns early and nothing is registered. The vcpu
 * resource is copied only when its address lookup succeeds.
 */
static void __init gic_of_setup_kvm_info(struct device_node *node)
{
	int ret;
	struct resource r;
	u32 gicv_idx;

	gic_v3_kvm_info.type = GIC_V3;

	gic_v3_kvm_info.maint_irq = irq_of_parse_and_map(node, 0);
	if (!gic_v3_kvm_info.maint_irq)
		return;

	/* Default to a single redistributor region when not specified. */
	if (of_property_read_u32(node, "#redistributor-regions",
				 &gicv_idx))
		gicv_idx = 1;

	gicv_idx += 3;	/* Also skip GICD, GICC, GICH */
	ret = of_address_to_resource(node, gicv_idx, &r);
	if (!ret)
		gic_v3_kvm_info.vcpu = r;

	gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis;
	gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid;
	vgic_set_kvm_info(&gic_v3_kvm_info);
}
2022-06-16 14:46:46 +01:00
/*
 * Claim the memory region [@base, @base + @size) under @name.
 *
 * Failure to claim the region is not fatal: it is reported once as a
 * firmware bug and the caller carries on.
 */
static void gic_request_region(resource_size_t base, resource_size_t size,
			       const char *name)
{
	if (!request_mem_region(base, size, name))
		pr_warn_once(FW_BUG "%s region %pa has overlapping address\n",
			     name, &base);
}
/*
 * Resolve address entry @idx of @node into @res, claim the region under
 * @name and map it.
 *
 * Returns the mapped address on success. On failure an error pointer is
 * returned: the of_address_to_resource() error code if the lookup fails,
 * or -ENOMEM if the mapping itself fails.
 */
static void __iomem *gic_of_iomap(struct device_node *node, int idx,
				  const char *name, struct resource *res)
{
	void __iomem *mapping;
	int rc = of_address_to_resource(node, idx, res);

	if (rc)
		return IOMEM_ERR_PTR(rc);

	gic_request_region(res->start, resource_size(res), name);

	mapping = of_iomap(node, idx);
	if (!mapping)
		return IOMEM_ERR_PTR(-ENOMEM);

	return mapping;
}
2016-01-19 14:11:14 +01:00
static int __init gic_of_init ( struct device_node * node , struct device_node * parent )
{
irqchip/gicv3: Workaround for NVIDIA erratum T241-FABRIC-4
The T241 platform suffers from the T241-FABRIC-4 erratum which causes
unexpected behavior in the GIC when multiple transactions are received
simultaneously from different sources. This hardware issue impacts
NVIDIA server platforms that use more than two T241 chips
interconnected. Each chip has support for 320 {E}SPIs.
This issue occurs when multiple packets from different GICs are
incorrectly interleaved at the target chip. The erratum text below
specifies exactly what can cause multiple transfer packets susceptible
to interleaving and GIC state corruption. GIC state corruption can
lead to a range of problems, including kernel panics, and unexpected
behavior.
From the erratum text:
"In some cases, inter-socket AXI4 Stream packets with multiple
transfers, may be interleaved by the fabric when presented to ARM
Generic Interrupt Controller. GIC expects all transfers of a packet
to be delivered without any interleaving.
The following GICv3 commands may result in multiple transfer packets
over inter-socket AXI4 Stream interface:
- Register reads from GICD_I* and GICD_N*
- Register writes to 64-bit GICD registers other than GICD_IROUTERn*
- ITS command MOVALL
Multiple commands in GICv4+ utilize multiple transfer packets,
including VMOVP, VMOVI, VMAPP, and 64-bit register accesses."
This issue impacts system configurations with more than 2 sockets,
that require multi-transfer packets to be sent over inter-socket
AXI4 Stream interface between GIC instances on different sockets.
GICv4 cannot be supported. GICv3 SW model can only be supported
with the workaround. Single and Dual socket configurations are not
impacted by this issue and support GICv3 and GICv4."
Link: https://developer.nvidia.com/docs/t241-fabric-4/nvidia-t241-fabric-4-errata.pdf
Writing to the chip alias region of the GICD_In{E} registers except
GICD_ICENABLERn has an equivalent effect as writing to the global
distributor. The SPI interrupt deactivate path is not impacted by
the erratum.
To fix this problem, implement a workaround that ensures read accesses
to the GICD_In{E} registers are directed to the chip that owns the
SPI, and disable GICv4.x features. To simplify code changes, the
gic_configure_irq() function uses the same alias region for both read
and write operations to GICD_ICFGR.
Co-developed-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com> (for SMCCC/SOC ID bits)
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230319024314.3540573-2-sdonthineni@nvidia.com
2023-03-18 21:43:14 -05:00
phys_addr_t dist_phys_base ;
2016-01-19 14:11:14 +01:00
void __iomem * dist_base ;
struct redist_region * rdist_regs ;
2022-06-16 14:46:46 +01:00
struct resource res ;
2016-01-19 14:11:14 +01:00
u64 redist_stride ;
u32 nr_redist_regions ;
int err , i ;
2022-06-16 14:46:46 +01:00
dist_base = gic_of_iomap ( node , 0 , " GICD " , & res ) ;
2022-04-12 16:28:15 +01:00
if ( IS_ERR ( dist_base ) ) {
2017-07-18 16:43:10 -05:00
pr_err ( " %pOF: unable to map gic dist registers \n " , node ) ;
2022-04-12 16:28:15 +01:00
return PTR_ERR ( dist_base ) ;
2016-01-19 14:11:14 +01:00
}
irqchip/gicv3: Workaround for NVIDIA erratum T241-FABRIC-4
The T241 platform suffers from the T241-FABRIC-4 erratum which causes
unexpected behavior in the GIC when multiple transactions are received
simultaneously from different sources. This hardware issue impacts
NVIDIA server platforms that use more than two T241 chips
interconnected. Each chip has support for 320 {E}SPIs.
This issue occurs when multiple packets from different GICs are
incorrectly interleaved at the target chip. The erratum text below
specifies exactly what can cause multiple transfer packets susceptible
to interleaving and GIC state corruption. GIC state corruption can
lead to a range of problems, including kernel panics, and unexpected
behavior.
From the erratum text:
"In some cases, inter-socket AXI4 Stream packets with multiple
transfers, may be interleaved by the fabric when presented to ARM
Generic Interrupt Controller. GIC expects all transfers of a packet
to be delivered without any interleaving.
The following GICv3 commands may result in multiple transfer packets
over inter-socket AXI4 Stream interface:
- Register reads from GICD_I* and GICD_N*
- Register writes to 64-bit GICD registers other than GICD_IROUTERn*
- ITS command MOVALL
Multiple commands in GICv4+ utilize multiple transfer packets,
including VMOVP, VMOVI, VMAPP, and 64-bit register accesses."
This issue impacts system configurations with more than 2 sockets,
that require multi-transfer packets to be sent over inter-socket
AXI4 Stream interface between GIC instances on different sockets.
GICv4 cannot be supported. GICv3 SW model can only be supported
with the workaround. Single and Dual socket configurations are not
impacted by this issue and support GICv3 and GICv4."
Link: https://developer.nvidia.com/docs/t241-fabric-4/nvidia-t241-fabric-4-errata.pdf
Writing to the chip alias region of the GICD_In{E} registers except
GICD_ICENABLERn has an equivalent effect as writing to the global
distributor. The SPI interrupt deactivate path is not impacted by
the erratum.
To fix this problem, implement a workaround that ensures read accesses
to the GICD_In{E} registers are directed to the chip that owns the
SPI, and disable GICv4.x features. To simplify code changes, the
gic_configure_irq() function uses the same alias region for both read
and write operations to GICD_ICFGR.
Co-developed-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com> (for SMCCC/SOC ID bits)
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230319024314.3540573-2-sdonthineni@nvidia.com
2023-03-18 21:43:14 -05:00
dist_phys_base = res . start ;
2016-01-19 14:11:14 +01:00
err = gic_validate_dist_version ( dist_base ) ;
if ( err ) {
2017-07-18 16:43:10 -05:00
pr_err ( " %pOF: no distributor detected, giving up \n " , node ) ;
2016-01-19 14:11:14 +01:00
goto out_unmap_dist ;
}
if ( of_property_read_u32 ( node , " #redistributor-regions " , & nr_redist_regions ) )
nr_redist_regions = 1 ;
treewide: kzalloc() -> kcalloc()
The kzalloc() function has a 2-factor argument form, kcalloc(). This
patch replaces cases of:
kzalloc(a * b, gfp)
with:
kcalloc(a, b, gfp)
as well as handling cases of:
kzalloc(a * b * c, gfp)
with:
kzalloc(array3_size(a, b, c), gfp)
as it's slightly less ugly than:
kzalloc_array(array_size(a, b), c, gfp)
This does, however, attempt to ignore constant size factors like:
kzalloc(4 * 1024, gfp)
though any constants defined via macros get caught up in the conversion.
Any factors with a sizeof() of "unsigned char", "char", and "u8" were
dropped, since they're redundant.
The Coccinelle script used for this was:
// Fix redundant parens around sizeof().
@@
type TYPE;
expression THING, E;
@@
(
kzalloc(
- (sizeof(TYPE)) * E
+ sizeof(TYPE) * E
, ...)
|
kzalloc(
- (sizeof(THING)) * E
+ sizeof(THING) * E
, ...)
)
// Drop single-byte sizes and redundant parens.
@@
expression COUNT;
typedef u8;
typedef __u8;
@@
(
kzalloc(
- sizeof(u8) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(__u8) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(char) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(unsigned char) * (COUNT)
+ COUNT
, ...)
|
kzalloc(
- sizeof(u8) * COUNT
+ COUNT
, ...)
|
kzalloc(
- sizeof(__u8) * COUNT
+ COUNT
, ...)
|
kzalloc(
- sizeof(char) * COUNT
+ COUNT
, ...)
|
kzalloc(
- sizeof(unsigned char) * COUNT
+ COUNT
, ...)
)
// 2-factor product with sizeof(type/expression) and identifier or constant.
@@
type TYPE;
expression THING;
identifier COUNT_ID;
constant COUNT_CONST;
@@
(
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * (COUNT_ID)
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * COUNT_ID
+ COUNT_ID, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * (COUNT_CONST)
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * COUNT_CONST
+ COUNT_CONST, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * (COUNT_ID)
+ COUNT_ID, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * COUNT_ID
+ COUNT_ID, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * (COUNT_CONST)
+ COUNT_CONST, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * COUNT_CONST
+ COUNT_CONST, sizeof(THING)
, ...)
)
// 2-factor product, only identifiers.
@@
identifier SIZE, COUNT;
@@
- kzalloc
+ kcalloc
(
- SIZE * COUNT
+ COUNT, SIZE
, ...)
// 3-factor product with 1 sizeof(type) or sizeof(expression), with
// redundant parens removed.
@@
expression THING;
identifier STRIDE, COUNT;
type TYPE;
@@
(
kzalloc(
- sizeof(TYPE) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(TYPE) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(TYPE) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(TYPE) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(TYPE))
, ...)
|
kzalloc(
- sizeof(THING) * (COUNT) * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kzalloc(
- sizeof(THING) * (COUNT) * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kzalloc(
- sizeof(THING) * COUNT * (STRIDE)
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
|
kzalloc(
- sizeof(THING) * COUNT * STRIDE
+ array3_size(COUNT, STRIDE, sizeof(THING))
, ...)
)
// 3-factor product with 2 sizeof(variable), with redundant parens removed.
@@
expression THING1, THING2;
identifier COUNT;
type TYPE1, TYPE2;
@@
(
kzalloc(
- sizeof(TYPE1) * sizeof(TYPE2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
, ...)
|
kzalloc(
- sizeof(THING1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kzalloc(
- sizeof(THING1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(THING1), sizeof(THING2))
, ...)
|
kzalloc(
- sizeof(TYPE1) * sizeof(THING2) * COUNT
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
|
kzalloc(
- sizeof(TYPE1) * sizeof(THING2) * (COUNT)
+ array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
, ...)
)
// 3-factor product, only identifiers, with redundant parens removed.
@@
identifier STRIDE, SIZE, COUNT;
@@
(
kzalloc(
- (COUNT) * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- (COUNT) * (STRIDE) * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- (COUNT) * STRIDE * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- (COUNT) * (STRIDE) * (SIZE)
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
|
kzalloc(
- COUNT * STRIDE * SIZE
+ array3_size(COUNT, STRIDE, SIZE)
, ...)
)
// Any remaining multi-factor products, first at least 3-factor products,
// when they're not all constants...
@@
expression E1, E2, E3;
constant C1, C2, C3;
@@
(
kzalloc(C1 * C2 * C3, ...)
|
kzalloc(
- (E1) * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
|
kzalloc(
- (E1) * (E2) * E3
+ array3_size(E1, E2, E3)
, ...)
|
kzalloc(
- (E1) * (E2) * (E3)
+ array3_size(E1, E2, E3)
, ...)
|
kzalloc(
- E1 * E2 * E3
+ array3_size(E1, E2, E3)
, ...)
)
// And then all remaining 2 factors products when they're not all constants,
// keeping sizeof() as the second factor argument.
@@
expression THING, E1, E2;
type TYPE;
constant C1, C2, C3;
@@
(
kzalloc(sizeof(THING) * C2, ...)
|
kzalloc(sizeof(TYPE) * C2, ...)
|
kzalloc(C1 * C2 * C3, ...)
|
kzalloc(C1 * C2, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * (E2)
+ E2, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(TYPE) * E2
+ E2, sizeof(TYPE)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * (E2)
+ E2, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- sizeof(THING) * E2
+ E2, sizeof(THING)
, ...)
|
- kzalloc
+ kcalloc
(
- (E1) * E2
+ E1, E2
, ...)
|
- kzalloc
+ kcalloc
(
- (E1) * (E2)
+ E1, E2
, ...)
|
- kzalloc
+ kcalloc
(
- E1 * E2
+ E1, E2
, ...)
)
Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-12 14:03:40 -07:00
rdist_regs = kcalloc ( nr_redist_regions , sizeof ( * rdist_regs ) ,
GFP_KERNEL ) ;
2016-01-19 14:11:14 +01:00
if ( ! rdist_regs ) {
err = - ENOMEM ;
goto out_unmap_dist ;
}
for ( i = 0 ; i < nr_redist_regions ; i + + ) {
2022-06-16 14:46:46 +01:00
rdist_regs [ i ] . redist_base = gic_of_iomap ( node , 1 + i , " GICR " , & res ) ;
if ( IS_ERR ( rdist_regs [ i ] . redist_base ) ) {
2017-07-18 16:43:10 -05:00
pr_err ( " %pOF: couldn't map region %d \n " , node , i ) ;
2016-01-19 14:11:14 +01:00
err = - ENODEV ;
goto out_unmap_rdist ;
}
rdist_regs [ i ] . phys_base = res . start ;
}
if ( of_property_read_u64 ( node , " redistributor-stride " , & redist_stride ) )
redist_stride = 0 ;
2018-12-10 13:56:31 +00:00
gic_enable_of_quirks ( node , gic_quirks , & gic_data ) ;
irqchip/gicv3: Workaround for NVIDIA erratum T241-FABRIC-4
The T241 platform suffers from the T241-FABRIC-4 erratum which causes
unexpected behavior in the GIC when multiple transactions are received
simultaneously from different sources. This hardware issue impacts
NVIDIA server platforms that use more than two T241 chips
interconnected. Each chip has support for 320 {E}SPIs.
This issue occurs when multiple packets from different GICs are
incorrectly interleaved at the target chip. The erratum text below
specifies exactly what can cause multiple transfer packets susceptible
to interleaving and GIC state corruption. GIC state corruption can
lead to a range of problems, including kernel panics, and unexpected
behavior.
From the erratum text:
"In some cases, inter-socket AXI4 Stream packets with multiple
transfers, may be interleaved by the fabric when presented to ARM
Generic Interrupt Controller. GIC expects all transfers of a packet
to be delivered without any interleaving.
The following GICv3 commands may result in multiple transfer packets
over inter-socket AXI4 Stream interface:
- Register reads from GICD_I* and GICD_N*
- Register writes to 64-bit GICD registers other than GICD_IROUTERn*
- ITS command MOVALL
Multiple commands in GICv4+ utilize multiple transfer packets,
including VMOVP, VMOVI, VMAPP, and 64-bit register accesses."
This issue impacts system configurations with more than 2 sockets,
that require multi-transfer packets to be sent over inter-socket
AXI4 Stream interface between GIC instances on different sockets.
GICv4 cannot be supported. GICv3 SW model can only be supported
with the workaround. Single and Dual socket configurations are not
impacted by this issue and support GICv3 and GICv4."
Link: https://developer.nvidia.com/docs/t241-fabric-4/nvidia-t241-fabric-4-errata.pdf
Writing to the chip alias region of the GICD_In{E} registers except
GICD_ICENABLERn has an equivalent effect as writing to the global
distributor. The SPI interrupt deactivate path is not impacted by
the erratum.
To fix this problem, implement a workaround that ensures read accesses
to the GICD_In{E} registers are directed to the chip that owns the
SPI, and disable GICv4.x features. To simplify code changes, the
gic_configure_irq() function uses the same alias region for both read
and write operations to GICD_ICFGR.
Co-developed-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com> (for SMCCC/SOC ID bits)
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230319024314.3540573-2-sdonthineni@nvidia.com
2023-03-18 21:43:14 -05:00
err = gic_init_bases ( dist_phys_base , dist_base , rdist_regs ,
nr_redist_regions , redist_stride , & node - > fwnode ) ;
2016-04-11 09:57:54 +01:00
if ( err )
goto out_unmap_rdist ;
gic_populate_ppi_partitions ( node ) ;
2016-12-06 22:00:52 +01:00
2018-03-26 14:09:25 -07:00
if ( static_branch_likely ( & supports_deactivate_key ) )
2016-12-06 22:00:52 +01:00
gic_of_setup_kvm_info ( node ) ;
2016-04-11 09:57:54 +01:00
return 0 ;
2016-01-19 14:11:14 +01:00
2014-06-30 16:01:31 +01:00
out_unmap_rdist :
2014-11-24 14:35:10 +00:00
for ( i = 0 ; i < nr_redist_regions ; i + + )
2022-04-12 16:28:15 +01:00
if ( rdist_regs [ i ] . redist_base & & ! IS_ERR ( rdist_regs [ i ] . redist_base ) )
2014-11-24 14:35:10 +00:00
iounmap ( rdist_regs [ i ] . redist_base ) ;
kfree ( rdist_regs ) ;
2014-06-30 16:01:31 +01:00
out_unmap_dist :
iounmap ( dist_base ) ;
return err ;
}
/*
 * Device-tree entry point: associates the "arm,gic-v3" compatible string
 * with gic_of_init() through the IRQCHIP_DECLARE() helper.
 */
IRQCHIP_DECLARE ( gic_v3 , " arm,gic-v3 " , gic_of_init ) ;
2016-01-19 14:11:15 +01:00
# ifdef CONFIG_ACPI
2016-04-11 16:32:56 +01:00
/*
 * Scratch state collected while walking the MADT during ACPI probing.
 * Tagged __initdata, and only touched by the __init helpers in this
 * CONFIG_ACPI section.
 */
static struct {
	/* Distributor registers, mapped by gic_acpi_init() */
	void __iomem *dist_base;
	/* Redistributor region array, filled by gic_acpi_register_redist() */
	struct redist_region *redist_regs;
	/* Number of regions counted by acpi_validate_gic_table() */
	u32 nr_redist_regions;
	/* True when redistributors are described through GICC entries */
	bool single_redist;
	/* Enabled GICC entries that carry a GICR base address */
	int enabled_rdists;
	/* Maintenance interrupt taken from the first enabled GICC entry */
	u32 maint_irq;
	/* Trigger mode (edge/level) of that maintenance interrupt */
	int maint_irq_mode;
	/* GICV base address taken from the first enabled GICC entry */
	phys_addr_t vcpu_base;
} acpi_data __initdata;
2016-01-19 14:11:16 +01:00
/*
 * Store one redistributor region in the next free slot of
 * acpi_data.redist_regs. The slot index lives in a function-local
 * static, so successive calls fill the array in call order.
 *
 * @phys_base:   physical base address of the region
 * @redist_base: mapped address of the region
 */
static void __init
gic_acpi_register_redist(phys_addr_t phys_base, void __iomem *redist_base)
{
	static int next_slot;
	struct redist_region *region = &acpi_data.redist_regs[next_slot];

	region->phys_base = phys_base;
	region->redist_base = redist_base;
	region->single_redist = acpi_data.single_redist;

	next_slot++;
}
2016-01-19 14:11:15 +01:00
static int __init
2019-03-11 14:55:57 -06:00
gic_acpi_parse_madt_redist ( union acpi_subtable_headers * header ,
2016-01-19 14:11:15 +01:00
const unsigned long end )
{
struct acpi_madt_generic_redistributor * redist =
( struct acpi_madt_generic_redistributor * ) header ;
void __iomem * redist_base ;
redist_base = ioremap ( redist - > base_address , redist - > length ) ;
if ( ! redist_base ) {
pr_err ( " Couldn't map GICR region @%llx \n " , redist - > base_address ) ;
return - ENOMEM ;
}
2022-06-16 14:46:46 +01:00
gic_request_region ( redist - > base_address , redist - > length , " GICR " ) ;
2016-01-19 14:11:15 +01:00
2016-01-19 14:11:16 +01:00
gic_acpi_register_redist ( redist - > base_address , redist_base ) ;
2016-01-19 14:11:15 +01:00
return 0 ;
}
2016-01-19 14:11:16 +01:00
static int __init
2019-03-11 14:55:57 -06:00
gic_acpi_parse_madt_gicc ( union acpi_subtable_headers * header ,
2016-01-19 14:11:16 +01:00
const unsigned long end )
{
struct acpi_madt_generic_interrupt * gicc =
( struct acpi_madt_generic_interrupt * ) header ;
2016-04-11 16:32:56 +01:00
u32 reg = readl_relaxed ( acpi_data . dist_base + GICD_PIDR2 ) & GIC_PIDR2_ARCH_MASK ;
2016-01-19 14:11:16 +01:00
u32 size = reg = = GIC_PIDR2_ARCH_GICv4 ? SZ_64K * 4 : SZ_64K * 2 ;
void __iomem * redist_base ;
2017-12-05 13:16:21 -06:00
/* GICC entry which has !ACPI_MADT_ENABLED is not unusable so skip */
if ( ! ( gicc - > flags & ACPI_MADT_ENABLED ) )
return 0 ;
2016-01-19 14:11:16 +01:00
redist_base = ioremap ( gicc - > gicr_base_address , size ) ;
if ( ! redist_base )
return - ENOMEM ;
2022-06-16 14:46:46 +01:00
gic_request_region ( gicc - > gicr_base_address , size , " GICR " ) ;
2016-01-19 14:11:16 +01:00
gic_acpi_register_redist ( gicc - > gicr_base_address , redist_base ) ;
return 0 ;
}
/*
 * Walk the MADT and map every redistributor region, using the GICC
 * parser when acpi_data.single_redist is set and the GICR parser
 * otherwise.
 *
 * Returns 0 if the table walk reports at least one entry, -ENODEV
 * otherwise.
 */
static int __init gic_acpi_collect_gicr_base(void)
{
	enum acpi_madt_type type = ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR;
	acpi_tbl_entry_handler parser = gic_acpi_parse_madt_redist;

	if (acpi_data.single_redist) {
		type = ACPI_MADT_TYPE_GENERIC_INTERRUPT;
		parser = gic_acpi_parse_madt_gicc;
	}

	/* Collect redistributor base addresses from the selected subtables */
	if (acpi_table_parse_madt(type, parser, 0) <= 0) {
		pr_info(" No valid GICR entries exist \n ");
		return -ENODEV;
	}

	return 0;
}
2019-03-11 14:55:57 -06:00
/*
 * MADT callback used only to count GICR subtables: the mere presence
 * of an entry is enough, so every entry is accepted.
 */
static int __init
gic_acpi_match_gicr(union acpi_subtable_headers *header,
		    const unsigned long end)
{
	return 0;
}
2019-03-11 14:55:57 -06:00
/*
 * MADT callback used while counting redistributors described through
 * GICC subtables.
 *
 * Returns 0 for a disabled entry (skipped) and for an enabled entry
 * with a GICR base address (which also bumps acpi_data.enabled_rdists);
 * returns -ENODEV for an enabled entry without a GICR base address.
 */
static int __init
gic_acpi_match_gicc(union acpi_subtable_headers *header,
		    const unsigned long end)
{
	struct acpi_madt_generic_interrupt *cpu_if =
		(struct acpi_madt_generic_interrupt *)header;

	/*
	 * Firmware is allowed to pass disabled GICC entries. They are not
	 * an error: skip them rather than failing the probe.
	 */
	if (!(cpu_if->flags & ACPI_MADT_ENABLED))
		return 0;

	/* An enabled entry must present its GICR base through the GICC */
	if (!cpu_if->gicr_base_address)
		return -ENODEV;

	acpi_data.enabled_rdists++;
	return 0;
}
/*
 * Count the redistributor regions described by the MADT and record in
 * acpi_data.single_redist which description style is in use.
 *
 * GICR and GICC descriptions must not be mixed: GICR subtables are
 * looked for first, and GICC subtables are only considered when no
 * GICR subtable is found.
 *
 * Returns the number of regions, or the non-positive result of the
 * GICC table walk when nothing usable is found.
 */
static int __init gic_acpi_count_gicr_regions(void)
{
	int nr;

	nr = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR,
				   gic_acpi_match_gicr, 0);
	if (nr > 0) {
		acpi_data.single_redist = false;
		return nr;
	}

	nr = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
				   gic_acpi_match_gicc, 0);
	if (nr <= 0)
		return nr;

	/* Only the enabled GICC entries contribute a redistributor */
	acpi_data.single_redist = true;
	return acpi_data.enabled_rdists;
}
2016-01-19 14:11:15 +01:00
/*
 * Validation hook for the ACPI probe entries: accept the GICD subtable
 * only if its version matches the probe entry's driver_data and at
 * least one redistributor region can be counted. On success the region
 * count is stored in acpi_data.nr_redist_regions.
 */
static bool __init
acpi_validate_gic_table(struct acpi_subtable_header *header,
			struct acpi_probe_entry *ape)
{
	struct acpi_madt_generic_distributor *gicd =
		(struct acpi_madt_generic_distributor *)header;
	int nr_regions;

	if (gicd->version != ape->driver_data)
		return false;

	/* The regions have to be counted anyway, so do it as early as possible */
	nr_regions = gic_acpi_count_gicr_regions();
	if (nr_regions <= 0)
		return false;

	acpi_data.nr_redist_regions = nr_regions;
	return true;
}
2019-03-11 14:55:57 -06:00
/*
 * MADT GICC callback collecting virtualization parameters. The first
 * enabled entry seeds acpi_data (maintenance interrupt, its trigger
 * mode and the GICV base); every later enabled entry must carry the
 * same values.
 *
 * Returns 0 for disabled entries, for the seeding entry and for
 * matching entries; -EINVAL on a mismatch.
 */
static int __init
gic_acpi_parse_virt_madt_gicc(union acpi_subtable_headers *header,
			      const unsigned long end)
{
	static bool first_entry = true;
	struct acpi_madt_generic_interrupt *cpu_if =
		(struct acpi_madt_generic_interrupt *)header;
	int mode;

	/* Skip unusable CPUs */
	if (!(cpu_if->flags & ACPI_MADT_ENABLED))
		return 0;

	if (cpu_if->flags & ACPI_MADT_VGIC_IRQ_MODE)
		mode = ACPI_EDGE_SENSITIVE;
	else
		mode = ACPI_LEVEL_SENSITIVE;

	if (first_entry) {
		first_entry = false;

		acpi_data.maint_irq = cpu_if->vgic_interrupt;
		acpi_data.maint_irq_mode = mode;
		acpi_data.vcpu_base = cpu_if->gicv_base_address;

		return 0;
	}

	/* The maintenance interrupt and GICV must be identical on every CPU */
	if (acpi_data.maint_irq == cpu_if->vgic_interrupt &&
	    acpi_data.maint_irq_mode == mode &&
	    acpi_data.vcpu_base == cpu_if->gicv_base_address)
		return 0;

	return -EINVAL;
}
/*
 * Walk the MADT GICC subtables to gather virtualization information.
 * Returns true when the table walk reports a positive entry count.
 */
static bool __init gic_acpi_collect_virt_info(void)
{
	return acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
				     gic_acpi_parse_virt_madt_gicc, 0) > 0;
}
2016-01-19 14:11:15 +01:00
/* Length used by gic_acpi_init() when mapping and requesting the GICD region */
# define ACPI_GICV3_DIST_MEM_SIZE (SZ_64K)
2016-04-11 16:32:57 +01:00
/* NOTE(review): no user of this size is visible in this part of the file */
# define ACPI_GICV2_VCTRL_MEM_SIZE (SZ_4K)
/* Length of the vcpu resource handed over by gic_acpi_setup_kvm_info() */
# define ACPI_GICV2_VCPU_MEM_SIZE (SZ_8K)
/*
 * Fill gic_v3_kvm_info from the ACPI tables and pass it to
 * vgic_set_kvm_info(). Returns early, without handing anything over,
 * if the virtualization data cannot be collected or the maintenance
 * interrupt cannot be registered.
 */
static void __init gic_acpi_setup_kvm_info(void)
{
	int virq;

	if (!gic_acpi_collect_virt_info()) {
		pr_warn(" Unable to get hardware information used for virtualization \n ");
		return;
	}

	gic_v3_kvm_info.type = GIC_V3;

	virq = acpi_register_gsi(NULL, acpi_data.maint_irq,
				 acpi_data.maint_irq_mode, ACPI_ACTIVE_HIGH);
	if (virq <= 0)
		return;

	gic_v3_kvm_info.maint_irq = virq;

	/* Describe the vcpu region only when a GICV base was found */
	if (acpi_data.vcpu_base) {
		gic_v3_kvm_info.vcpu.flags = IORESOURCE_MEM;
		gic_v3_kvm_info.vcpu.start = acpi_data.vcpu_base;
		gic_v3_kvm_info.vcpu.end = gic_v3_kvm_info.vcpu.start +
					   ACPI_GICV2_VCPU_MEM_SIZE - 1;
	}

	gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis;
	gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid;

	vgic_set_kvm_info(&gic_v3_kvm_info);
}
2016-01-19 14:11:15 +01:00
2022-07-20 18:51:21 +08:00
/* Firmware node allocated by gic_acpi_init() for the GSI domain */
static struct fwnode_handle *gsi_domain_handle;

/*
 * Callback passed to acpi_set_irq_model(): every GSI resolves to the
 * same domain handle, so the @gsi argument is not inspected.
 */
static struct fwnode_handle *
gic_v3_get_gsi_domain_id(u32 gsi)
{
	return gsi_domain_handle;
}
2016-01-19 14:11:15 +01:00
static int __init
2020-05-30 16:34:29 +02:00
gic_acpi_init ( union acpi_subtable_headers * header , const unsigned long end )
2016-01-19 14:11:15 +01:00
{
struct acpi_madt_generic_distributor * dist ;
2016-04-11 16:32:56 +01:00
size_t size ;
2016-01-19 14:11:16 +01:00
int i , err ;
2016-01-19 14:11:15 +01:00
/* Get distributor base address */
dist = ( struct acpi_madt_generic_distributor * ) header ;
2016-04-11 16:32:56 +01:00
acpi_data . dist_base = ioremap ( dist - > base_address ,
ACPI_GICV3_DIST_MEM_SIZE ) ;
if ( ! acpi_data . dist_base ) {
2016-01-19 14:11:15 +01:00
pr_err ( " Unable to map GICD registers \n " ) ;
return - ENOMEM ;
}
2022-06-16 14:46:46 +01:00
gic_request_region ( dist - > base_address , ACPI_GICV3_DIST_MEM_SIZE , " GICD " ) ;
2016-01-19 14:11:15 +01:00
2016-04-11 16:32:56 +01:00
err = gic_validate_dist_version ( acpi_data . dist_base ) ;
2016-01-19 14:11:15 +01:00
if ( err ) {
2017-11-13 19:23:49 +05:30
pr_err ( " No distributor detected at @%p, giving up \n " ,
2016-04-11 16:32:56 +01:00
acpi_data . dist_base ) ;
2016-01-19 14:11:15 +01:00
goto out_dist_unmap ;
}
2016-04-11 16:32:56 +01:00
size = sizeof ( * acpi_data . redist_regs ) * acpi_data . nr_redist_regions ;
acpi_data . redist_regs = kzalloc ( size , GFP_KERNEL ) ;
if ( ! acpi_data . redist_regs ) {
2016-01-19 14:11:15 +01:00
err = - ENOMEM ;
goto out_dist_unmap ;
}
2016-01-19 14:11:16 +01:00
err = gic_acpi_collect_gicr_base ( ) ;
if ( err )
2016-01-19 14:11:15 +01:00
goto out_redist_unmap ;
2022-07-20 18:51:21 +08:00
gsi_domain_handle = irq_domain_alloc_fwnode ( & dist - > base_address ) ;
if ( ! gsi_domain_handle ) {
2016-01-19 14:11:15 +01:00
err = - ENOMEM ;
goto out_redist_unmap ;
}
irqchip/gicv3: Workaround for NVIDIA erratum T241-FABRIC-4
The T241 platform suffers from the T241-FABRIC-4 erratum which causes
unexpected behavior in the GIC when multiple transactions are received
simultaneously from different sources. This hardware issue impacts
NVIDIA server platforms that use more than two T241 chips
interconnected. Each chip has support for 320 {E}SPIs.
This issue occurs when multiple packets from different GICs are
incorrectly interleaved at the target chip. The erratum text below
specifies exactly what can cause multiple transfer packets susceptible
to interleaving and GIC state corruption. GIC state corruption can
lead to a range of problems, including kernel panics, and unexpected
behavior.
>From the erratum text:
"In some cases, inter-socket AXI4 Stream packets with multiple
transfers, may be interleaved by the fabric when presented to ARM
Generic Interrupt Controller. GIC expects all transfers of a packet
to be delivered without any interleaving.
The following GICv3 commands may result in multiple transfer packets
over inter-socket AXI4 Stream interface:
- Register reads from GICD_I* and GICD_N*
- Register writes to 64-bit GICD registers other than GICD_IROUTERn*
- ITS command MOVALL
Multiple commands in GICv4+ utilize multiple transfer packets,
including VMOVP, VMOVI, VMAPP, and 64-bit register accesses."
This issue impacts system configurations with more than 2 sockets,
that require multi-transfer packets to be sent over inter-socket
AXI4 Stream interface between GIC instances on different sockets.
GICv4 cannot be supported. GICv3 SW model can only be supported
with the workaround. Single and Dual socket configurations are not
impacted by this issue and support GICv3 and GICv4."
Link: https://developer.nvidia.com/docs/t241-fabric-4/nvidia-t241-fabric-4-errata.pdf
Writing to the chip alias region of the GICD_In{E} registers except
GICD_ICENABLERn has an equivalent effect as writing to the global
distributor. The SPI interrupt deactivate path is not impacted by
the erratum.
To fix this problem, implement a workaround that ensures read accesses
to the GICD_In{E} registers are directed to the chip that owns the
SPI, and disable GICv4.x features. To simplify code changes, the
gic_configure_irq() function uses the same alias region for both read
and write operations to GICD_ICFGR.
Co-developed-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Vikram Sethi <vsethi@nvidia.com>
Signed-off-by: Shanker Donthineni <sdonthineni@nvidia.com>
Acked-by: Sudeep Holla <sudeep.holla@arm.com> (for SMCCC/SOC ID bits)
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20230319024314.3540573-2-sdonthineni@nvidia.com
2023-03-18 21:43:14 -05:00
err = gic_init_bases ( dist - > base_address , acpi_data . dist_base ,
acpi_data . redist_regs , acpi_data . nr_redist_regions ,
0 , gsi_domain_handle ) ;
2016-01-19 14:11:15 +01:00
if ( err )
goto out_fwhandle_free ;
2022-07-20 18:51:21 +08:00
acpi_set_irq_model ( ACPI_IRQ_MODEL_GIC , gic_v3_get_gsi_domain_id ) ;
2016-12-06 22:00:52 +01:00
2018-03-26 14:09:25 -07:00
if ( static_branch_likely ( & supports_deactivate_key ) )
2016-12-06 22:00:52 +01:00
gic_acpi_setup_kvm_info ( ) ;
2016-04-11 16:32:57 +01:00
2016-01-19 14:11:15 +01:00
return 0 ;
out_fwhandle_free :
2022-07-20 18:51:21 +08:00
irq_domain_free_fwnode ( gsi_domain_handle ) ;
2016-01-19 14:11:15 +01:00
out_redist_unmap :
2016-04-11 16:32:56 +01:00
for ( i = 0 ; i < acpi_data . nr_redist_regions ; i + + )
if ( acpi_data . redist_regs [ i ] . redist_base )
iounmap ( acpi_data . redist_regs [ i ] . redist_base ) ;
kfree ( acpi_data . redist_regs ) ;
2016-01-19 14:11:15 +01:00
out_dist_unmap :
2016-04-11 16:32:56 +01:00
iounmap ( acpi_data . dist_base ) ;
2016-01-19 14:11:15 +01:00
return err ;
}
/*
 * ACPI probe entries for the GICD subtable. All three share the same
 * validator (acpi_validate_gic_table) and init routine (gic_acpi_init)
 * and differ only in the version constant they pass: V3, V4 or NONE.
 * NOTE(review): that constant is presumably what the validator sees as
 * ape->driver_data -- confirm against the IRQCHIP_ACPI_DECLARE definition.
 */
IRQCHIP_ACPI_DECLARE ( gic_v3 , ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR ,
acpi_validate_gic_table , ACPI_MADT_GIC_VERSION_V3 ,
gic_acpi_init ) ;
IRQCHIP_ACPI_DECLARE ( gic_v4 , ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR ,
acpi_validate_gic_table , ACPI_MADT_GIC_VERSION_V4 ,
gic_acpi_init ) ;
IRQCHIP_ACPI_DECLARE ( gic_v3_or_v4 , ACPI_MADT_TYPE_GENERIC_DISTRIBUTOR ,
acpi_validate_gic_table , ACPI_MADT_GIC_VERSION_NONE ,
gic_acpi_init ) ;
# endif