From b95df3bd1ea31fadc9e1471a036b4c08199aa0f0 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus
Date: Wed, 10 Jan 2024 07:40:07 +0000
Subject: [PATCH 01/11] arm64: irq: include <linux/cpumask.h>
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sorting include files in alphabetic order in drivers/tty/serial/samsung.c
revealed the following error:

In file included from drivers/tty/serial/samsung_tty.c:24:
./arch/arm64/include/asm/irq.h:9:43: error: unknown type name ‘cpumask_t’
    9 | void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu);
      |                                           ^~~~~~~~~

Include cpumask.h to avoid unknown type errors for parents of irq.h that
don't include cpumask.h.

Acked-by: Mark Rutland
Signed-off-by: Tudor Ambarus
Reviewed-by: Stephen Boyd
Link: https://lore.kernel.org/r/20240110074007.4020016-1-tudor.ambarus@linaro.org
Signed-off-by: Will Deacon
---
 arch/arm64/include/asm/irq.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index 50ce8b697ff3..e93548914c36 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -4,6 +4,8 @@

 #ifndef __ASSEMBLER__

+#include <linux/cpumask.h>
+
 #include <asm-generic/irq.h>

 void arch_trigger_cpumask_backtrace(const cpumask_t *mask, int exclude_cpu);

From 8c5a19cb17a71e52303150335b459c7d2d28a155 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Wed, 10 Jan 2024 14:26:20 +0100
Subject: [PATCH 02/11] arm64: scs: Work around full LTO issue with dynamic SCS

Full LTO takes the '-mbranch-protection=none' passed to the compiler when
generating the dynamic shadow call stack patching code as a hint to stop
emitting PAC instructions altogether. (Thin LTO appears unaffected by this)

Work around this by stripping unwind tables from the object in question,
which should be sufficient to prevent the patching code from attempting
to patch itself.

Fixes: 3b619e22c460 ("arm64: implement dynamic shadow call stack for Clang")
Signed-off-by: Ard Biesheuvel
Reviewed-by: Sami Tolvanen
Reviewed-by: Kees Cook
Link: https://lore.kernel.org/r/20240110132619.258809-2-ardb+git@google.com
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/Makefile | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index d95b3d6b471a..e5d03a7039b4 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -73,7 +73,13 @@ obj-$(CONFIG_ARM64_MTE)			+= mte.o
 obj-y					+= vdso-wrap.o
 obj-$(CONFIG_COMPAT_VDSO)		+= vdso32-wrap.o
 obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS)	+= patch-scs.o
-CFLAGS_patch-scs.o			+= -mbranch-protection=none
+
+# We need to prevent the SCS patching code from patching itself. Using
+# -mbranch-protection=none here to avoid the patchable PAC opcodes from being
+# generated triggers an issue with full LTO on Clang, which stops emitting PAC
+# instructions altogether. So instead, omit the unwind tables used by the
+# patching code, so it will not be able to locate its own PAC instructions.
+CFLAGS_patch-scs.o += -fno-asynchronous-unwind-tables -fno-unwind-tables # Force dependency (vdso*-wrap.S includes vdso.so through incbin) $(obj)/vdso-wrap.o: $(obj)/vdso/vdso.so From 3931261ecf46151a5e779c96fb3da00677b6dc37 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 11 Jan 2024 12:24:48 +0100 Subject: [PATCH 03/11] arm64: fpsimd: Bring cond_yield asm macro in line with new rules We no longer disable softirqs or preemption when doing kernel mode SIMD, and so for fully preemptible kernels, there is no longer a need to do any explicit yielding (and for non-preemptible kernels, yielding is not needed either). That leaves voluntary preemption, where only explicit yield calls may result in a reschedule. To retain the existing behavior for such a configuration, we should take the new situation into account, where the preempt count will be zero rather than one, and yielding to pending softirqs is unnecessary. Fixes: aefbab8e77eb ("arm64: fpsimd: Preserve/restore kernel mode NEON at context switch") Signed-off-by: Ard Biesheuvel Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20240111112447.577640-2-ardb+git@google.com Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 25 +++++++++---------------- arch/arm64/kernel/asm-offsets.c | 2 -- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 7b1975bf4b90..513787e43329 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -760,32 +760,25 @@ alternative_endif .endm /* - * Check whether preempt/bh-disabled asm code should yield as soon as - * it is able. This is the case if we are currently running in task - * context, and either a softirq is pending, or the TIF_NEED_RESCHED - * flag is set and re-enabling preemption a single time would result in - * a preempt count of zero. (Note that the TIF_NEED_RESCHED flag is - * stored negated in the top word of the thread_info::preempt_count + * Check whether asm code should yield as soon as it is able. This is + * the case if we are currently running in task context, and the + * TIF_NEED_RESCHED flag is set. (Note that the TIF_NEED_RESCHED flag + * is stored negated in the top word of the thread_info::preempt_count * field) */ - .macro cond_yield, lbl:req, tmp:req, tmp2:req + .macro cond_yield, lbl:req, tmp:req, tmp2 +#ifdef CONFIG_PREEMPT_VOLUNTARY get_current_task \tmp ldr \tmp, [\tmp, #TSK_TI_PREEMPT] /* * If we are serving a softirq, there is no point in yielding: the * softirq will not be preempted no matter what we do, so we should - * run to completion as quickly as we can. + * run to completion as quickly as we can. The preempt_count field will + * have BIT(SOFTIRQ_SHIFT) set in this case, so the zero check will + * catch this case too. 
*/ - tbnz \tmp, #SOFTIRQ_SHIFT, .Lnoyield_\@ -#ifdef CONFIG_PREEMPTION - sub \tmp, \tmp, #PREEMPT_DISABLE_OFFSET cbz \tmp, \lbl #endif - adr_l \tmp, irq_stat + IRQ_CPUSTAT_SOFTIRQ_PENDING - get_this_cpu_offset \tmp2 - ldr w\tmp, [\tmp, \tmp2] - cbnz w\tmp, \lbl // yield on pending softirq in task context -.Lnoyield_\@: .endm /* diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 5ff1942b04fc..5a7dbbe0ce63 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -117,8 +117,6 @@ int main(void) DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE); BLANK(); DEFINE(PREEMPT_DISABLE_OFFSET, PREEMPT_DISABLE_OFFSET); - DEFINE(SOFTIRQ_SHIFT, SOFTIRQ_SHIFT); - DEFINE(IRQ_CPUSTAT_SOFTIRQ_PENDING, offsetof(irq_cpustat_t, __softirq_pending)); BLANK(); DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); BLANK(); From 546b7cde9b1dd36089649101b75266564600ffe5 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 10 Jan 2024 11:29:20 -0600 Subject: [PATCH 04/11] arm64: Rename ARM64_WORKAROUND_2966298 In preparation to apply ARM64_WORKAROUND_2966298 for multiple errata, rename the kconfig and capability. No functional change. Cc: stable@vger.kernel.org Signed-off-by: Rob Herring Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20240110-arm-errata-a510-v1-1-d02bc51aeeee@kernel.org Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 4 ++++ arch/arm64/kernel/cpu_errata.c | 4 ++-- arch/arm64/kernel/entry.S | 2 +- arch/arm64/tools/cpucaps | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b67e6934316f..96f31e235a1a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1037,8 +1037,12 @@ config ARM64_ERRATUM_2645198 If unsure, say Y. +config ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD + bool + config ARM64_ERRATUM_2966298 bool "Cortex-A520: 2966298: workaround for speculatively executed unprivileged load" + select ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD default y help This option adds the workaround for ARM Cortex-A520 erratum 2966298. 
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index e29e0fea63fb..cb5e0622168d 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -713,10 +713,10 @@ const struct arm64_cpu_capabilities arm64_errata[] = { MIDR_FIXED(MIDR_CPU_VAR_REV(1,1), BIT(25)), }, #endif -#ifdef CONFIG_ARM64_ERRATUM_2966298 +#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD { .desc = "ARM erratum 2966298", - .capability = ARM64_WORKAROUND_2966298, + .capability = ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD, /* Cortex-A520 r0p0 - r0p1 */ ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1), }, diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a6030913cd58..544ab46649f3 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -428,7 +428,7 @@ alternative_else_nop_endif ldp x28, x29, [sp, #16 * 14] .if \el == 0 -alternative_if ARM64_WORKAROUND_2966298 +alternative_if ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD tlbi vale1, xzr dsb nsh alternative_else_nop_endif diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index 1e07d74d7a6c..b912b1409fc0 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -84,7 +84,6 @@ WORKAROUND_2077057 WORKAROUND_2457168 WORKAROUND_2645198 WORKAROUND_2658417 -WORKAROUND_2966298 WORKAROUND_AMPERE_AC03_CPU_38 WORKAROUND_TRBE_OVERWRITE_FILL_MODE WORKAROUND_TSB_FLUSH_FAILURE @@ -100,3 +99,4 @@ WORKAROUND_NVIDIA_CARMEL_CNP WORKAROUND_QCOM_FALKOR_E1003 WORKAROUND_REPEAT_TLBI WORKAROUND_SPECULATIVE_AT +WORKAROUND_SPECULATIVE_UNPRIV_LOAD From f827bcdafa2a2ac21c91e47f587e8d0c76195409 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 10 Jan 2024 11:29:21 -0600 Subject: [PATCH 05/11] arm64: errata: Add Cortex-A510 speculative unprivileged load workaround Implement the workaround for ARM Cortex-A510 erratum 3117295. On an affected Cortex-A510 core, a speculatively executed unprivileged load might leak data from a privileged load via a cache side channel. The issue only exists for loads within a translation regime with the same translation (e.g. same ASID and VMID). Therefore, the issue only affects the return to EL0. The erratum and workaround are the same as ARM Cortex-A520 erratum 2966298, so reuse the existing workaround. Cc: stable@vger.kernel.org Signed-off-by: Rob Herring Reviewed-by: Mark Rutland Link: https://lore.kernel.org/r/20240110-arm-errata-a510-v1-2-d02bc51aeeee@kernel.org Signed-off-by: Will Deacon --- Documentation/arch/arm64/silicon-errata.rst | 2 ++ arch/arm64/Kconfig | 14 ++++++++++++++ arch/arm64/kernel/cpu_errata.c | 17 +++++++++++++++-- 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index f47f63bcf67c..7acd64c61f50 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -71,6 +71,8 @@ stable kernels. 
+----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A510 | #2658417 | ARM64_ERRATUM_2658417 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A510 | #3117295 | ARM64_ERRATUM_3117295 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A520 | #2966298 | ARM64_ERRATUM_2966298 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A53 | #826319 | ARM64_ERRATUM_826319 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 96f31e235a1a..bfd275249366 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1054,6 +1054,20 @@ config ARM64_ERRATUM_2966298 If unsure, say Y. +config ARM64_ERRATUM_3117295 + bool "Cortex-A510: 3117295: workaround for speculatively executed unprivileged load" + select ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD + default y + help + This option adds the workaround for ARM Cortex-A510 erratum 3117295. + + On an affected Cortex-A510 core, a speculatively executed unprivileged + load might leak data from a privileged level via a cache side channel. + + Work around this problem by executing a TLBI before returning to EL0. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index cb5e0622168d..967c7c7a4e7d 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -416,6 +416,19 @@ static struct midr_range broken_aarch32_aes[] = { }; #endif /* CONFIG_ARM64_WORKAROUND_TRBE_WRITE_OUT_OF_RANGE */ +#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD +static const struct midr_range erratum_spec_unpriv_load_list[] = { +#ifdef CONFIG_ARM64_ERRATUM_3117295 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A510), +#endif +#ifdef CONFIG_ARM64_ERRATUM_2966298 + /* Cortex-A520 r0p0 to r0p1 */ + MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1), +#endif + {}, +}; +#endif + const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { @@ -715,10 +728,10 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #endif #ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD { - .desc = "ARM erratum 2966298", + .desc = "ARM errata 2966298, 3117295", .capability = ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD, /* Cortex-A520 r0p0 - r0p1 */ - ERRATA_MIDR_REV_RANGE(MIDR_CORTEX_A520, 0, 0, 1), + ERRATA_MIDR_RANGE_LIST(erratum_spec_unpriv_load_list), }, #endif #ifdef CONFIG_AMPERE_ERRATUM_AC03_CPU_38 From 832dd634bd1b4e3bbe9f10b9c9ba5db6f6f2b97f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 16 Jan 2024 11:02:20 +0000 Subject: [PATCH 06/11] arm64: entry: fix ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD Currently the ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD workaround isn't quite right, as it is supposed to be applied after the last explicit memory access, but is immediately followed by an LDR. 
The ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD workaround is used to handle Cortex-A520 erratum 2966298 and Cortex-A510 erratum 3117295, which are described in: * https://developer.arm.com/documentation/SDEN2444153/0600/?lang=en * https://developer.arm.com/documentation/SDEN1873361/1600/?lang=en In both cases the workaround is described as: | If pagetable isolation is disabled, the context switch logic in the | kernel can be updated to execute the following sequence on affected | cores before exiting to EL0, and after all explicit memory accesses: | | 1. A non-shareable TLBI to any context and/or address, including | unused contexts or addresses, such as a `TLBI VALE1 Xzr`. | | 2. A DSB NSH to guarantee completion of the TLBI. The important part being that the TLBI+DSB must be placed "after all explicit memory accesses". Unfortunately, as-implemented, the TLBI+DSB is immediately followed by an LDR, as we have: | alternative_if ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD | tlbi vale1, xzr | dsb nsh | alternative_else_nop_endif | alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 | ldr lr, [sp, #S_LR] | add sp, sp, #PT_REGS_SIZE // restore sp | eret | alternative_else_nop_endif | | [ ... KPTI exception return path ... ] This patch fixes this by reworking the logic to place the TLBI+DSB immediately before the ERET, after all explicit memory accesses. The ERET is currently in a separate alternative block, and alternatives cannot be nested. To account for this, the alternative block for ARM64_UNMAP_KERNEL_AT_EL0 is replaced with a single alternative branch to skip the KPTI logic, with the new shape of the logic being: | alternative_insn "b .L_skip_tramp_exit_\@", nop, ARM64_UNMAP_KERNEL_AT_EL0 | [ ... KPTI exception return path ... ] | .L_skip_tramp_exit_\@: | | ldr lr, [sp, #S_LR] | add sp, sp, #PT_REGS_SIZE // restore sp | | alternative_if ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD | tlbi vale1, xzr | dsb nsh | alternative_else_nop_endif | eret The new structure means that the workaround is only applied when KPTI is not in use; this is fine as noted in the documented implications of the erratum: | Pagetable isolation between EL0 and higher level ELs prevents the | issue from occurring. ... and as per the workaround description quoted above, the workaround is only necessary "If pagetable isolation is disabled". 
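As an illustrative sketch only (assuming the usual semantics of
alternative_insn, where the first instruction is kept unless the named
capability is detected at boot), the new branch resolves as:

| 	/* KPTI disabled: branch kept, the KPTI trampoline exit is skipped */
| 	b	.L_skip_tramp_exit_\@
|
| 	/* KPTI enabled: branch patched to a NOP, the trampoline exit runs
| 	 * and the TLBI+DSB sequence below it is never reached */
| 	nop

so the TLBI+DSB is only ever executed on the non-KPTI return path, where it
is actually needed.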
Fixes: 471470bc7052 ("arm64: errata: Add Cortex-A520 speculative unprivileged load workaround")
Signed-off-by: Mark Rutland
Cc: Catalin Marinas
Cc: James Morse
Cc: Rob Herring
Cc: Will Deacon
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20240116110221.420467-2-mark.rutland@arm.com
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/entry.S | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 544ab46649f3..7fcbee0f6c0e 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -428,16 +428,9 @@ alternative_else_nop_endif
 	ldp	x28, x29, [sp, #16 * 14]

 	.if	\el == 0
-alternative_if ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
-	tlbi	vale1, xzr
-	dsb	nsh
-alternative_else_nop_endif
-alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0
-	ldr	lr, [sp, #S_LR]
-	add	sp, sp, #PT_REGS_SIZE		// restore sp
-	eret
-alternative_else_nop_endif
 #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+	alternative_insn "b .L_skip_tramp_exit_\@", nop, ARM64_UNMAP_KERNEL_AT_EL0
+
 	msr	far_el1, x29

 	ldr_this_cpu	x30, this_cpu_vector, x29
@@ -446,7 +439,18 @@ alternative_else_nop_endif
 	ldr	lr, [sp, #S_LR]		// restore x30
 	add	sp, sp, #PT_REGS_SIZE	// restore sp
 	br	x29
+
+.L_skip_tramp_exit_\@:
 #endif
+	ldr	lr, [sp, #S_LR]
+	add	sp, sp, #PT_REGS_SIZE	// restore sp
+
+	/* This must be after the last explicit memory access */
+alternative_if ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD
+	tlbi	vale1, xzr
+	dsb	nsh
+alternative_else_nop_endif
+	eret
 	.else
 	ldr	lr, [sp, #S_LR]
 	add	sp, sp, #PT_REGS_SIZE	// restore sp

From da59f1d051d57e85eca49401a3a36d5a622babde Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Tue, 16 Jan 2024 11:02:21 +0000
Subject: [PATCH 07/11] arm64: entry: simplify kernel_exit logic

For historical reasons, the non-KPTI exception return path is duplicated for
EL1 and EL0, with the structure:

	.if	\el == 0
	[ KPTI handling ]

	ldr	lr, [sp, #S_LR]
	add	sp, sp, #PT_REGS_SIZE	// restore sp

	[ EL0 exception return workaround ]
	eret
	.else
	ldr	lr, [sp, #S_LR]
	add	sp, sp, #PT_REGS_SIZE	// restore sp

	[ EL1 exception return workaround ]
	eret
	.endif
	sb

This would be simpler and clearer with the common portions factored out, e.g.

	.if	\el == 0
	[ KPTI handling ]
	.endif

	ldr	lr, [sp, #S_LR]
	add	sp, sp, #PT_REGS_SIZE	// restore sp

	.if	\el == 0
	[ EL0 exception return workaround ]
	.else
	[ EL1 exception return workaround ]
	.endif

	eret
	sb

This expands to the same code, but is simpler for a human to follow as it
avoids duplicating the restore of LR+SP, and makes it clear that the ERET is
associated with the SB.

There should be no functional change as a result of this patch.
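As background (an illustration of the existing instructions, not something
this patch changes): with the common tail factored out, both the EL0 and EL1
paths now end with

	eret
	sb	// speculation barrier: prevents speculative execution of
		// anything placed after the exception return

which is what makes the ERET/SB pairing easy to see.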
Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: James Morse Cc: Rob Herring Cc: Will Deacon Link: https://lore.kernel.org/r/20240116110221.420467-3-mark.rutland@arm.com Signed-off-by: Will Deacon --- arch/arm64/kernel/entry.S | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 7fcbee0f6c0e..7ef0e127b149 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -442,24 +442,23 @@ alternative_else_nop_endif .L_skip_tramp_exit_\@: #endif + .endif + ldr lr, [sp, #S_LR] add sp, sp, #PT_REGS_SIZE // restore sp + .if \el == 0 /* This must be after the last explicit memory access */ alternative_if ARM64_WORKAROUND_SPECULATIVE_UNPRIV_LOAD tlbi vale1, xzr dsb nsh alternative_else_nop_endif - eret .else - ldr lr, [sp, #S_LR] - add sp, sp, #PT_REGS_SIZE // restore sp - /* Ensure any device/NC reads complete */ alternative_insn nop, "dmb sy", ARM64_WORKAROUND_1508412 + .endif eret - .endif sb .endm From b7c510d049049409e8945b932f4b0b357fa17415 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 15 Jan 2024 18:42:38 +0000 Subject: [PATCH 08/11] arm64/ptrace: Don't flush ZA/ZT storage when writing ZA via ptrace When writing ZA we currently unconditionally flush the buffer used to store it as part of ensuring that it is allocated. Since this buffer is shared with ZT0 this means that a write to ZA when PSTATE.ZA is already set will corrupt the value of ZT0 on a SME2 system. Fix this by only flushing the backing storage if PSTATE.ZA was not previously set. This will mean that short or failed writes may leave stale data in the buffer, this seems as correct as our current behaviour and unlikely to be something that userspace will rely on. Fixes: f90b529bcbe5 ("arm64/sme: Implement ZT0 ptrace support") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20240115-arm64-fix-ptrace-za-zt-v1-1-48617517028a@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/ptrace.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 20d7ef82de90..b3f64144b5cd 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1107,12 +1107,13 @@ static int za_set(struct task_struct *target, } } - /* Allocate/reinit ZA storage */ - sme_alloc(target, true); - if (!target->thread.sme_state) { - ret = -ENOMEM; - goto out; - } + /* + * Only flush the storage if PSTATE.ZA was not already set, + * otherwise preserve any existing data. + */ + sme_alloc(target, !thread_za_enabled(&target->thread)); + if (!target->thread.sme_state) + return -ENOMEM; /* If there is no data then disable ZA */ if (!count) { From 8410186ca48002092818500b7c209e569b47a2ac Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 15 Jan 2024 19:53:01 +0000 Subject: [PATCH 09/11] arm64/fpsimd: Remove spurious check for SVE support There is no need to check for SVE support when changing vector lengths, even if the system is SME only we still need SVE storage for the streaming SVE state. 
Fixes: d4d5be94a878 ("arm64/fpsimd: Ensure SME storage is allocated after SVE VL changes") Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20240115-arm64-sve-enabled-check-v1-1-a26360b00f6d@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 505f389be3e0..0983be2b1b61 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -898,10 +898,8 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, * allocate SVE now in case it is needed for use in streaming * mode. */ - if (system_supports_sve()) { - sve_free(task); - sve_alloc(task, true); - } + sve_free(task); + sve_alloc(task, true); if (free_sme) sme_free(task); From dc7eb8755797ed41a0d1b5c0c39df3c8f401b3d9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 15 Jan 2024 20:15:46 +0000 Subject: [PATCH 10/11] arm64/sme: Always exit sme_alloc() early with existing storage When sme_alloc() is called with existing storage and we are not flushing we will always allocate new storage, both leaking the existing storage and corrupting the state. Fix this by separating the checks for flushing and for existing storage as we do for SVE. Callers that reallocate (eg, due to changing the vector length) should call sme_free() themselves. Fixes: 5d0a8d2fba50 ("arm64/ptrace: Ensure that SME is set up for target when writing SSVE state") Signed-off-by: Mark Brown Cc: Link: https://lore.kernel.org/r/20240115-arm64-sme-flush-v1-1-7472bd3459b7@kernel.org Signed-off-by: Will Deacon --- arch/arm64/kernel/fpsimd.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 0983be2b1b61..a5dc6f764195 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -1217,8 +1217,10 @@ void fpsimd_release_task(struct task_struct *dead_task) */ void sme_alloc(struct task_struct *task, bool flush) { - if (task->thread.sme_state && flush) { - memset(task->thread.sme_state, 0, sme_state_size(task)); + if (task->thread.sme_state) { + if (flush) + memset(task->thread.sme_state, 0, + sme_state_size(task)); return; } From 1b20d0486a602417defb5bf33320d31b2a7a47f8 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Wed, 17 Jan 2024 17:05:45 +0000 Subject: [PATCH 11/11] arm64: Fix silcon-errata.rst formatting Remove the errant blank lines to make the desired empty row separators around the Fujitsu and ASR entries in the main table, rather than them being their own separate tables which then look odd in the HTML view. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/b6637654eda761e224f828a44a7bbc1eadf2ef88.1705511145.git.robin.murphy@arm.com Signed-off-by: Will Deacon --- Documentation/arch/arm64/silicon-errata.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index 7acd64c61f50..b30bd6abffbe 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -227,11 +227,9 @@ stable kernels. 
+----------------+-----------------+-----------------+-----------------------------+ | Rockchip | RK3588 | #3588001 | ROCKCHIP_ERRATUM_3588001 | +----------------+-----------------+-----------------+-----------------------------+ - +----------------+-----------------+-----------------+-----------------------------+ | Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 | +----------------+-----------------+-----------------+-----------------------------+ - +----------------+-----------------+-----------------+-----------------------------+ | ASR | ASR8601 | #8601001 | N/A | +----------------+-----------------+-----------------+-----------------------------+