// SPDX-License-Identifier: GPL-2.0-only
/*
* alternative runtime patching
* inspired by the x86 version
*
 * Copyright (C) 2014 ARM Ltd.
*/

#define pr_fmt(fmt) "alternatives: " fmt

#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/elf.h>
#include <asm/cacheflush.h>
#include <asm/alternative.h>
#include <asm/cpufeature.h>
#include <asm/insn.h>
#include <asm/module.h>
#include <asm/sections.h>
#include <asm/vdso.h>
#include <linux/stop_machine.h>

#define __ALT_PTR(a, f)		((void *)&(a)->f + (a)->f)
#define ALT_ORIG_PTR(a)		__ALT_PTR(a, orig_offset)
#define ALT_REPL_PTR(a)		__ALT_PTR(a, alt_offset)

#define ALT_CAP(a)		((a)->cpucap & ~ARM64_CB_BIT)
#define ALT_HAS_CB(a)		((a)->cpucap & ARM64_CB_BIT)
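
/*
 * Each alt_instr entry stores its original and replacement code as
 * self-relative offsets, which __ALT_PTR() turns back into absolute
 * pointers. The cpucap field doubles as a flag word: ARM64_CB_BIT marks
 * a callback alternative, so ALT_CAP()/ALT_HAS_CB() separate the
 * capability number from that flag.
 */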

/* Volatile, as we may be patching the guts of READ_ONCE() */
static volatile int all_alternatives_applied;

static DECLARE_BITMAP(applied_alternatives, ARM64_NCAPS);

struct alt_region {
	struct alt_instr *begin;
	struct alt_instr *end;
};

bool alternative_is_applied(u16 cpucap)
{
	if (WARN_ON(cpucap >= ARM64_NCAPS))
		return false;

	return test_bit(cpucap, applied_alternatives);
}

/*
 * Check if the target PC is within an alternative block.
 */
static __always_inline bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
{
	unsigned long replptr = (unsigned long)ALT_REPL_PTR(alt);

	return !(pc >= replptr && pc <= (replptr + alt->alt_len));
}

#define align_down(x, a)	((unsigned long)(x) & ~(((unsigned long)(a)) - 1))
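
/*
 * Instructions in a replacement sequence are assembled at the sequence's
 * own link address. get_alt_insn() rebases PC-relative immediates
 * (branches that leave the sequence, and adrp) onto the address they will
 * actually run from, and refuses (BUG) to patch any other instruction
 * that uses a PC-relative literal.
 *
 * Worked example of the adrp case, with illustrative addresses: an adrp
 * assembled at altinsnptr 0xffff800008a01004 targeting page
 * 0xffff800008c00000 carries orig_offset 0x1ff000; if the slot being
 * patched is insnptr 0xffff800008123008, new_offset becomes
 * 0xffff800008c00000 - 0xffff800008123000 = 0xadd000, so the rewritten
 * adrp resolves to the same 4K page from its new PC.
 */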
static __always_inline u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr)
{
	u32 insn;

	insn = le32_to_cpu(*altinsnptr);

	if (aarch64_insn_is_branch_imm(insn)) {
		s32 offset = aarch64_get_branch_offset(insn);
		unsigned long target;

		target = (unsigned long)altinsnptr + offset;

		/*
		 * If we're branching inside the alternate sequence,
		 * do not rewrite the instruction, as it is already
		 * correct. Otherwise, generate the new instruction.
		 */
		if (branch_insn_requires_update(alt, target)) {
			offset = target - (unsigned long)insnptr;
			insn = aarch64_set_branch_offset(insn, offset);
		}
	} else if (aarch64_insn_is_adrp(insn)) {
		s32 orig_offset, new_offset;
		unsigned long target;

		/*
		 * If we're replacing an adrp instruction, which uses PC-relative
		 * immediate addressing, adjust the offset to reflect the new
		 * PC. adrp operates on 4K aligned addresses.
		 */
		orig_offset = aarch64_insn_adrp_get_offset(insn);
		target = align_down(altinsnptr, SZ_4K) + orig_offset;
		new_offset = target - align_down(insnptr, SZ_4K);
		insn = aarch64_insn_adrp_set_offset(insn, new_offset);
	} else if (aarch64_insn_uses_literal(insn)) {
		/*
		 * Disallow patching unhandled instructions using PC relative
		 * literal addresses
		 */
		BUG();
	}

	return insn;
}

/*
 * patch_alternative() and the helpers above must not be instrumented:
 * if the patching code itself ran instrumented (and hence patchable)
 * code, that code could be executed after its instructions have been
 * rewritten but before the cache maintenance for them has completed.
 */
static noinstr void patch_alternative(struct alt_instr *alt,
				      __le32 *origptr, __le32 *updptr, int nr_inst)
{
	__le32 *replptr;
	int i;

	replptr = ALT_REPL_PTR(alt);
	for (i = 0; i < nr_inst; i++) {
		u32 insn;

		insn = get_alt_insn(alt, origptr + i, replptr + i);
		updptr[i] = cpu_to_le32(insn);
	}
}

/*
 * We provide our own, private D-cache cleaning function so that we don't
 * accidentally call into the cache.S code, which is patched by us at
 * runtime.
 */
static noinstr void clean_dcache_range_nopatch(u64 start, u64 end)
{
	u64 cur, d_size, ctr_el0;

	/*
	 * Read the sanitised CTR_EL0 value directly rather than via
	 * read_sanitised_ftr_reg(), which may itself be instrumented
	 * with patchable code and would perform a needless binary search
	 * per call.
	 */
	ctr_el0 = arm64_ftr_reg_ctrel0.sys_val;
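	/*
	 * CTR_EL0.DminLine is the log2 of the number of 4-byte words in the
	 * smallest data cache line, so the shift below yields that line
	 * size in bytes.
	 */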
	d_size = 4 << cpuid_feature_extract_unsigned_field(ctr_el0,
							   CTR_EL0_DminLine_SHIFT);
	cur = start & ~(d_size - 1);
	do {
		/*
		 * We must clean+invalidate to the PoC in order to avoid
		 * Cortex-A53 errata 826319, 827319, 824069 and 819472
		 * (this corresponds to ARM64_WORKAROUND_CLEAN_CACHE)
		 */
		asm volatile("dc civac, %0" : : "r" (cur) : "memory");
	} while (cur += d_size, cur < end);
}
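
/*
 * Walk the given alt_region: for every entry whose capability is both
 * requested in cpucap_mask and detected on this system, generate the
 * replacement (via the default patcher or the entry's callback) and write
 * it over the original instructions. For the core kernel the write goes
 * through the linear-map alias of the text and the patched range is
 * cleaned to the PoC here; modules rely on the module loader's cache
 * maintenance instead.
 */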
static void __apply_alternatives(const struct alt_region *region,
				 bool is_module,
				 unsigned long *cpucap_mask)
{
	struct alt_instr *alt;
	__le32 *origptr, *updptr;
	alternative_cb_t alt_cb;

	for (alt = region->begin; alt < region->end; alt++) {
		int nr_inst;
		int cap = ALT_CAP(alt);

		if (!test_bit(cap, cpucap_mask))
			continue;

		if (!cpus_have_cap(cap))
			continue;

		if (ALT_HAS_CB(alt))
			BUG_ON(alt->alt_len != 0);
		else
			BUG_ON(alt->alt_len != alt->orig_len);

		origptr = ALT_ORIG_PTR(alt);
		updptr = is_module ? origptr : lm_alias(origptr);
		nr_inst = alt->orig_len / AARCH64_INSN_SIZE;

		if (ALT_HAS_CB(alt))
			alt_cb = ALT_REPL_PTR(alt);
		else
			alt_cb = patch_alternative;

		alt_cb(alt, origptr, updptr, nr_inst);

		if (!is_module) {
			clean_dcache_range_nopatch((u64)origptr,
						   (u64)(origptr + nr_inst));
		}
	}

	/*
	 * The core module code takes care of cache maintenance in
	 * flush_module_icache().
	 */
	if (!is_module) {
		dsb(ish);
		icache_inval_all_pou();
		isb();

		bitmap_or(applied_alternatives, applied_alternatives,
			  cpucap_mask, ARM64_NCAPS);
		bitmap_and(applied_alternatives, applied_alternatives,
			   system_cpucaps, ARM64_NCAPS);
	}
}
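
/*
 * The vDSO image carries its own .altinstructions section; locate it via
 * the image's ELF section headers and patch it in place. The capability
 * mask is simply filled, so which entries actually apply is decided by
 * cpus_have_cap() in __apply_alternatives().
 */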
static void __init apply_alternatives_vdso(void)
{
	struct alt_region region;
	const struct elf64_hdr *hdr;
	const struct elf64_shdr *shdr;
	const struct elf64_shdr *alt;
	DECLARE_BITMAP(all_capabilities, ARM64_NCAPS);

	bitmap_fill(all_capabilities, ARM64_NCAPS);

	hdr = (struct elf64_hdr *)vdso_start;
	shdr = (void *)hdr + hdr->e_shoff;
	alt = find_section(hdr, shdr, ".altinstructions");
	if (!alt)
		return;

	region = (struct alt_region){
		.begin	= (void *)hdr + alt->sh_offset,
		.end	= (void *)hdr + alt->sh_offset + alt->sh_size,
	};

	__apply_alternatives(&region, false, &all_capabilities[0]);
}
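
/*
 * Alternatives for the statically-linked kernel image itself: both the
 * boot-CPU pass and the system-wide pass below operate on this region.
 */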
static const struct alt_region kernel_alternatives __initconst = {
	.begin	= (struct alt_instr *)__alt_instructions,
	.end	= (struct alt_instr *)__alt_instructions_end,
};

/*
 * We might be patching the stop_machine state machine, so implement a
 * really simple polling protocol here.
 */
static int __init __apply_alternatives_multi_stop(void *unused)
{
	/* We always have a CPU 0 at this point (__init) */
	if (smp_processor_id()) {
		while (!all_alternatives_applied)
			cpu_relax();
		isb();
	} else {
		DECLARE_BITMAP(remaining_capabilities, ARM64_NCAPS);

		bitmap_complement(remaining_capabilities, boot_cpucaps,
				  ARM64_NCAPS);

		BUG_ON(all_alternatives_applied);
		__apply_alternatives(&kernel_alternatives, false,
				     remaining_capabilities);
		/* Barriers provided by the cache flushing */
		all_alternatives_applied = 1;
	}

	return 0;
}
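
/*
 * Called once all CPUs are up: patch the vDSO first, then hold every
 * online CPU in the stop_machine() protocol above while the kernel
 * image itself is rewritten.
 */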
void __init apply_alternatives_all(void)
{
	pr_info("applying system-wide alternatives\n");

	apply_alternatives_vdso();
	/* better not try code patching on a live SMP system */
	stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask);
}

/*
 * This is called very early in the boot process (directly after we run
 * a feature detect on the boot CPU). No need to worry about other CPUs
 * here.
 */
void __init apply_boot_alternatives(void)
{
	/* If called on non-boot cpu things could go wrong */
	WARN_ON(smp_processor_id() != 0);

	pr_info("applying boot alternatives\n");

	__apply_alternatives(&kernel_alternatives, false,
			     &boot_cpucaps[0]);
}

#ifdef CONFIG_MODULES
void apply_alternatives_module(void *start, size_t length)
{
	struct alt_region region = {
		.begin	= start,
		.end	= start + length,
	};
	DECLARE_BITMAP(all_capabilities, ARM64_NCAPS);

	bitmap_fill(all_capabilities, ARM64_NCAPS);

	__apply_alternatives(&region, true, &all_capabilities[0]);
}
#endif

/*
 * Shared callback used to patch an alternative sequence with NOPs. As NOPs
 * are position-independent, a single shared replacement suffices and no
 * distinct per-site copy of the sequence is needed. Exported for modules;
 * since modules are loaded within 2GiB of the kernel, an alt_instr entry
 * in a module can always refer to this callback directly.
 */
noinstr void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr,
			       __le32 *updptr, int nr_inst)
{
	for (int i = 0; i < nr_inst; i++)
		updptr[i] = cpu_to_le32(aarch64_insn_gen_nop());
}
EXPORT_SYMBOL(alt_cb_patch_nops);