344323e042
Now that we're always using STTR variants rather than abstracting two different addressing modes, the user_ldst macro here is frankly more obfuscating than helpful. Rewrite __arch_clear_user() with regular USER() annotations so that it's clearer what's going on, and take the opportunity to minimise the branchiness in the most common paths, while also allowing the exception fixup to return an accurate result.

Apparently some folks examine large reads from /dev/zero closely enough to notice the loop being hot, so align it per the other critical loops (presumably around a typical instruction fetch granularity).

Reviewed-by: Mark Rutland <mark.rutland@arm.com>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/1cbd78b12c076a8ad4656a345811cfb9425df0b3.1622128527.git.robin.murphy@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
58 lines
1.2 KiB
ArmAsm
58 lines
1.2 KiB
ArmAsm
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
/*
 * Copyright (C) 2021 Arm Ltd.
 */
|
|
|
|
#include <linux/linkage.h>
|
|
#include <asm/assembler.h>
|
|
|
|
.text
|
|
|
|
/* Prototype: int __arch_clear_user(void *addr, size_t sz)
 * Purpose  : clear some user memory
 * Params   : addr - user memory address to clear
 *          : sz   - number of bytes to clear
 * Returns  : number of bytes NOT cleared
 *
 * Alignment fixed up by hardware.
 *
 * Register usage:
 *   x0 - in: addr. Inside the 8-byte loop it is the address of the next
 *        store. Out: 0 on success, or the uncleared byte count computed by
 *        the fixup code (labels 7/8/9) after a fault.
 *   x1 - in: sz, then sz - 8. Used as the loop counter; its low three bits
 *        still equal sz & 7, which the short-buffer path tests.
 *   x2 - addr + sz, i.e. one past the last byte to clear.
 *
 * Every store is an unprivileged STTR-family instruction wrapped in USER(),
 * which names the fixup label to resume at if that store faults. The fixup
 * code relies on x0 and x2 holding the values described above.
 *
 * NOTE(review): the result is produced as a full 64-bit value in x0, so the
 * C declaration presumably returns a 64-bit type rather than int - confirm
 * against the header that declares this function.
 */

	.p2align 4
	// Alignment is for the loop, but since the prologue (including BTI)
	// is also 16 bytes we can keep any padding outside the function
SYM_FUNC_START(__arch_clear_user)
	add	x2, x0, x1		// x2 = end of buffer
	subs	x1, x1, #8		// x1 = sz - 8
	b.mi	2f			// fewer than 8 bytes: bit-test path
1:
	// Loop invariant: x1 = (x2 - x0) - 8 >= 0, so a full doubleword fits
USER(9f, sttr	xzr, [x0])
	add	x0, x0, #8
	subs	x1, x1, #8
	b.hi	1b			// loop while more than 8 bytes remain
	// 0-8 bytes remain: one store ending exactly at x2 covers them,
	// re-clearing already-zeroed bytes rather than branching on the count
USER(9f, sttr	xzr, [x2, #-8])
	mov	x0, #0
	ret

	// Fewer than 8 bytes. Subtracting 8 left the low three bits of x1
	// equal to sz, so test them individually.
2:	tbz	x1, #2, 3f		// bit 2 clear: fewer than 4 bytes
	// 4-7 bytes: two word stores, the second ending at x2 (may overlap)
USER(9f, sttr	wzr, [x0])
USER(8f, sttr	wzr, [x2, #-4])
	mov	x0, #0
	ret

3:	tbz	x1, #1, 4f		// bit 1 set: 2 or 3 bytes, clear the first two
USER(9f, sttrh	wzr, [x0])
4:	tbz	x1, #0, 5f		// bit 0 set: 1 or 3 bytes, clear the last one
USER(7f, sttrb	wzr, [x2, #-1])
5:	mov	x0, #0
	ret
SYM_FUNC_END(__arch_clear_user)
EXPORT_SYMBOL(__arch_clear_user)
|
|
|
|
/*
 * Fault fixups for the USER() stores in __arch_clear_user.
 * On entry x2 = end of the buffer (addr + sz). The entries fall through:
 *   7: the final single-byte store faulted; x0 is recomputed from x2 so
 *      that the result is exactly 1
 *   8: the second word store of the 4-7 byte case faulted; the first word
 *      at x0 was already cleared, so step x0 past it
 *   9: x0 = first byte not cleared; return x2 - x0 in x0
 */
	.section .fixup,"ax"
	.align	2
7:	sub	x0, x2, #5	// Adjust for faulting on the final byte...
8:	add	x0, x0, #4	// ...or the second word of the 4-7 byte case
9:	sub	x0, x2, x0	// x0 = number of bytes not cleared
	ret
	.previous
|