28ea374da1
When using alternatives for cpufeatures we should include hwcap.h directly, rather than through errata_list.h. Opportunistically drop an unused include too. Signed-off-by: Andrew Jones <ajones@ventanamicro.com> Reviewed-by: Conor Dooley <conor.dooley@microchip.com> Reviewed-by: Heiko Stuebner <heiko.stuebner@vrull.eu> Link: https://lore.kernel.org/r/20230224154601.88163-6-ajones@ventanamicro.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
139 lines
2.2 KiB
ArmAsm
139 lines
2.2 KiB
ArmAsm
/* SPDX-License-Identifier: GPL-2.0-only */
|
|
|
|
#include <linux/linkage.h>
|
|
#include <asm/asm.h>
|
|
#include <asm/alternative-macros.h>
|
|
#include <asm/hwcap.h>
|
|
|
|
/*
 * int strncmp(const char *cs, const char *ct, size_t count)
 *
 * Compare at most @count bytes of the NUL-terminated strings @cs and @ct.
 * The result is zero when the compared prefixes are equal, negative when
 * @cs sorts before @ct and positive otherwise (bytes compare as unsigned).
 *
 * Two implementations live in this function: a generic byte-at-a-time loop
 * and, when CONFIG_RISCV_ISA_ZBB is set, a word-at-a-time variant reached
 * through the ALTERNATIVE() slot below.
 */
SYM_FUNC_START(strncmp)

	/* "nop" by default; the alternative replaces it with a jump to the Zbb variant. */
	ALTERNATIVE("nop", "j strncmp_zbb", 0, RISCV_ISA_EXT_ZBB, CONFIG_RISCV_ISA_ZBB)

	/*
	 * Returns
	 *   a0 - comparison result, value like strncmp
	 *
	 * Parameters
	 *   a0 - string1
	 *   a1 - string2
	 *   a2 - number of characters to compare
	 *
	 * Clobbers
	 *   t0, t1, t2
	 */
	li	t2, 0			/* t2 = bytes compared so far */
1:
	beq	a2, t2, 2f		/* count exhausted: prefixes are equal */
	lbu	t0, 0(a0)
	lbu	t1, 0(a1)
	addi	a0, a0, 1
	addi	a1, a1, 1
	bne	t0, t1, 3f		/* first differing byte decides */
	addi	t2, t2, 1
	bnez	t0, 1b			/* equal and not NUL: keep going */
2:
	/* Hit the count limit or a common NUL terminator. */
	li	a0, 0
	ret
3:
	/*
	 * strncmp only needs to return (< 0, 0, > 0) values
	 * not necessarily -1, 0, +1
	 */
	sub	a0, t0, t1
	ret

/*
 * Variant of strncmp using the ZBB extension if available
 */
#ifdef CONFIG_RISCV_ISA_ZBB
strncmp_zbb:

.option push
.option arch,+zbb

	/*
	 * Returns
	 *   a0 - comparison result, like strncmp
	 *
	 * Parameters
	 *   a0 - string1
	 *   a1 - string2
	 *   a2 - number of characters to compare
	 *
	 * Clobbers
	 *   t0, t1, t2, t3, t4, t5, t6
	 *
	 * Register roles
	 *   t4 - end address for string1 (a0 + a2, clamped on wrap-around)
	 *   t5 - all-ones pattern that orc.b yields for a word without NUL
	 *   t6 - t4 rounded down to SZREG, limit for the word loop
	 */

	or	t2, a0, a1
	li	t5, -1
	andi	t2, t2, SZREG-1		/* non-zero if either pointer is misaligned */
	add	t4, a0, a2

	/*
	 * A huge count (e.g. SIZE_MAX) makes a0 + a2 wrap around. Clamp the
	 * end address to the highest address in that case so the limit never
	 * ends up below the start: t3 = (t4 < a0) ? -1 : 0, t4 |= t3.
	 * All address comparisons below are unsigned to match.
	 */
	sltu	t3, t4, a0
	neg	t3, t3
	or	t4, t4, t3

	bnez	t2, 3f

	/* Adjust limit for fast-path. */
	andi	t6, t4, -SZREG

	/* Main loop for aligned string. */
	.p2align 3
1:
	bgeu	a0, t6, 3f		/* less than a full word left: byte loop */
	REG_L	t0, 0(a0)
	REG_L	t1, 0(a1)
	orc.b	t3, t0			/* each byte -> 0xff if non-zero, else 0x00 */
	bne	t3, t5, 2f		/* NUL byte somewhere in t0 */
	orc.b	t3, t1
	bne	t3, t5, 2f		/* NUL byte somewhere in t1 */
	addi	a0, a0, SZREG
	addi	a1, a1, SZREG
	beq	t0, t1, 1b

	/*
	 * Words don't match, and no null byte in the first
	 * word. Get bytes in big-endian order and compare.
	 */
#ifndef CONFIG_CPU_BIG_ENDIAN
	rev8	t0, t0
	rev8	t1, t1
#endif

	/* Synthesize (t0 >= t1) ? 1 : -1 in a branchless sequence. */
	sltu	a0, t0, t1
	neg	a0, a0
	ori	a0, a0, 1
	ret

2:
	/*
	 * Found a null byte.
	 * If words don't match, fall back to simple loop. a0/a1 have not
	 * been advanced yet, so the byte loop re-examines this word.
	 */
	bne	t0, t1, 3f

	/* Otherwise, strings are equal. */
	li	a0, 0
	ret

	/* Simple loop for misaligned strings. */
	.p2align 3
3:
	bgeu	a0, t4, 5f		/* reached the end address: equal */
	lbu	t0, 0(a0)
	lbu	t1, 0(a1)
	addi	a0, a0, 1
	addi	a1, a1, 1
	bne	t0, t1, 4f
	bnez	t0, 3b

4:
	/* Bytes differ, or both are NUL (difference is then zero). */
	sub	a0, t0, t1
	ret

5:
	li	a0, 0
	ret

.option pop
#endif
SYM_FUNC_END(strncmp)
|