linux/arch/x86/lib/memcpy_64.S
Vincent Whitchurch 10f4c9b9a3 x86/asm: Fix build of UML with KASAN
Building UML with KASAN fails since commit 69d4c0d32186 ("entry, kasan,
x86: Disallow overriding mem*() functions") with the following errors:

 $ tools/testing/kunit/kunit.py run --kconfig_add CONFIG_KASAN=y
 ...
 ld: mm/kasan/shadow.o: in function `memset':
 shadow.c:(.text+0x40): multiple definition of `memset';
 arch/x86/lib/memset_64.o:(.noinstr.text+0x0): first defined here
 ld: mm/kasan/shadow.o: in function `memmove':
 shadow.c:(.text+0x90): multiple definition of `memmove';
 arch/x86/lib/memmove_64.o:(.noinstr.text+0x0): first defined here
 ld: mm/kasan/shadow.o: in function `memcpy':
 shadow.c:(.text+0x110): multiple definition of `memcpy';
 arch/x86/lib/memcpy_64.o:(.noinstr.text+0x0): first defined here

UML does not use GENERIC_ENTRY and is still supposed to be allowed to
override the mem*() functions, so use weak aliases in that case.

Fixes: 69d4c0d32186 ("entry, kasan, x86: Disallow overriding mem*() functions")
Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lore.kernel.org/r/20230918-uml-kasan-v3-1-7ad6db477df6@axis.com
2023-09-18 19:30:08 +02:00

173 lines
3.3 KiB
ArmAsm

/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright 2002 Andi Kleen */
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/export.h>
.section .noinstr.text, "ax"
/*
* memcpy - Copy a memory block.
*
* Input:
* rdi destination
* rsi source
* rdx count
*
* Output:
* rax original destination
*
* The FSRM alternative should be done inline (avoiding the call and
* the disgusting return handling), but that would require some help
* from the compiler for better calling conventions.
*
* The 'rep movsb' itself is small enough to replace the call, but the
* two register moves blow up the code. And one of them is "needed"
* only for the return value that is the same as the source input,
* which the compiler could/should do much better anyway.
*/
SYM_TYPED_FUNC_START(__memcpy)
ALTERNATIVE "jmp memcpy_orig", "", X86_FEATURE_FSRM
movq %rdi, %rax
movq %rdx, %rcx
rep movsb
RET
SYM_FUNC_END(__memcpy)
EXPORT_SYMBOL(__memcpy)
SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy)
EXPORT_SYMBOL(memcpy)
SYM_FUNC_START_LOCAL(memcpy_orig)
movq %rdi, %rax
cmpq $0x20, %rdx
jb .Lhandle_tail
/*
* We check whether memory false dependence could occur,
* then jump to corresponding copy mode.
*/
cmp %dil, %sil
jl .Lcopy_backward
subq $0x20, %rdx
.Lcopy_forward_loop:
subq $0x20, %rdx
/*
* Move in blocks of 4x8 bytes:
*/
movq 0*8(%rsi), %r8
movq 1*8(%rsi), %r9
movq 2*8(%rsi), %r10
movq 3*8(%rsi), %r11
leaq 4*8(%rsi), %rsi
movq %r8, 0*8(%rdi)
movq %r9, 1*8(%rdi)
movq %r10, 2*8(%rdi)
movq %r11, 3*8(%rdi)
leaq 4*8(%rdi), %rdi
jae .Lcopy_forward_loop
addl $0x20, %edx
jmp .Lhandle_tail
.Lcopy_backward:
/*
* Calculate copy position to tail.
*/
addq %rdx, %rsi
addq %rdx, %rdi
subq $0x20, %rdx
/*
* At most 3 ALU operations in one cycle,
* so append NOPS in the same 16 bytes trunk.
*/
.p2align 4
.Lcopy_backward_loop:
subq $0x20, %rdx
movq -1*8(%rsi), %r8
movq -2*8(%rsi), %r9
movq -3*8(%rsi), %r10
movq -4*8(%rsi), %r11
leaq -4*8(%rsi), %rsi
movq %r8, -1*8(%rdi)
movq %r9, -2*8(%rdi)
movq %r10, -3*8(%rdi)
movq %r11, -4*8(%rdi)
leaq -4*8(%rdi), %rdi
jae .Lcopy_backward_loop
/*
* Calculate copy position to head.
*/
addl $0x20, %edx
subq %rdx, %rsi
subq %rdx, %rdi
.Lhandle_tail:
cmpl $16, %edx
jb .Lless_16bytes
/*
* Move data from 16 bytes to 31 bytes.
*/
movq 0*8(%rsi), %r8
movq 1*8(%rsi), %r9
movq -2*8(%rsi, %rdx), %r10
movq -1*8(%rsi, %rdx), %r11
movq %r8, 0*8(%rdi)
movq %r9, 1*8(%rdi)
movq %r10, -2*8(%rdi, %rdx)
movq %r11, -1*8(%rdi, %rdx)
RET
.p2align 4
.Lless_16bytes:
cmpl $8, %edx
jb .Lless_8bytes
/*
* Move data from 8 bytes to 15 bytes.
*/
movq 0*8(%rsi), %r8
movq -1*8(%rsi, %rdx), %r9
movq %r8, 0*8(%rdi)
movq %r9, -1*8(%rdi, %rdx)
RET
.p2align 4
.Lless_8bytes:
cmpl $4, %edx
jb .Lless_3bytes
/*
* Move data from 4 bytes to 7 bytes.
*/
movl (%rsi), %ecx
movl -4(%rsi, %rdx), %r8d
movl %ecx, (%rdi)
movl %r8d, -4(%rdi, %rdx)
RET
.p2align 4
.Lless_3bytes:
subl $1, %edx
jb .Lend
/*
* Move data from 1 bytes to 3 bytes.
*/
movzbl (%rsi), %ecx
jz .Lstore_1byte
movzbq 1(%rsi), %r8
movzbq (%rsi, %rdx), %r9
movb %r8b, 1(%rdi)
movb %r9b, (%rdi, %rdx)
.Lstore_1byte:
movb %cl, (%rdi)
.Lend:
RET
SYM_FUNC_END(memcpy_orig)