arch/csky patches for 5.19-rc1
This pull request covers:

 - Three atomic optimizations
 - memcpy/memcpy_io optimization
 - Some coding conventions for Kbuild, removing warnings

-----BEGIN PGP SIGNATURE-----

iQJGBAABCAAwFiEE2KAv+isbWR/viAKHAXH1GYaIxXsFAmKLrCQSHGd1b3JlbkBr
ZXJuZWwub3JnAAoJEAFx9RmGiMV7UekP/RZZjUfCjqwf14MEgnw44+ZrdeVuGdIf
XSHoeR60wDbKMd2g0wsjNSYw59ybkxz4hkvv+p0yCkniuDA/D7EWesuBUNOzogMs
Vf/SHrr5t9dped0sUuvf/4RXwz2jFXFvnPTaYCeaRVhne8vK6GsGJWxZwSDuBBFT
ZvcaOJqJ+w6B/HzbxtQOahqcLJ0HCPd/Wk5WrfhhuGxaGhCet0ORwlYvho968peO
DPxHmI5j4Y4MO4nRFr/B9Lb9DqOj1s4JMt7fXoMs9MQwgB5QOBrz6jJTnlZ5EiGh
z5Mnmil9KxJk/vCs1/F5CFTSWvkB5TGpSJiNeRCcv74g5FeYBMD95HoU0wxlz4w4
ow6nFePMCoNI511+gkfSKnbookpZC5bKfMSRZouM1zZpBR9qpHx/q/s/L1C6j0U2
/g146VjeEAuNisWMxc4XTBu6iTiKeI4JLHpAUryDcIm2u91+Rl6Zgqs42FRF1KWi
DkhDqaJWs3lHU7psUht4RfXGcKg5t8NXDFCYglVAzly05QHILVytzfbS18fNf10q
Coaj4xVrODWXJs3ByWJ/qSrDs/GntsbKamDeR1hepLr6LU+EaPUsnuL4wYGNneSq
pwjlcopbl/xAL9W1127j1nRwLKG8f/3dXpnU/bGG9le1vqwsy9nDzfvH/KZYQE2+
Lwpf493XJVO2
=P11L
-----END PGP SIGNATURE-----

Merge tag 'csky-for-linus-5.19-rc1' of https://github.com/c-sky/csky-linux

Pull arch/csky updates from Guo Ren:

 - Three atomic optimizations

 - memcpy/memcpy_io optimization

 - Some coding conventions for Kbuild, removing warnings

* tag 'csky-for-linus-5.19-rc1' of https://github.com/c-sky/csky-linux:
  csky: Move $(core-y) into arch/csky/Kbuild
  csky: Remove unused core-y for dts
  csky: Remove unused $(dtb-y) from boot/Makefile
  csky: atomic: Add conditional atomic operations' optimization
  csky: atomic: Add custom atomic.h implementation
  csky: atomic: Optimize cmpxchg with acquire & release
  csky: optimize memcpy_{from,to}io() and memset_io()
  csky: Add C based string functions
  csky: Fix versioncheck warnings
  csky: patch_text: Fixup last cpu should be master
  csky: fix typos in comments
commit 67c642e0d9
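Background note on the cmpxchg acquire/release work merged here: a fully ordered cmpxchg pays for ordering on both sides of the operation, while the _acquire and _relaxed variants let a caller request only the ordering it actually needs. A minimal sketch using the generic Linux atomic API (the try-lock example is hypothetical, not code from this series):

static inline bool my_trylock(atomic_t *lock)
{
	/* Acquire ordering is only needed when the lock is taken. */
	return atomic_cmpxchg_acquire(lock, 0, 1) == 0;
}

static inline void my_unlock(atomic_t *lock)
{
	/* Pair with a release-ordered store instead of a full barrier. */
	atomic_set_release(lock, 0);
}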
@@ -1,4 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only

obj-y += kernel/ mm/

# for cleaning
subdir- += boot
@@ -320,6 +320,14 @@ config HOTPLUG_CPU
	  controlled through /sys/devices/system/cpu/cpu1/hotplug/target.

	  Say N if you want to disable CPU hotplug.

config HAVE_EFFICIENT_UNALIGNED_STRING_OPS
	bool "Enable EFFICIENT_UNALIGNED_STRING_OPS for abiv2"
	depends on CPU_CK807 || CPU_CK810 || CPU_CK860
	help
	  Say Y here to enable EFFICIENT_UNALIGNED_STRING_OPS. Some CPU models
	  can deal with unaligned access in hardware.

endmenu

source "arch/csky/Kconfig.platforms"
@@ -61,15 +61,12 @@ KBUILD_AFLAGS += $(KBUILD_CFLAGS)

head-y := arch/csky/kernel/head.o

core-y += arch/csky/kernel/
core-y += arch/csky/mm/
core-y += arch/csky/$(CSKYABI)/

libs-y += arch/csky/lib/ \
	$(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name)

boot := arch/csky/boot
core-y += $(boot)/dts/

all: zImage
@@ -4,5 +4,3 @@ obj-y += bswapdi.o
obj-y += bswapsi.o
obj-y += cacheflush.o
obj-y += mmap.o
obj-y += memcpy.o
obj-y += strksyms.o
@@ -1,347 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.

#include <linux/linkage.h>

.macro GET_FRONT_BITS rx y
#ifdef __cskyLE__
	lsri	\rx, \y
#else
	lsli	\rx, \y
#endif
.endm

.macro GET_AFTER_BITS rx y
#ifdef __cskyLE__
	lsli	\rx, \y
#else
	lsri	\rx, \y
#endif
.endm

/* void *memcpy(void *dest, const void *src, size_t n); */
ENTRY(memcpy)
	mov	r7, r2
	cmplti	r4, 4
	bt	.L_copy_by_byte
	mov	r6, r2
	andi	r6, 3
	cmpnei	r6, 0
	jbt	.L_dest_not_aligned
	mov	r6, r3
	andi	r6, 3
	cmpnei	r6, 0
	jbt	.L_dest_aligned_but_src_not_aligned
.L0:
	cmplti	r4, 16
	jbt	.L_aligned_and_len_less_16bytes
	subi	sp, 8
	stw	r8, (sp, 0)
.L_aligned_and_len_larger_16bytes:
	ldw	r1, (r3, 0)
	ldw	r5, (r3, 4)
	ldw	r8, (r3, 8)
	stw	r1, (r7, 0)
	ldw	r1, (r3, 12)
	stw	r5, (r7, 4)
	stw	r8, (r7, 8)
	stw	r1, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L_aligned_and_len_larger_16bytes
	ldw	r8, (sp, 0)
	addi	sp, 8
	cmpnei	r4, 0
	jbf	.L_return

.L_aligned_and_len_less_16bytes:
	cmplti	r4, 4
	bt	.L_copy_by_byte
.L1:
	ldw	r1, (r3, 0)
	stw	r1, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	jbf	.L1
	br	.L_copy_by_byte

.L_return:
	rts

.L_copy_by_byte:			/* len less than 4 bytes */
	cmpnei	r4, 0
	jbf	.L_return
.L4:
	ldb	r1, (r3, 0)
	stb	r1, (r7, 0)
	addi	r3, 1
	addi	r7, 1
	decne	r4
	jbt	.L4
	rts

/*
 * If dest is not aligned, just copying some bytes makes the dest align.
 * After that, we judge whether the src is aligned.
 */
.L_dest_not_aligned:
	mov	r5, r3
	rsub	r5, r5, r7
	abs	r5, r5
	cmplt	r5, r4
	bt	.L_copy_by_byte
	mov	r5, r7
	sub	r5, r3
	cmphs	r5, r4
	bf	.L_copy_by_byte
	mov	r5, r6
.L5:
	ldb	r1, (r3, 0)		/* makes the dest align. */
	stb	r1, (r7, 0)
	addi	r5, 1
	subi	r4, 1
	addi	r3, 1
	addi	r7, 1
	cmpnei	r5, 4
	jbt	.L5
	cmplti	r4, 4
	jbt	.L_copy_by_byte
	mov	r6, r3			/* judge whether the src is aligned. */
	andi	r6, 3
	cmpnei	r6, 0
	jbf	.L0

/* Judge the number of misaligned, 1, 2, 3? */
.L_dest_aligned_but_src_not_aligned:
	mov	r5, r3
	rsub	r5, r5, r7
	abs	r5, r5
	cmplt	r5, r4
	bt	.L_copy_by_byte
	bclri	r3, 0
	bclri	r3, 1
	ldw	r1, (r3, 0)
	addi	r3, 4
	cmpnei	r6, 2
	bf	.L_dest_aligned_but_src_not_aligned_2bytes
	cmpnei	r6, 3
	bf	.L_dest_aligned_but_src_not_aligned_3bytes

.L_dest_aligned_but_src_not_aligned_1byte:
	mov	r5, r7
	sub	r5, r3
	cmphs	r5, r4
	bf	.L_copy_by_byte
	cmplti	r4, 16
	bf	.L11
.L10:					/* If the len is less than 16 bytes */
	GET_FRONT_BITS r1 8
	mov	r5, r1
	ldw	r6, (r3, 0)
	mov	r1, r6
	GET_AFTER_BITS r6 24
	or	r5, r6
	stw	r5, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	bf	.L10
	subi	r3, 3
	br	.L_copy_by_byte
.L11:
	subi	sp, 16
	stw	r8, (sp, 0)
	stw	r9, (sp, 4)
	stw	r10, (sp, 8)
	stw	r11, (sp, 12)
.L12:
	ldw	r5, (r3, 0)
	ldw	r11, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)

	GET_FRONT_BITS r1 8		/* little or big endian? */
	mov	r10, r5
	GET_AFTER_BITS r5 24
	or	r5, r1

	GET_FRONT_BITS r10 8
	mov	r1, r11
	GET_AFTER_BITS r11 24
	or	r11, r10

	GET_FRONT_BITS r1 8
	mov	r10, r8
	GET_AFTER_BITS r8 24
	or	r8, r1

	GET_FRONT_BITS r10 8
	mov	r1, r9
	GET_AFTER_BITS r9 24
	or	r9, r10

	stw	r5, (r7, 0)
	stw	r11, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L12
	ldw	r8, (sp, 0)
	ldw	r9, (sp, 4)
	ldw	r10, (sp, 8)
	ldw	r11, (sp, 12)
	addi	sp, 16
	cmplti	r4, 4
	bf	.L10
	subi	r3, 3
	br	.L_copy_by_byte

.L_dest_aligned_but_src_not_aligned_2bytes:
	cmplti	r4, 16
	bf	.L21
.L20:
	GET_FRONT_BITS r1 16
	mov	r5, r1
	ldw	r6, (r3, 0)
	mov	r1, r6
	GET_AFTER_BITS r6 16
	or	r5, r6
	stw	r5, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	bf	.L20
	subi	r3, 2
	br	.L_copy_by_byte
	rts

.L21:					/* n > 16 */
	subi	sp, 16
	stw	r8, (sp, 0)
	stw	r9, (sp, 4)
	stw	r10, (sp, 8)
	stw	r11, (sp, 12)

.L22:
	ldw	r5, (r3, 0)
	ldw	r11, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)

	GET_FRONT_BITS r1 16
	mov	r10, r5
	GET_AFTER_BITS r5 16
	or	r5, r1

	GET_FRONT_BITS r10 16
	mov	r1, r11
	GET_AFTER_BITS r11 16
	or	r11, r10

	GET_FRONT_BITS r1 16
	mov	r10, r8
	GET_AFTER_BITS r8 16
	or	r8, r1

	GET_FRONT_BITS r10 16
	mov	r1, r9
	GET_AFTER_BITS r9 16
	or	r9, r10

	stw	r5, (r7, 0)
	stw	r11, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L22
	ldw	r8, (sp, 0)
	ldw	r9, (sp, 4)
	ldw	r10, (sp, 8)
	ldw	r11, (sp, 12)
	addi	sp, 16
	cmplti	r4, 4
	bf	.L20
	subi	r3, 2
	br	.L_copy_by_byte


.L_dest_aligned_but_src_not_aligned_3bytes:
	cmplti	r4, 16
	bf	.L31
.L30:
	GET_FRONT_BITS r1 24
	mov	r5, r1
	ldw	r6, (r3, 0)
	mov	r1, r6
	GET_AFTER_BITS r6 8
	or	r5, r6
	stw	r5, (r7, 0)
	subi	r4, 4
	addi	r3, 4
	addi	r7, 4
	cmplti	r4, 4
	bf	.L30
	subi	r3, 1
	br	.L_copy_by_byte
.L31:
	subi	sp, 16
	stw	r8, (sp, 0)
	stw	r9, (sp, 4)
	stw	r10, (sp, 8)
	stw	r11, (sp, 12)
.L32:
	ldw	r5, (r3, 0)
	ldw	r11, (r3, 4)
	ldw	r8, (r3, 8)
	ldw	r9, (r3, 12)

	GET_FRONT_BITS r1 24
	mov	r10, r5
	GET_AFTER_BITS r5 8
	or	r5, r1

	GET_FRONT_BITS r10 24
	mov	r1, r11
	GET_AFTER_BITS r11 8
	or	r11, r10

	GET_FRONT_BITS r1 24
	mov	r10, r8
	GET_AFTER_BITS r8 8
	or	r8, r1

	GET_FRONT_BITS r10 24
	mov	r1, r9
	GET_AFTER_BITS r9 8
	or	r9, r10

	stw	r5, (r7, 0)
	stw	r11, (r7, 4)
	stw	r8, (r7, 8)
	stw	r9, (r7, 12)
	subi	r4, 16
	addi	r3, 16
	addi	r7, 16
	cmplti	r4, 16
	jbf	.L32
	ldw	r8, (sp, 0)
	ldw	r9, (sp, 4)
	ldw	r10, (sp, 8)
	ldw	r11, (sp, 12)
	addi	sp, 16
	cmplti	r4, 4
	bf	.L30
	subi	r3, 1
	br	.L_copy_by_byte
@@ -1,6 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.

#include <linux/module.h>

EXPORT_SYMBOL(memcpy);
@@ -2,9 +2,11 @@
obj-y += cacheflush.o
obj-$(CONFIG_CPU_HAS_FPU) += fpu.o
obj-y += memcmp.o
ifeq ($(CONFIG_HAVE_EFFICIENT_UNALIGNED_STRING_OPS), y)
obj-y += memcpy.o
obj-y += memmove.o
obj-y += memset.o
endif
obj-y += strcmp.o
obj-y += strcpy.o
obj-y += strlen.o
@@ -3,10 +3,12 @@

#include <linux/module.h>

#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_STRING_OPS
EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(memset);
EXPORT_SYMBOL(memcmp);
EXPORT_SYMBOL(memmove);
#endif
EXPORT_SYMBOL(memcmp);
EXPORT_SYMBOL(strcmp);
EXPORT_SYMBOL(strcpy);
EXPORT_SYMBOL(strlen);
@@ -1,6 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
targets := Image zImage uImage
targets += $(dtb-y)

$(obj)/Image: vmlinux FORCE
	$(call if_changed,objcopy)
arch/csky/include/asm/atomic.h (new file, 237 lines)
@@ -0,0 +1,237 @@
/* SPDX-License-Identifier: GPL-2.0 */

#ifndef __ASM_CSKY_ATOMIC_H
#define __ASM_CSKY_ATOMIC_H

#ifdef CONFIG_SMP
#include <asm-generic/atomic64.h>

#include <asm/cmpxchg.h>
#include <asm/barrier.h>

#define __atomic_acquire_fence()	__bar_brarw()

#define __atomic_release_fence()	__bar_brwaw()

static __always_inline int arch_atomic_read(const atomic_t *v)
{
	return READ_ONCE(v->counter);
}
static __always_inline void arch_atomic_set(atomic_t *v, int i)
{
	WRITE_ONCE(v->counter, i);
}

#define ATOMIC_OP(op)						\
static __always_inline						\
void arch_atomic_##op(int i, atomic_t *v)			\
{								\
	unsigned long tmp;					\
	__asm__ __volatile__ (					\
	"1:	ldex.w		%0, (%2)	\n"		\
	"	" #op "		%0, %1		\n"		\
	"	stex.w		%0, (%2)	\n"		\
	"	bez		%0, 1b		\n"		\
	: "=&r" (tmp)						\
	: "r" (i), "r" (&v->counter)				\
	: "memory");						\
}

ATOMIC_OP(add)
ATOMIC_OP(sub)
ATOMIC_OP(and)
ATOMIC_OP( or)
ATOMIC_OP(xor)

#undef ATOMIC_OP

#define ATOMIC_FETCH_OP(op)					\
static __always_inline						\
int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v)	\
{								\
	register int ret, tmp;					\
	__asm__ __volatile__ (					\
	"1:	ldex.w		%0, (%3) \n"			\
	"	mov		%1, %0   \n"			\
	"	" #op "		%0, %2   \n"			\
	"	stex.w		%0, (%3) \n"			\
	"	bez		%0, 1b   \n"			\
	: "=&r" (tmp), "=&r" (ret)				\
	: "r" (i), "r"(&v->counter)				\
	: "memory");						\
	return ret;						\
}

#define ATOMIC_OP_RETURN(op, c_op)				\
static __always_inline						\
int arch_atomic_##op##_return_relaxed(int i, atomic_t *v)	\
{								\
	return arch_atomic_fetch_##op##_relaxed(i, v) c_op i;	\
}

#define ATOMIC_OPS(op, c_op)					\
	ATOMIC_FETCH_OP(op)					\
	ATOMIC_OP_RETURN(op, c_op)

ATOMIC_OPS(add, +)
ATOMIC_OPS(sub, -)

#define arch_atomic_fetch_add_relaxed	arch_atomic_fetch_add_relaxed
#define arch_atomic_fetch_sub_relaxed	arch_atomic_fetch_sub_relaxed

#define arch_atomic_add_return_relaxed	arch_atomic_add_return_relaxed
#define arch_atomic_sub_return_relaxed	arch_atomic_sub_return_relaxed

#undef ATOMIC_OPS
#undef ATOMIC_OP_RETURN

#define ATOMIC_OPS(op)						\
	ATOMIC_FETCH_OP(op)

ATOMIC_OPS(and)
ATOMIC_OPS( or)
ATOMIC_OPS(xor)

#define arch_atomic_fetch_and_relaxed	arch_atomic_fetch_and_relaxed
#define arch_atomic_fetch_or_relaxed	arch_atomic_fetch_or_relaxed
#define arch_atomic_fetch_xor_relaxed	arch_atomic_fetch_xor_relaxed

#undef ATOMIC_OPS

#undef ATOMIC_FETCH_OP

static __always_inline int
arch_atomic_fetch_add_unless(atomic_t *v, int a, int u)
{
	int prev, tmp;

	__asm__ __volatile__ (
		RELEASE_FENCE
		"1:	ldex.w		%0, (%3) \n"
		"	cmpne		%0, %4   \n"
		"	bf		2f       \n"
		"	mov		%1, %0   \n"
		"	add		%1, %2   \n"
		"	stex.w		%1, (%3) \n"
		"	bez		%1, 1b   \n"
		FULL_FENCE
		"2:\n"
		: "=&r" (prev), "=&r" (tmp)
		: "r" (a), "r" (&v->counter), "r" (u)
		: "memory");

	return prev;
}
#define arch_atomic_fetch_add_unless arch_atomic_fetch_add_unless

static __always_inline bool
arch_atomic_inc_unless_negative(atomic_t *v)
{
	int rc, tmp;

	__asm__ __volatile__ (
		RELEASE_FENCE
		"1:	ldex.w		%0, (%2) \n"
		"	movi		%1, 0    \n"
		"	blz		%0, 2f   \n"
		"	movi		%1, 1    \n"
		"	addi		%0, 1    \n"
		"	stex.w		%0, (%2) \n"
		"	bez		%0, 1b   \n"
		FULL_FENCE
		"2:\n"
		: "=&r" (tmp), "=&r" (rc)
		: "r" (&v->counter)
		: "memory");

	return tmp ? true : false;

}
#define arch_atomic_inc_unless_negative arch_atomic_inc_unless_negative

static __always_inline bool
arch_atomic_dec_unless_positive(atomic_t *v)
{
	int rc, tmp;

	__asm__ __volatile__ (
		RELEASE_FENCE
		"1:	ldex.w		%0, (%2) \n"
		"	movi		%1, 0    \n"
		"	bhz		%0, 2f   \n"
		"	movi		%1, 1    \n"
		"	subi		%0, 1    \n"
		"	stex.w		%0, (%2) \n"
		"	bez		%0, 1b   \n"
		FULL_FENCE
		"2:\n"
		: "=&r" (tmp), "=&r" (rc)
		: "r" (&v->counter)
		: "memory");

	return tmp ? true : false;
}
#define arch_atomic_dec_unless_positive arch_atomic_dec_unless_positive

static __always_inline int
arch_atomic_dec_if_positive(atomic_t *v)
{
	int dec, tmp;

	__asm__ __volatile__ (
		RELEASE_FENCE
		"1:	ldex.w		%0, (%2) \n"
		"	subi		%1, %0, 1 \n"
		"	blz		%1, 2f   \n"
		"	stex.w		%1, (%2) \n"
		"	bez		%1, 1b   \n"
		FULL_FENCE
		"2:\n"
		: "=&r" (dec), "=&r" (tmp)
		: "r" (&v->counter)
		: "memory");

	return dec - 1;
}
#define arch_atomic_dec_if_positive arch_atomic_dec_if_positive

#define ATOMIC_OP()						\
static __always_inline						\
int arch_atomic_xchg_relaxed(atomic_t *v, int n)		\
{								\
	return __xchg_relaxed(n, &(v->counter), 4);		\
}								\
static __always_inline						\
int arch_atomic_cmpxchg_relaxed(atomic_t *v, int o, int n)	\
{								\
	return __cmpxchg_relaxed(&(v->counter), o, n, 4);	\
}								\
static __always_inline						\
int arch_atomic_cmpxchg_acquire(atomic_t *v, int o, int n)	\
{								\
	return __cmpxchg_acquire(&(v->counter), o, n, 4);	\
}								\
static __always_inline						\
int arch_atomic_cmpxchg(atomic_t *v, int o, int n)		\
{								\
	return __cmpxchg(&(v->counter), o, n, 4);		\
}

#define ATOMIC_OPS()						\
	ATOMIC_OP()

ATOMIC_OPS()

#define arch_atomic_xchg_relaxed	arch_atomic_xchg_relaxed
#define arch_atomic_cmpxchg_relaxed	arch_atomic_cmpxchg_relaxed
#define arch_atomic_cmpxchg_acquire	arch_atomic_cmpxchg_acquire
#define arch_atomic_cmpxchg		arch_atomic_cmpxchg

#undef ATOMIC_OPS
#undef ATOMIC_OP

#else
#include <asm-generic/atomic.h>
#endif

#endif /* __ASM_CSKY_ATOMIC_H */
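For readability, ATOMIC_OP(add) above expands (mechanically, modulo whitespace) to the function below: a plain ldex.w/stex.w retry loop with no ordering fences, which is what keeps the relaxed operations cheap.

static __always_inline void arch_atomic_add(int i, atomic_t *v)
{
	unsigned long tmp;
	__asm__ __volatile__ (
	"1:	ldex.w		%0, (%2)	\n"	/* load-exclusive of v->counter */
	"	add		%0, %1		\n"	/* tmp += i */
	"	stex.w		%0, (%2)	\n"	/* store-exclusive; %0 = 1 on success */
	"	bez		%0, 1b		\n"	/* retry if another CPU intervened */
	: "=&r" (tmp)
	: "r" (i), "r" (&v->counter)
	: "memory");
}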
@@ -37,17 +37,21 @@
 * bar.brar
 * bar.bwaw
 */
#define FULL_FENCE		".long 0x842fc000\n"
#define ACQUIRE_FENCE		".long 0x8427c000\n"
#define RELEASE_FENCE		".long 0x842ec000\n"

#define __bar_brw()	asm volatile (".long 0x842cc000\n":::"memory")
#define __bar_br()	asm volatile (".long 0x8424c000\n":::"memory")
#define __bar_bw()	asm volatile (".long 0x8428c000\n":::"memory")
#define __bar_arw()	asm volatile (".long 0x8423c000\n":::"memory")
#define __bar_ar()	asm volatile (".long 0x8421c000\n":::"memory")
#define __bar_aw()	asm volatile (".long 0x8422c000\n":::"memory")
#define __bar_brwarw()	asm volatile (".long 0x842fc000\n":::"memory")
#define __bar_brarw()	asm volatile (".long 0x8427c000\n":::"memory")
#define __bar_brwarw()	asm volatile (FULL_FENCE:::"memory")
#define __bar_brarw()	asm volatile (ACQUIRE_FENCE:::"memory")
#define __bar_bwarw()	asm volatile (".long 0x842bc000\n":::"memory")
#define __bar_brwar()	asm volatile (".long 0x842dc000\n":::"memory")
#define __bar_brwaw()	asm volatile (".long 0x842ec000\n":::"memory")
#define __bar_brwaw()	asm volatile (RELEASE_FENCE:::"memory")
#define __bar_brar()	asm volatile (".long 0x8425c000\n":::"memory")
#define __bar_bwaw()	asm volatile (".long 0x842ac000\n":::"memory")

@@ -56,7 +60,6 @@
#define __smp_rmb()	__bar_brar()
#define __smp_wmb()	__bar_bwaw()

#define ACQUIRE_FENCE		".long 0x8427c000\n"
#define __smp_acquire_fence()	__bar_brarw()
#define __smp_release_fence()	__bar_brwaw()
@@ -64,15 +64,71 @@ extern void __bad_xchg(void);
#define arch_cmpxchg_relaxed(ptr, o, n)				\
	(__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))

#define arch_cmpxchg(ptr, o, n)					\
#define __cmpxchg_acquire(ptr, old, new, size)			\
({								\
	__typeof__(ptr) __ptr = (ptr);				\
	__typeof__(new) __new = (new);				\
	__typeof__(new) __tmp;					\
	__typeof__(old) __old = (old);				\
	__typeof__(*(ptr)) __ret;				\
	__smp_release_fence();					\
	__ret = arch_cmpxchg_relaxed(ptr, o, n);		\
	__smp_acquire_fence();					\
	switch (size) {						\
	case 4:							\
		asm volatile (					\
		"1:	ldex.w		%0, (%3) \n"		\
		"	cmpne		%0, %4   \n"		\
		"	bt		2f       \n"		\
		"	mov		%1, %2   \n"		\
		"	stex.w		%1, (%3) \n"		\
		"	bez		%1, 1b   \n"		\
		ACQUIRE_FENCE					\
		"2:				\n"		\
		: "=&r" (__ret), "=&r" (__tmp)			\
		: "r" (__new), "r"(__ptr), "r"(__old)		\
		:);						\
		break;						\
	default:						\
		__bad_xchg();					\
	}							\
	__ret;							\
})

#define arch_cmpxchg_acquire(ptr, o, n)				\
	(__cmpxchg_acquire((ptr), (o), (n), sizeof(*(ptr))))

#define __cmpxchg(ptr, old, new, size)				\
({								\
	__typeof__(ptr) __ptr = (ptr);				\
	__typeof__(new) __new = (new);				\
	__typeof__(new) __tmp;					\
	__typeof__(old) __old = (old);				\
	__typeof__(*(ptr)) __ret;				\
	switch (size) {						\
	case 4:							\
		asm volatile (					\
		RELEASE_FENCE					\
		"1:	ldex.w		%0, (%3) \n"		\
		"	cmpne		%0, %4   \n"		\
		"	bt		2f       \n"		\
		"	mov		%1, %2   \n"		\
		"	stex.w		%1, (%3) \n"		\
		"	bez		%1, 1b   \n"		\
		FULL_FENCE					\
		"2:				\n"		\
		: "=&r" (__ret), "=&r" (__tmp)			\
		: "r" (__new), "r"(__ptr), "r"(__old)		\
		:);						\
		break;						\
	default:						\
		__bad_xchg();					\
	}							\
	__ret;							\
})

#define arch_cmpxchg(ptr, o, n)					\
	(__cmpxchg((ptr), (o), (n), sizeof(*(ptr))))

#define arch_cmpxchg_local(ptr, o, n)				\
	(__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
#else
#include <asm-generic/cmpxchg.h>
#endif
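The ordering contract of the two new macros is visible in the fence placement: __cmpxchg_acquire issues ACQUIRE_FENCE only after the store-conditional has succeeded, while the fully ordered __cmpxchg brackets the loop with RELEASE_FENCE before and FULL_FENCE after. As a rough portable analogy in C11 atomics (an approximation for illustration only, not the kernel implementation):

#include <stdatomic.h>
#include <stdbool.h>

/* Approximates arch_cmpxchg_acquire(): ordering only on the winning CAS. */
static bool cmpxchg_acquire_like(atomic_int *p, int expected, int desired)
{
	return atomic_compare_exchange_strong_explicit(p, &expected, desired,
			memory_order_acquire,	/* success ordering */
			memory_order_relaxed);	/* failure ordering */
}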
@@ -5,7 +5,6 @@

#include <linux/pgtable.h>
#include <linux/types.h>
#include <linux/version.h>

/*
 * I/O memory access primitives. Reads are ordered relative to any
@@ -32,6 +31,17 @@
#define writel(v,c)		({ wmb(); writel_relaxed((v),(c)); mb(); })
#endif

/*
 * String version of I/O memory access operations.
 */
extern void __memcpy_fromio(void *, const volatile void __iomem *, size_t);
extern void __memcpy_toio(volatile void __iomem *, const void *, size_t);
extern void __memset_io(volatile void __iomem *, int, size_t);

#define memset_io(c,v,l)	__memset_io((c),(v),(l))
#define memcpy_fromio(a,c,l)	__memcpy_fromio((a),(c),(l))
#define memcpy_toio(c,a,l)	__memcpy_toio((c),(a),(l))

/*
 * I/O memory mapping functions.
 */
@@ -2,7 +2,7 @@
extra-y := head.o vmlinux.lds

obj-y += entry.o atomic.o signal.o traps.o irq.o time.o vdso.o vdso/
obj-y += power.o syscall.o syscall_table.o setup.o
obj-y += power.o syscall.o syscall_table.o setup.o io.o
obj-y += process.o cpu-probe.o ptrace.o stacktrace.o
obj-y += probes/
arch/csky/kernel/io.c (new file, 91 lines)
@@ -0,0 +1,91 @@
// SPDX-License-Identifier: GPL-2.0

#include <linux/export.h>
#include <linux/types.h>
#include <linux/io.h>

/*
 * Copy data from IO memory space to "real" memory space.
 */
void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t count)
{
	while (count && !IS_ALIGNED((unsigned long)from, 4)) {
		*(u8 *)to = __raw_readb(from);
		from++;
		to++;
		count--;
	}

	while (count >= 4) {
		*(u32 *)to = __raw_readl(from);
		from += 4;
		to += 4;
		count -= 4;
	}

	while (count) {
		*(u8 *)to = __raw_readb(from);
		from++;
		to++;
		count--;
	}
}
EXPORT_SYMBOL(__memcpy_fromio);

/*
 * Copy data from "real" memory space to IO memory space.
 */
void __memcpy_toio(volatile void __iomem *to, const void *from, size_t count)
{
	while (count && !IS_ALIGNED((unsigned long)to, 4)) {
		__raw_writeb(*(u8 *)from, to);
		from++;
		to++;
		count--;
	}

	while (count >= 4) {
		__raw_writel(*(u32 *)from, to);
		from += 4;
		to += 4;
		count -= 4;
	}

	while (count) {
		__raw_writeb(*(u8 *)from, to);
		from++;
		to++;
		count--;
	}
}
EXPORT_SYMBOL(__memcpy_toio);

/*
 * "memset" on IO memory space.
 */
void __memset_io(volatile void __iomem *dst, int c, size_t count)
{
	u32 qc = (u8)c;

	qc |= qc << 8;
	qc |= qc << 16;

	while (count && !IS_ALIGNED((unsigned long)dst, 4)) {
		__raw_writeb(c, dst);
		dst++;
		count--;
	}

	while (count >= 4) {
		__raw_writel(qc, dst);
		dst += 4;
		count -= 4;
	}

	while (count) {
		__raw_writeb(c, dst);
		dst++;
		count--;
	}
}
EXPORT_SYMBOL(__memset_io);
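A short usage sketch for these helpers (the device, base address, region size, and register offset are hypothetical; ioremap(), memcpy_fromio() and friends are the standard kernel APIs):

	/* Hypothetical driver fragment: drain a device FIFO, then clear it. */
	void __iomem *base = ioremap(DEV_PHYS_BASE, DEV_REGION_SIZE);	/* hypothetical constants */
	u8 buf[64];

	memcpy_fromio(buf, base + DEV_FIFO_OFFSET, sizeof(buf));	/* hypothetical offset */
	memset_io(base + DEV_FIFO_OFFSET, 0, sizeof(buf));
	iounmap(base);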
@@ -68,7 +68,7 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
		*location = rel[i].r_addend + sym->st_value;
		break;
	case R_CSKY_PC32:
		/* Add the value, subtract its postition */
		/* Add the value, subtract its position */
		*location = rel[i].r_addend + sym->st_value
			- (uint32_t)location;
		break;
@@ -30,7 +30,7 @@ static int __kprobes patch_text_cb(void *priv)
	struct csky_insn_patch *param = priv;
	unsigned int addr = (unsigned int)param->addr;

	if (atomic_inc_return(&param->cpu_count) == 1) {
	if (atomic_inc_return(&param->cpu_count) == num_online_cpus()) {
		*(u16 *) addr = cpu_to_le16(param->opcode);
		dcache_wb_range(addr, addr + 2);
		atomic_inc(&param->cpu_count);
@@ -102,7 +102,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
	struct uprobe_task *utask = current->utask;

	/*
	 * Task has received a fatal signal, so reset back to probbed
	 * Task has received a fatal signal, so reset back to probed
	 * address.
	 */
	instruction_pointer_set(regs, utask->vaddr);
@@ -2,7 +2,6 @@
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.

#include <linux/module.h>
#include <linux/version.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/debug.h>
@@ -1,3 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
lib-y := usercopy.o delay.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
ifneq ($(CONFIG_HAVE_EFFICIENT_UNALIGNED_STRING_OPS), y)
lib-y += string.o
endif
arch/csky/lib/string.c (new file, 134 lines)
@@ -0,0 +1,134 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
 * String functions optimized for hardware which doesn't
 * handle unaligned memory accesses efficiently.
 *
 * Copyright (C) 2021 Matteo Croce
 */

#include <linux/types.h>
#include <linux/module.h>

/* Minimum size for a word copy to be convenient */
#define BYTES_LONG	sizeof(long)
#define WORD_MASK	(BYTES_LONG - 1)
#define MIN_THRESHOLD	(BYTES_LONG * 2)

/* convenience union to avoid cast between different pointer types */
union types {
	u8 *as_u8;
	unsigned long *as_ulong;
	uintptr_t as_uptr;
};

union const_types {
	const u8 *as_u8;
	unsigned long *as_ulong;
	uintptr_t as_uptr;
};

void *memcpy(void *dest, const void *src, size_t count)
{
	union const_types s = { .as_u8 = src };
	union types d = { .as_u8 = dest };
	int distance = 0;

	if (count < MIN_THRESHOLD)
		goto copy_remainder;

	/* Copy a byte at a time until the destination is aligned. */
	for (; d.as_uptr & WORD_MASK; count--)
		*d.as_u8++ = *s.as_u8++;

	distance = s.as_uptr & WORD_MASK;

	if (distance) {
		unsigned long last, next;

		/*
		 * s is distance bytes ahead of d, and d just reached
		 * the alignment boundary. Move s backward to word align it
		 * and shift data to compensate for distance, in order to do
		 * word-by-word copy.
		 */
		s.as_u8 -= distance;

		next = s.as_ulong[0];
		for (; count >= BYTES_LONG; count -= BYTES_LONG) {
			last = next;
			next = s.as_ulong[1];

			d.as_ulong[0] = last >> (distance * 8) |
					next << ((BYTES_LONG - distance) * 8);

			d.as_ulong++;
			s.as_ulong++;
		}

		/* Restore s with the original offset. */
		s.as_u8 += distance;
	} else {
		/*
		 * If the source and dest lower bits are the same, do a simple
		 * 32/64 bit wide copy.
		 */
		for (; count >= BYTES_LONG; count -= BYTES_LONG)
			*d.as_ulong++ = *s.as_ulong++;
	}

copy_remainder:
	while (count--)
		*d.as_u8++ = *s.as_u8++;

	return dest;
}
EXPORT_SYMBOL(memcpy);

/*
 * Simply check whether the buffers overlap and call memcpy() in that case,
 * otherwise do a simple one byte at a time backward copy.
 */
void *memmove(void *dest, const void *src, size_t count)
{
	if (dest < src || src + count <= dest)
		return memcpy(dest, src, count);

	if (dest > src) {
		const char *s = src + count;
		char *tmp = dest + count;

		while (count--)
			*--tmp = *--s;
	}
	return dest;
}
EXPORT_SYMBOL(memmove);

void *memset(void *s, int c, size_t count)
{
	union types dest = { .as_u8 = s };

	if (count >= MIN_THRESHOLD) {
		unsigned long cu = (unsigned long)c;

		/* Compose an ulong with 'c' repeated 4/8 times */
		cu |= cu << 8;
		cu |= cu << 16;
		/* Suppress warning on 32 bit machines */
		cu |= (cu << 16) << 16;

		for (; count && dest.as_uptr & WORD_MASK; count--)
			*dest.as_u8++ = c;

		/* Copy using the largest size allowed */
		for (; count >= BYTES_LONG; count -= BYTES_LONG)
			*dest.as_ulong++ = cu;
	}

	/* copy the remainder */
	while (count--)
		*dest.as_u8++ = c;

	return s;
}
EXPORT_SYMBOL(memset);
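To make the misaligned-source path in memcpy() above concrete: with BYTES_LONG == 4 and distance == 1 on a little-endian machine, each destination word is stitched together from two aligned source words. A standalone userspace sketch of that recombination step (illustration only):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Little-endian byte streams in memory: 11 22 33 44 then 55 66 77 88. */
	uint32_t last = 0x44332211, next = 0x88776655;
	unsigned int distance = 1;	/* src & WORD_MASK, as in memcpy() above */

	/* Same expression as the kernel loop: drop the bytes of 'last'
	 * already consumed, pull the missing high bytes from 'next'. */
	uint32_t out = last >> (distance * 8) |
		       next << ((4 - distance) * 8);

	printf("0x%08x\n", out);	/* prints 0x55443322: bytes 22 33 44 55 */
	return 0;
}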
@@ -9,7 +9,6 @@
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/types.h>
#include <linux/version.h>
#include <asm/cache.h>

static inline void cache_op(phys_addr_t paddr, size_t size,