08dbd0f8ef
Based on 1 normalized pattern(s): this program is free software you can redistribute it and or modify it under the terms of the gnu general public license version 2 and only version 2 as published by the free software foundation this program is distributed in the hope that it will be useful but without any warranty without even the implied warranty of merchantability or fitness for a particular purpose see the gnu general public license for more details you should have received a copy of the gnu general public license along with this program if not write to the free software foundation inc 51 franklin street fifth floor boston ma 02110 1301 usa extracted by the scancode license scanner the SPDX license identifier GPL-2.0-only has been chosen to replace the boilerplate/reference in 94 file(s). Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Reviewed-by: Allison Randal <allison@lohutok.net> Reviewed-by: Richard Fontana <rfontana@redhat.com> Reviewed-by: Alexios Zavras <alexios.zavras@intel.com> Cc: linux-spdx@vger.kernel.org Link: https://lkml.kernel.org/r/20190529141334.043630402@linutronix.de Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
303 lines
4.5 KiB
ArmAsm
303 lines
4.5 KiB
ArmAsm
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2011, The Linux Foundation. All rights reserved.
 */
|
|
|
|
|
|
/* HEXAGON assembly optimized memset */
/* Replaces the standard library function memset */
|
|
|
|
|
|
/*
 * HEXAGON_OPT_FUNC_BEGIN name
 *
 * Opens a function definition: selects the .text section, aligns the
 * location counter to a 2^4 = 16 byte boundary, declares \name as a global
 * symbol of type @function and emits the \name entry label.
 * Pair it with HEXAGON_OPT_FUNC_FINISH using the same name.
 */
.macro HEXAGON_OPT_FUNC_BEGIN name
	.text
	.p2align 4
	.globl \name
	.type \name, @function
\name:
.endm
|
|
|
|
/*
 * HEXAGON_OPT_FUNC_FINISH name
 *
 * Closes a function opened with HEXAGON_OPT_FUNC_BEGIN: records the size of
 * symbol \name as the distance from its label to the current location.
 */
.macro HEXAGON_OPT_FUNC_FINISH name
	.size \name, . - \name
.endm
|
|
|
|
/*
 * FUNCTION: memset (v2 version)
 *
 * C equivalent: void *memset(void *dest, int value, size_t count)
 *
 * In:   r0 = dest, r1 = value (its low byte is the fill byte), r2 = count
 * Out:  r0 = dest (never written by this routine)
 * Uses: r2-r10, p0, p1 and hardware loop 0
 *
 * Register roles:
 *   r8    write cursor (starts at dest)
 *   r4    fill byte replicated into all four bytes; r5:4 is the 8-byte pattern
 *   r3:2  remaining count as a 64-bit pair (r3 is kept at 0)
 *   r7:6  amount subtracted from r3:2 after each store (r7 is kept at 0)
 *   r9    8 - (dest & 7); bits 0..2 select the byte/half/word stores needed
 *         to reach 8-byte alignment
 *   r10   number of doubleword iterations
 *
 * Strategy: counts below 8 are written one byte at a time.  Otherwise the
 * cursor is aligned to 8 bytes with at most one byte, one halfword and one
 * word store, the bulk is written a doubleword at a time, and the remaining
 * 0..7 bytes are written with at most one word, one halfword and one byte.
 *
 * Predicate timing: a conditional written "if p0"/"if !p0" (without .new)
 * tests the value the predicate had before the packet.  Most packets below
 * branch on the previous test while computing the next test in the same
 * packet; likewise a compare in a packet sees the register values from
 * before that packet's own updates.
 */
#if __HEXAGON_ARCH__ < 3
HEXAGON_OPT_FUNC_BEGIN memset
	{
		r6 = #8
		r7 = extractu(r0, #3 , #0)	/* r7 = dest & 7 */
		p0 = cmp.eq(r2, #0)		/* p0: nothing to write */
		p1 = cmp.gtu(r2, #7)		/* p1: at least 8 bytes to write */
	}
	{
		r4 = vsplatb(r1)		/* replicate the fill byte */
		r8 = r0 /* leave r0 intact for return val */
		r9 = sub(r6, r7) /* bytes until double alignment */
		if p0 jumpr r31 /* count == 0, so return */
	}
	{
		r3 = #0				/* high half of the count pair */
		r7 = #0				/* high half of the step pair */
		p0 = tstbit(r9, #0)		/* odd distance: leading byte needed */
		if p1 jump 2f /* skip byte loop */
	}

/* less than 8 bytes to set, so just set a byte at a time and return */

	loop0(1f, r2) /* byte loop */
	.falign
1: /* byte loop */
	{
		memb(r8++#1) = r4
	}:endloop0
	jumpr r31

	/* count >= 8 from here on: align the cursor to 8 bytes */
	.falign
2: /* skip byte loop */
	{
		r6 = #1				/* step for the byte store */
		p0 = tstbit(r9, #1)		/* next test: leading halfword? */
		p1 = cmp.eq(r2, #1)		/* done after this byte? */
		if !p0 jump 3f /* skip initial byte store */
	}
	{
		memb(r8++#1) = r4
		r3:2 = sub(r3:2, r7:6)		/* count -= 1 */
		if p1 jumpr r31
	}
	.falign
3: /* skip initial byte store */
	{
		r6 = #2				/* step for the halfword store */
		p0 = tstbit(r9, #2)		/* next test: leading word? */
		p1 = cmp.eq(r2, #2)		/* done after this halfword? */
		if !p0 jump 4f /* skip initial half store */
	}
	{
		memh(r8++#2) = r4
		r3:2 = sub(r3:2, r7:6)		/* count -= 2 */
		if p1 jumpr r31
	}
	.falign
4: /* skip initial half store */
	{
		r6 = #4				/* step for the word store */
		p0 = cmp.gtu(r2, #7)		/* next test: a doubleword left? */
		p1 = cmp.eq(r2, #4)		/* done after this word? */
		if !p0 jump 5f /* skip initial word store */
	}
	{
		memw(r8++#4) = r4
		r3:2 = sub(r3:2, r7:6)		/* count -= 4 */
		p0 = cmp.gtu(r2, #11)		/* same test on the pre-store count */
		if p1 jumpr r31
	}
	.falign
5: /* skip initial word store */
	{
		r10 = lsr(r2, #3)		/* number of whole doublewords */
		p1 = cmp.eq(r3, #1)		/* r3 is 0 here, so this clears p1 */
		if !p0 jump 7f /* skip double loop */
	}
	{
		r5 = r4				/* r5:4 = 8-byte fill pattern */
		r6 = #8				/* step for the doubleword stores */
		loop0(6f, r10) /* double loop */
	}

/* set bytes a double word at a time */

	.falign
6: /* double loop */
	{
		memd(r8++#8) = r5:4
		r3:2 = sub(r3:2, r7:6)		/* count -= 8 */
		p1 = cmp.eq(r2, #8)		/* true when this store was the last */
	}:endloop0

	/* tail: fewer than 8 bytes remain */
	.falign
7: /* skip double loop */
	{
		p0 = tstbit(r2, #2)		/* trailing word needed? */
		if p1 jumpr r31			/* nothing left after the loop */
	}
	{
		r6 = #4				/* step for the word store */
		p0 = tstbit(r2, #1)		/* next test: trailing halfword? */
		p1 = cmp.eq(r2, #4)		/* done after this word? */
		if !p0 jump 8f /* skip final word store */
	}
	{
		memw(r8++#4) = r4
		r3:2 = sub(r3:2, r7:6)		/* count -= 4 */
		if p1 jumpr r31
	}
	.falign
8: /* skip final word store */
	{
		p1 = cmp.eq(r2, #2)		/* done after this halfword? */
		if !p0 jump 9f /* skip final half store */
	}
	{
		memh(r8++#2) = r4		/* count is not updated any more */
		if p1 jumpr r31
	}
	.falign
9: /* skip final half store */
	{
		memb(r8++#1) = r4		/* the one byte still outstanding */
		jumpr r31
	}
HEXAGON_OPT_FUNC_FINISH memset
#endif
|
|
|
|
|
|
/*
 * FUNCTION: memset (v3 and higher version)
 *
 * C equivalent: void *memset(void *dest, int value, size_t count)
 *
 * In:   r0 = dest, r1 = value (its low byte is the fill byte), r2 = count
 * Out:  r0 = dest (never written by this routine)
 * Uses: r2-r8, p0, p1 and hardware loop 0
 *
 * Register roles:
 *   r6    write cursor (starts at dest)
 *   r7    fill byte replicated into all four bytes
 *   r5:4  8-byte fill pattern (r7 in both halves)
 *   r2    bytes still to write
 *   r3    byte-loop cursor, alignment scratch, then the 32-byte block count
 *   r8    doubleword iteration count
 *
 * Strategy:
 *   1. count == 0 returns at once; count <= 8 is written one byte at a time.
 *   2. Otherwise the cursor is aligned to 8 bytes with at most one byte,
 *      one halfword and one word store.
 *   3. If more than 127 bytes remain, the cursor is aligned to 32 bytes with
 *      up to three doubleword stores and the bulk is handled in 32-byte
 *      blocks using dczeroa: for a zero value dczeroa alone provides the
 *      32 bytes, otherwise each dczeroa is followed by four doubleword
 *      stores of the pattern.
 *   4. Remaining doublewords are stored in a loop, then the last 0..7 bytes
 *      with at most one word, one halfword and one byte store.
 *
 * Within a packet, compares and loop0 see the register values from before
 * the packet's own updates; "pN.new" refers to the predicate computed in the
 * same packet.
 */
#if __HEXAGON_ARCH__ >= 3
HEXAGON_OPT_FUNC_BEGIN memset
	{
		r7=vsplatb(r1)			/* replicate the fill byte */
		r6 = r0				/* cursor; r0 stays as return value */
		if (r2==#0) jump:nt .L1		/* count == 0: just return */
	}
	{
		r5:4=combine(r7,r7)		/* 8-byte fill pattern */
		p0 = cmp.gtu(r2,#8)
		if (p0.new) jump:nt .L3		/* more than 8 bytes: aligned path */
	}

	/* 1..8 bytes: simple byte loop */
	{
		r3 = r0
		loop0(.L47,r2)
	}
	.falign
.L47:
	{
		memb(r3++#1) = r1
	}:endloop0 /* start=.L47 */
	jumpr r31

	/* count > 8: bring the cursor to 8-byte alignment */
.L3:
	{
		p0 = tstbit(r0,#0)
		if (!p0.new) jump:nt .L8	/* even address: no leading byte */
		p1 = cmp.eq(r2, #1)		/* false here, since count > 8 */
	}
	{
		r6 = add(r0, #1)
		r2 = add(r2,#-1)
		memb(r0) = r1
		if (p1) jump .L1
	}
.L8:
	{
		p0 = tstbit(r6,#1)
		if (!p0.new) jump:nt .L10	/* already 4-byte aligned */
	}
	{
		r2 = add(r2,#-2)
		memh(r6++#2) = r7
		p0 = cmp.eq(r2, #2)		/* pre-store count: was this the end? */
		if (p0.new) jump:nt .L1
	}
.L10:
	{
		p0 = tstbit(r6,#2)
		if (!p0.new) jump:nt .L12	/* already 8-byte aligned */
	}
	{
		r2 = add(r2,#-4)
		memw(r6++#4) = r7
		p0 = cmp.eq(r2, #4)		/* pre-store count: was this the end? */
		if (p0.new) jump:nt .L1
	}

	/* cursor is 8-byte aligned; use 32-byte blocks only for > 127 bytes */
.L12:
	{
		p0 = cmp.gtu(r2,#127)
		if (!p0.new) jump:nt .L14
	}

	/* up to three doubleword stores to reach 32-byte alignment */
	r3 = and(r6,#31)
	if (r3==#0) jump:nt .L17
	{
		memd(r6++#8) = r5:4
		r2 = add(r2,#-8)
	}
	r3 = and(r6,#31)
	if (r3==#0) jump:nt .L17
	{
		memd(r6++#8) = r5:4
		r2 = add(r2,#-8)
	}
	r3 = and(r6,#31)
	if (r3==#0) jump:nt .L17
	{
		memd(r6++#8) = r5:4
		r2 = add(r2,#-8)
	}
.L17:
	{
		r3 = lsr(r2,#5)			/* number of whole 32-byte blocks */
		/*
		 * Tests the whole register: a value with a zero low byte but
		 * other bits set takes .L18, which still stores the pattern.
		 */
		if (r1!=#0) jump:nt .L18
	}

	/* value == 0: dczeroa alone fills each 32-byte block */
	{
		/* r8 and the new r3 are not read again on this path */
		r8 = r3
		r3 = r6
		loop0(.L46,r3)			/* uses the block count in r3 */
	}
	.falign
.L46:
	{
		dczeroa(r6)
		r6 = add(r6,#32)
		r2 = add(r2,#-32)
	}:endloop0 /* start=.L46 */

	/* remaining whole doublewords */
.L14:
	{
		p0 = cmp.gtu(r2,#7)
		if (!p0.new) jump:nt .L28	/* fewer than 8 bytes left */
		r8 = lsr(r2,#3)			/* doubleword iterations */
	}
	loop0(.L44,r8)
	.falign
.L44:
	{
		memd(r6++#8) = r5:4
		r2 = add(r2,#-8)
	}:endloop0 /* start=.L44 */

	/* tail: bits 2, 1 and 0 of the remaining count */
.L28:
	{
		p0 = tstbit(r2,#2)
		if (!p0.new) jump:nt .L33	/* no trailing word */
	}
	{
		r2 = add(r2,#-4)
		memw(r6++#4) = r7
	}
.L33:
	{
		p0 = tstbit(r2,#1)
		if (!p0.new) jump:nt .L35	/* no trailing halfword */
	}
	{
		r2 = add(r2,#-2)
		memh(r6++#2) = r7
	}
.L35:
	p0 = cmp.eq(r2,#1)
	if (p0) memb(r6) = r1			/* last odd byte, if any */
.L1:
	jumpr r31

	/* value != 0: per 32-byte block, dczeroa then four pattern stores */
.L18:
	loop0(.L45,r3)
	.falign
.L45:
	dczeroa(r6)
	{
		memd(r6++#8) = r5:4
		r2 = add(r2,#-32)
	}
	memd(r6++#8) = r5:4
	memd(r6++#8) = r5:4
	{
		memd(r6++#8) = r5:4
	}:endloop0 /* start=.L45 */
	jump .L14				/* finish with doublewords and tail */
HEXAGON_OPT_FUNC_FINISH memset
#endif
|