2019-05-29 07:12:41 -07:00
/* SPDX-License-Identifier: GPL-2.0-only */
2011-10-31 18:38:38 -05:00
/*
 * Copyright (c) 2011, The Linux Foundation. All rights reserved.
 */
/* HEXAGON assembly optimized memset */
/* Replaces the standard library function memset */
/*
 * HEXAGON_OPT_FUNC_BEGIN name
 *
 * Opens a function definition: selects the .text section, aligns to
 * 2^4 = 16 bytes, exports the symbol as a global of type @function and
 * emits its entry label.  Close the function with
 * HEXAGON_OPT_FUNC_FINISH using the same name.
 */
	.macro HEXAGON_OPT_FUNC_BEGIN name
	.text
	.p2align 4
	.globl \name
	.type  \name, @function
\name:
	.endm
/*
 * HEXAGON_OPT_FUNC_FINISH name
 *
 * Closes a function definition: records the symbol size as the distance
 * from the symbol's label to the current location.
 */
	.macro HEXAGON_OPT_FUNC_FINISH name
	.size  \name, . - \name
	.endm
/*
 * FUNCTION: memset (v2 version)
 *
 * Replacement for the standard library memset, assembled only when
 * __HEXAGON_ARCH__ < 3.
 *
 * In:   r0 = destination, r1 = fill value, r2 = byte count
 * Out:  r0 is never written, so it still holds the destination on return
 * Uses: r3:2  remaining byte count as a register pair (r3 is set to 0)
 *       r4    vsplatb(r1), the pattern written by every store
 *       r5    copy of r4, so that r5:4 can be stored with memd
 *       r7:6  size of the store issued in the current step (r7 = 0);
 *             subtracted from r3:2
 *       r8    moving store pointer
 *       r9    8 - (destination & 7): bytes until double-word alignment
 *       r10   remaining count >> 3: trip count of the double-word loop
 *       p0/p1 branch predicates
 *
 * Flow: counts below 8 are written one byte at a time.  Otherwise optional
 * byte, half-word and word stores (selected by bits 0..2 of r9) align r8 to
 * 8 bytes, a hardware loop stores double words, and optional word,
 * half-word and byte stores finish the tail.
 *
 * NOTE: the code relies on Hexagon packet semantics (see the Hexagon
 * Programmer's Reference Manual): instructions grouped in one { } packet
 * read the register and predicate values from before the packet.  Each
 * "if p0" / "if !p0" jump therefore tests the predicate computed by an
 * EARLIER packet, while the same packet already computes the predicate
 * for the next step.
 */
#if __HEXAGON_ARCH__ < 3
HEXAGON_OPT_FUNC_BEGIN memset
	{
		r6 = #8
		r7 = extractu(r0, #3, #0)
		p0 = cmp.eq(r2, #0)
		p1 = cmp.gtu(r2, #7)
	}
	{
		r4 = vsplatb(r1)
		r8 = r0			/* leave r0 intact for return val */
		r9 = sub(r6, r7)	/* bytes until double alignment */
		if p0 jumpr r31		/* count == 0, so return */
	}
	{
		r3 = #0			/* high word of the r3:2 count pair */
		r7 = #0			/* high word of the r7:6 step pair */
		p0 = tstbit(r9, #0)	/* tested at 2: (initial byte store) */
		if p1 jump 2f		/* count > 7: skip byte loop */
	}

/* less than 8 bytes to set, so just set a byte at a time and return */

		loop0(1f, r2)		/* byte loop */
	.falign
1: /* byte loop */
	{
		memb(r8++#1) = r4
	}:endloop0
		jumpr r31
	.falign
2: /* skip byte loop */
	{
		r6 = #1			/* step size if the byte store runs */
		p0 = tstbit(r9, #1)	/* tested at 3: (initial half store) */
		p1 = cmp.eq(r2, #1)
		if !p0 jump 3f		/* skip initial byte store */
	}
	{
		memb(r8++#1) = r4
		r3:2 = sub(r3:2, r7:6)	/* count -= 1 */
		if p1 jumpr r31
	}
	.falign
3: /* skip initial byte store */
	{
		r6 = #2			/* step size if the half store runs */
		p0 = tstbit(r9, #2)	/* tested at 4: (initial word store) */
		p1 = cmp.eq(r2, #2)
		if !p0 jump 4f		/* skip initial half store */
	}
	{
		memh(r8++#2) = r4
		r3:2 = sub(r3:2, r7:6)	/* count -= 2 */
		if p1 jumpr r31
	}
	.falign
4: /* skip initial half store */
	{
		r6 = #4			/* step size if the word store runs */
		p0 = cmp.gtu(r2, #7)	/* tested at 5: when the word store is skipped */
		p1 = cmp.eq(r2, #4)
		if !p0 jump 5f		/* skip initial word store */
	}
	{
		memw(r8++#4) = r4
		r3:2 = sub(r3:2, r7:6)	/* count -= 4 */
		p0 = cmp.gtu(r2, #11)	/* pre-store count > 11, i.e. more than 7 left */
		if p1 jumpr r31
	}
	.falign
5: /* skip initial word store */
	{
		r10 = lsr(r2, #3)	/* number of whole double words left */
		p1 = cmp.eq(r3, #1)	/* r3 is expected to be 0 here, which clears
					   p1 for the return test at 7: */
		if !p0 jump 7f		/* skip double loop */
	}
	{
		r5 = r4			/* r5:4 = 8-byte fill pattern */
		r6 = #8			/* step size inside the double loop */
		loop0(6f, r10)		/* double loop */
	}

/* set bytes a double word at a time */

	.falign
6: /* double loop */
	{
		memd(r8++#8) = r5:4
		r3:2 = sub(r3:2, r7:6)	/* count -= 8 */
		p1 = cmp.eq(r2, #8)	/* true when this store consumes the last 8 bytes */
	}:endloop0
	.falign
7: /* skip double loop */
	{
		p0 = tstbit(r2, #2)	/* tested below: a final word store is needed */
		if p1 jumpr r31		/* nothing left after the double loop */
	}
	{
		r6 = #4
		p0 = tstbit(r2, #1)	/* tested at 8: (final half store) */
		p1 = cmp.eq(r2, #4)
		if !p0 jump 8f		/* skip final word store */
	}
	{
		memw(r8++#4) = r4
		r3:2 = sub(r3:2, r7:6)	/* count -= 4 */
		if p1 jumpr r31
	}
	.falign
8: /* skip final word store */
	{
		p1 = cmp.eq(r2, #2)
		if !p0 jump 9f		/* skip final half store */
	}
	{
		memh(r8++#2) = r4
		if p1 jumpr r31
	}
	.falign
9: /* skip final half store */
	{
		memb(r8++#1) = r4	/* last remaining byte */
		jumpr r31
	}
HEXAGON_OPT_FUNC_FINISH memset
#endif
/*
 * FUNCTION: memset (v3 and higher version)
 *
 * Replacement for the standard library memset, assembled only when
 * __HEXAGON_ARCH__ >= 3.
 *
 * In:   r0 = destination, r1 = fill value, r2 = byte count
 * Out:  r0 is never written, so it still holds the destination on return
 * Uses: r2    remaining byte count
 *       r3    byte-loop pointer, alignment scratch, 32-byte block count
 *       r5:4  8-byte fill pattern, combine(r7, r7)
 *       r6    moving store pointer
 *       r7    vsplatb(r1), pattern for memh/memw stores
 *       r8    trip count of the double-word loop
 *       p0/p1 branch predicates
 *
 * Flow:
 *   count == 0    return immediately
 *   count <= 8    byte loop
 *   count >  8    byte/half/word stores align r6 to 8 bytes (.L3/.L8/.L10)
 *   count > 127   up to three memd stores align r6 to 32 bytes, then whole
 *                 32-byte blocks are written using dczeroa (.L46 / .L45)
 *   remainder     double-word loop (.L44), then word/half/byte tail
 *
 * NOTE: instructions grouped in one { } packet read the register values
 * from before the packet (see the Hexagon Programmer's Reference Manual);
 * the packet in front of .L46 depends on this.
 */
#if __HEXAGON_ARCH__ >= 3
HEXAGON_OPT_FUNC_BEGIN memset
	{
		r7 = vsplatb(r1)
		r6 = r0
		if (r2==#0) jump:nt .L1		/* count == 0: nothing to do */
	}
	{
		r5:4 = combine(r7,r7)
		p0 = cmp.gtu(r2,#8)
		if (p0.new) jump:nt .L3		/* more than 8 bytes: aligned path */
	}
	{
		r3 = r0
		loop0(.L47,r2)			/* 1..8 bytes: byte loop */
	}
	.falign
.L47:
	{
		memb(r3++#1) = r1
	}:endloop0 /* start=.L47 */
		jumpr r31
.L3:
	/* odd destination address: store one byte to reach 2-byte alignment */
	{
		p0 = tstbit(r0,#0)
		if (!p0.new) jump:nt .L8
		p1 = cmp.eq(r2, #1)	/* NOTE(review): .L3 is only reached with
					   r2 > 8, so this looks always false */
	}
	{
		r6 = add(r0, #1)
		r2 = add(r2,#-1)
		memb(r0) = r1
		if (p1) jump .L1
	}
.L8:
	/* bit 1 of the pointer set: store a half word to reach 4-byte alignment */
	{
		p0 = tstbit(r6,#1)
		if (!p0.new) jump:nt .L10
	}
	{
		r2 = add(r2,#-2)
		memh(r6++#2) = r7
		p0 = cmp.eq(r2, #2)
		if (p0.new) jump:nt .L1
	}
.L10:
	/* bit 2 of the pointer set: store a word to reach 8-byte alignment */
	{
		p0 = tstbit(r6,#2)
		if (!p0.new) jump:nt .L12
	}
	{
		r2 = add(r2,#-4)
		memw(r6++#4) = r7
		p0 = cmp.eq(r2, #4)
		if (p0.new) jump:nt .L1
	}
.L12:
	/* 127 bytes or fewer left: go straight to the double-word loop */
	{
		p0 = cmp.gtu(r2,#127)
		if (!p0.new) jump:nt .L14
	}
	/* at most three double-word stores bring r6 to 32-byte alignment */
		r3 = and(r6,#31)
		if (r3==#0) jump:nt .L17
	{
		memd(r6++#8) = r5:4
		r2 = add(r2,#-8)
	}
		r3 = and(r6,#31)
		if (r3==#0) jump:nt .L17
	{
		memd(r6++#8) = r5:4
		r2 = add(r2,#-8)
	}
		r3 = and(r6,#31)
		if (r3==#0) jump:nt .L17
	{
		memd(r6++#8) = r5:4
		r2 = add(r2,#-8)
	}
.L17:
	{
		r3 = lsr(r2,#5)			/* number of whole 32-byte blocks */
		if (r1!=#0) jump:nt .L18	/* r1 non-zero: blocks need stores */
	}
	/*
	 * r1 == 0: each 32-byte block is cleared by dczeroa alone.
	 * loop0 reads r3 as it was before this packet (the block count),
	 * not the pointer copied into r3 by the same packet.
	 */
	{
		r8 = r3
		r3 = r6
		loop0(.L46,r3)
	}
	.falign
.L46:
	{
		dczeroa(r6)
		r6 = add(r6,#32)
		r2 = add(r2,#-32)
	}:endloop0 /* start=.L46 */
.L14:
	/* store the remaining whole double words, if any */
	{
		p0 = cmp.gtu(r2,#7)
		if (!p0.new) jump:nt .L28
		r8 = lsr(r2,#3)
	}
		loop0(.L44,r8)
	.falign
.L44:
	{
		memd(r6++#8) = r5:4
		r2 = add(r2,#-8)
	}:endloop0 /* start=.L44 */
.L28:
	/* tail: bit 2 of the count set -> one word */
	{
		p0 = tstbit(r2,#2)
		if (!p0.new) jump:nt .L33
	}
	{
		r2 = add(r2,#-4)
		memw(r6++#4) = r7
	}
.L33:
	/* tail: bit 1 of the count set -> one half word */
	{
		p0 = tstbit(r2,#1)
		if (!p0.new) jump:nt .L35
	}
	{
		r2 = add(r2,#-2)
		memh(r6++#2) = r7
	}
.L35:
	/* tail: exactly one byte left -> store it */
		p0 = cmp.eq(r2,#1)
		if (p0) memb(r6) = r1
.L1:
		jumpr r31
.L18:
	/*
	 * r1 != 0: for each 32-byte block issue dczeroa on it, then write the
	 * pattern over the same 32 bytes with four double-word stores.
	 * NOTE(review): the dczeroa is presumably there to allocate the cache
	 * line without fetching it first -- confirm against the PRM.
	 */
		loop0(.L45,r3)
	.falign
.L45:
		dczeroa(r6)
	{
		memd(r6++#8) = r5:4
		r2 = add(r2,#-32)
	}
		memd(r6++#8) = r5:4
		memd(r6++#8) = r5:4
	{
		memd(r6++#8) = r5:4
	}:endloop0 /* start=.L45 */
		jump .L14
HEXAGON_OPT_FUNC_FINISH memset
#endif