/*
 * Author: Anton Blanchard <anton@au.ibm.com>
 * Copyright 2015 IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/ppc_asm.h>
#include <asm/export.h>

/*
 * Index registers for the LD (indexed load) macro below.  memcmp loads
 * them with the constants 8, 16 and 24 on entry to its long path.
 */
#define off8	r6
#define off16	r7
#define off24	r8

/*
 * Doubleword pairs compared by the long path of memcmp: (rA,rB),
 * (rC,rD), (rE,rF) and (rG,rH).  In the long path the first register of
 * each pair is loaded via r3 and the second via r4; the byte loop reuses
 * rA, rB and rC as scratch.
 *
 * rD-rH map onto r27-r31, which memcmp saves below r1 before use and
 * reloads before returning.
 */
#define rA	r9
#define rB	r10
#define rC	r11
#define rD	r27
#define rE	r28
#define rF	r29
#define rG	r30
#define rH	r31

/*
 * LD loads one doubleword so that the byte at the lowest address ends up
 * most significant in the register.  An unsigned doubleword compare then
 * orders two loads the same way a byte-by-byte comparison would.  Little
 * endian needs the byte-reversing load for that; big endian gets it from
 * a plain indexed load.
 */
#ifdef __LITTLE_ENDIAN__
#define LD	ldbrx
#else
#define LD	ldx
#endif

/*
 * memcmp: compare r5 bytes at r3 against r5 bytes at r4.
 *
 * In:   r3 = first buffer, r4 = second buffer, r5 = length in bytes
 * Out:  r3 = 0 when no byte differs.  Otherwise the sign of r3 follows
 *       the first differing byte (buffer at r3 minus buffer at r4):
 *       the byte loop returns the byte difference, the doubleword path
 *       returns +1 or -1.
 * Uses: r0, r5-r11, ctr, cr0, cr1, cr6, cr7.  r27-r31 are used by the
 *       doubleword path only; they are stored at -8..-40(r1) without
 *       moving r1 and reloaded on every exit from that path.
 *
 * Two strategies:
 *   .Lshort  byte-at-a-time loop, unrolled four times.
 *   .Llong   32 bytes per iteration as four doubleword pairs, software
 *            pipelined so the loads of the next block overlap the
 *            compares and branches of the current one.  Taken only when
 *            both pointers are 8-byte aligned and the length exceeds 31.
 *            Any remainder (length & 31) is finished by .Lshort.
 */
_GLOBAL(memcmp)
	cmpdi	cr1,r5,0		/* cr1: length == 0? */

	/* Use the short loop if either string is not 8B aligned */
	or	r6,r3,r4
	andi.	r6,r6,7			/* cr0: ne if any low address bit is set */

	/* Use the short loop if length is less than 32B */
	cmpdi	cr6,r5,31

	beq	cr1,.Lzero		/* nothing to compare */
	bne	.Lshort			/* misaligned pointer(s) */
	bgt	cr6,.Llong		/* aligned and more than 31 bytes */

	/*
	 * Byte loop.  ctr holds the number of bytes left; each bdz/bdnz
	 * accounts for the byte just compared.
	 */
.Lshort:
	mtctr	r5

1:	lbz	rA,0(r3)
	lbz	rB,0(r4)
	subf.	rC,rB,rA		/* rC = rA - rB, sets cr0 */
	bne	.Lnon_zero
	bdz	.Lzero

	lbz	rA,1(r3)
	lbz	rB,1(r4)
	subf.	rC,rB,rA
	bne	.Lnon_zero
	bdz	.Lzero

	lbz	rA,2(r3)
	lbz	rB,2(r4)
	subf.	rC,rB,rA
	bne	.Lnon_zero
	bdz	.Lzero

	lbz	rA,3(r3)
	lbz	rB,3(r4)
	subf.	rC,rB,rA
	bne	.Lnon_zero

	addi	r3,r3,4
	addi	r4,r4,4

	bdnz	1b

.Lzero:
	li	r3,0
	blr

.Lnon_zero:
	mr	r3,rC			/* return the byte difference */
	blr

	/*
	 * Doubleword path.  Entered with both pointers 8-byte aligned and
	 * r5 > 31.
	 */
.Llong:
	li	off8,8
	li	off16,16
	li	off24,24

	/*
	 * rD-rH live in r27-r31; park the caller's values below r1.  No
	 * frame is allocated, so this relies on the area just below the
	 * stack pointer being usable by a leaf routine.
	 */
	std	r31,-8(r1)
	std	r30,-16(r1)
	std	r29,-24(r1)
	std	r28,-32(r1)
	std	r27,-40(r1)

	srdi	r0,r5,5
	mtctr	r0			/* ctr = number of 32-byte blocks */
	andi.	r5,r5,31		/* r5  = bytes left over for .Lshort */

	/* Load the first block; only the A/B compare is issued so far. */
	LD	rA,0,r3
	LD	rB,0,r4

	LD	rC,off8,r3
	LD	rD,off8,r4

	LD	rE,off16,r3
	LD	rF,off16,r4

	LD	rG,off24,r3
	LD	rH,off24,r4
	cmpld	cr0,rA,rB

	addi	r3,r3,32
	addi	r4,r4,32

	bdz	.Lfirst32		/* only one block in total */

	/*
	 * Load the second block while the remaining compares of the
	 * first block are issued and its A/B and C/D results are tested.
	 */
	LD	rA,0,r3
	LD	rB,0,r4
	cmpld	cr1,rC,rD

	LD	rC,off8,r3
	LD	rD,off8,r4
	cmpld	cr6,rE,rF

	LD	rE,off16,r3
	LD	rF,off16,r4
	cmpld	cr7,rG,rH
	bne	cr0,.LcmpAB

	LD	rG,off24,r3
	LD	rH,off24,r4
	cmpld	cr0,rA,rB
	bne	cr1,.LcmpCD

	addi	r3,r3,32
	addi	r4,r4,32

	bdz	.Lsecond32

	.balign	16

	/*
	 * Steady state.  On entry cr0 holds A/B of the block already in
	 * registers, while cr6/cr7 still hold E/F and G/H of the block
	 * before it.  Each pair is reloaded only after its compare has
	 * been issued, and results are tested in memory order so the
	 * first difference decides the return value.
	 */
1:	LD	rA,0,r3
	LD	rB,0,r4
	cmpld	cr1,rC,rD
	bne	cr6,.LcmpEF

	LD	rC,off8,r3
	LD	rD,off8,r4
	cmpld	cr6,rE,rF
	bne	cr7,.LcmpGH

	LD	rE,off16,r3
	LD	rF,off16,r4
	cmpld	cr7,rG,rH
	bne	cr0,.LcmpAB

	LD	rG,off24,r3
	LD	rH,off24,r4
	cmpld	cr0,rA,rB
	bne	cr1,.LcmpCD

	addi	r3,r3,32
	addi	r4,r4,32

	bdnz	1b

	/*
	 * Drain the pipeline.  The last block is in rA-rH with only its
	 * A/B compare issued (cr0); cr6/cr7 still carry the previous
	 * block's E/F and G/H results and must be tested first.
	 */
.Lsecond32:
	cmpld	cr1,rC,rD
	bne	cr6,.LcmpEF

	cmpld	cr6,rE,rF
	bne	cr7,.LcmpGH

	cmpld	cr7,rG,rH
	bne	cr0,.LcmpAB

	bne	cr1,.LcmpCD
	bne	cr6,.LcmpEF
	bne	cr7,.LcmpGH

	/*
	 * All whole blocks matched.  Reload r27-r31, then either return 0
	 * or let the byte loop compare the r5 leftover bytes; r3/r4 already
	 * point past the last block.
	 */
.Ltail:
	ld	r31,-8(r1)
	ld	r30,-16(r1)
	ld	r29,-24(r1)
	ld	r28,-32(r1)
	ld	r27,-40(r1)

	cmpdi	r5,0
	beq	.Lzero
	b	.Lshort

	/* Exactly one block: issue the three outstanding compares. */
.Lfirst32:
	cmpld	cr1,rC,rD
	cmpld	cr6,rE,rF
	cmpld	cr7,rG,rH

	bne	cr0,.LcmpAB
	bne	cr1,.LcmpCD
	bne	cr6,.LcmpEF
	bne	cr7,.LcmpGH

	b	.Ltail

	/*
	 * Mismatch exits.  The named CR field holds the unsigned compare
	 * of the differing pair: "gt" returns 1, anything else returns -1.
	 */
.LcmpAB:
	li	r3,1
	bgt	cr0,.Lout
	li	r3,-1
	b	.Lout

.LcmpCD:
	li	r3,1
	bgt	cr1,.Lout
	li	r3,-1
	b	.Lout

.LcmpEF:
	li	r3,1
	bgt	cr6,.Lout
	li	r3,-1
	b	.Lout

.LcmpGH:
	li	r3,1
	bgt	cr7,.Lout
	li	r3,-1

	/* Common return for the mismatch exits: reload r27-r31. */
.Lout:
	ld	r31,-8(r1)
	ld	r30,-16(r1)
	ld	r29,-24(r1)
	ld	r28,-32(r1)
	ld	r27,-40(r1)
	blr
EXPORT_SYMBOL(memcmp)