2019-05-27 08:55:01 +02:00
/* SPDX-License-Identifier: GPL-2.0-or-later */
2015-02-24 20:36:40 +01:00
/*
 * Fast SHA-1 implementation for SPE instruction set (PPC)
 *
 * This code makes use of the SPE SIMD instruction set as defined in
 * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
 * Implementation is based on optimization guide notes from
 * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
 *
 * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
 */
# include < a s m / p p c _ a s m . h >
# include < a s m / a s m - o f f s e t s . h >
/*
 * Register aliases used throughout this file.
 *
 * r5 does double duty: on entry to ppc_spe_sha1_transform it is copied
 * to CTR, and only afterwards is it reloaded as rKP.
 */

/* Pointers */
#define rHP	r3	/* hash state, five 32 bit words		*/
#define rWP	r4	/* input data					*/
#define rKP	r5	/* round constant table				*/

/* 64 bit round words */
#define rW0	r14
#define rW1	r15
#define rW2	r16
#define rW3	r17
#define rW4	r18
#define rW5	r19
#define rW6	r20
#define rW7	r21

/* 32 bit hash values */
#define rH0	r6
#define rH1	r7
#define rH2	r8
#define rH3	r9
#define rH4	r10

/* Scratch registers */
#define rT0	r22	/* 64 bit temporary				*/
#define rT1	r0	/* 32 bit temporaries				*/
#define rT2	r11
#define rT3	r12

/* 64 bit round constant; r23 is non-volatile and saved by INITIALIZE */
#define rK	r23
/*
 * LOAD_K<n>1 helpers. The round macros build the name by pasting their
 * "k" argument between "LOAD_K" and "1":
 *   k = 0     expands to nothing (keep the current rK)
 *   k = 1..4  reload rK with the word at offset 0/4/8/12 of the table
 *             addressed by rKP, splatted by evlwwsplat
 */
#define LOAD_K01
#define LOAD_K11	evlwwsplat	rK,0(rKP);
#define LOAD_K21	evlwwsplat	rK,4(rKP);
#define LOAD_K31	evlwwsplat	rK,8(rKP);
#define LOAD_K41	evlwwsplat	rK,12(rKP);
/*
 * INITIALIZE - function prologue.
 *
 * Opens a 128 byte stack frame and stores r14..r23 into it with 64 bit
 * stores (8 bytes per register, offsets 8..80). These are the
 * non-volatile registers this file uses as rW0..rW7, rT0 and rK, so
 * the SPE half is preserved along with the ordinary GPR half.
 */
#define INITIALIZE \
	stwu		r1,-128(r1);	/* allocate the frame	*/ \
	evstdw		r14,8(r1);	/* rW0			*/ \
	evstdw		r15,16(r1);	/* rW1			*/ \
	evstdw		r16,24(r1);	/* rW2			*/ \
	evstdw		r17,32(r1);	/* rW3			*/ \
	evstdw		r18,40(r1);	/* rW4			*/ \
	evstdw		r19,48(r1);	/* rW5			*/ \
	evstdw		r20,56(r1);	/* rW6			*/ \
	evstdw		r21,64(r1);	/* rW7			*/ \
	evstdw		r22,72(r1);	/* rT0			*/ \
	evstdw		r23,80(r1);	/* rK			*/
/*
 * FINALIZE - function epilogue, mirror image of INITIALIZE.
 *
 * 1. Reload the full 64 bits of r14..r23 from the frame.
 * 2. Scrub the frame: a zero word is written to the start of each of
 *    the ten 8 byte save slots, so sensitive data pushed from another
 *    context running the same code does not linger on the stack. Only
 *    one word per slot is cleared; the other word holds the ordinary
 *    lower GPR part, which is assumed to have been overwritten already
 *    on the way down to here.
 * 3. Release the 128 byte frame.
 *
 * Clobbers r0.
 */
#define FINALIZE \
	evldw		r14,8(r1);	/* rW0			*/ \
	evldw		r15,16(r1);	/* rW1			*/ \
	evldw		r16,24(r1);	/* rW2			*/ \
	evldw		r17,32(r1);	/* rW3			*/ \
	evldw		r18,40(r1);	/* rW4			*/ \
	evldw		r19,48(r1);	/* rW5			*/ \
	evldw		r20,56(r1);	/* rW6			*/ \
	evldw		r21,64(r1);	/* rW7			*/ \
	evldw		r22,72(r1);	/* rT0			*/ \
	evldw		r23,80(r1);	/* rK			*/ \
	li		r0,0;		/* r0 = 0 for the scrub	*/ \
	stw		r0,8(r1);				   \
	stw		r0,16(r1);				   \
	stw		r0,24(r1);				   \
	stw		r0,32(r1);				   \
	stw		r0,40(r1);				   \
	stw		r0,48(r1);				   \
	stw		r0,56(r1);				   \
	stw		r0,64(r1);				   \
	stw		r0,72(r1);				   \
	stw		r0,80(r1);				   \
	addi		r1,r1,128;	/* pop the frame	*/
/*
 * Input access helpers.
 *
 * LOAD_DATA(reg, off) loads one 32 bit input word into reg.
 * NEXT_BLOCK is invoked once per loop iteration.
 *
 * Little endian build: each word is fetched byte-reversed with lwbrx
 * from the address in rWP ("off" is ignored) and rWP is advanced by 4
 * right away, so NEXT_BLOCK has nothing left to do.
 *
 * Big endian build: each word is fetched with a plain lwz at
 * displacement "off" from rWP, and NEXT_BLOCK advances rWP by 64.
 */
#ifndef __BIG_ENDIAN__
#define LOAD_DATA(reg, off) \
	lwbrx		reg,0,rWP;	/* byte-reversed load	*/ \
	addi		rWP,rWP,4;	/* step to next word	*/
#define NEXT_BLOCK			/* rWP is up to date	*/
#else
#define LOAD_DATA(reg, off) \
	lwz		reg,off(rWP);	/* load data		*/
#define NEXT_BLOCK \
	addi		rWP,rWP,64;	/* step to next block	*/
#endif
/*
 * R_00_15 - two interleaved rounds fed directly from the input data.
 *
 *   a..e    the five working registers, in A,B,C,D,E order for step 1
 *   w0, w1  receive the two input words loaded at off and off+4
 *   k       LOAD_K selector (0 = keep rK), applied before rK is used
 *   off     displacement of the first word (see LOAD_DATA)
 *
 * Step 1 computes  e += (a rotl 5) + ((b & c) | (d & ~b)) + w0 + K
 * and rotates b left by 30. Step 2 repeats this on the shifted state,
 * i.e. with (e, a, b, c, d) in the A,B,C,D,E roles, accumulating into
 * d and rotating a. Finally evmergelo combines the low words of w1 and
 * w0 into w1. The "1:"/"2:" tags name the step a line belongs to.
 *
 * Clobbers rT0, rT1, rT2. The instruction order is hand scheduled.
 */
#define R_00_15(a, b, c, d, e, w0, w1, k, off) \
	LOAD_DATA(w0, off)		/* 1: W				*/ \
	and		rT2,b,c;	/* 1: b & c			*/ \
	LOAD_K##k##1							   \
	andc		rT1,d,b;	/* 1: d & ~b			*/ \
	rotlwi		rT0,a,5;	/* 1: a rotl 5			*/ \
	or		rT2,rT2,rT1;	/* 1: F				*/ \
	add		e,e,rT0;	/* 1: e += a rotl 5		*/ \
	rotlwi		b,b,30;		/* 1: b = b rotl 30		*/ \
	add		e,e,w0;		/* 1: e += W			*/ \
	LOAD_DATA(w1, off+4)		/* 2: W				*/ \
	add		e,e,rT2;	/* 1: e += F			*/ \
	and		rT1,a,b;	/* 2: a & b			*/ \
	add		e,e,rK;		/* 1: e += K			*/ \
	andc		rT2,c,a;	/* 2: c & ~a			*/ \
	add		d,d,rK;		/* 2: d += K			*/ \
	or		rT2,rT2,rT1;	/* 2: F				*/ \
	rotlwi		rT0,e,5;	/* 2: e rotl 5			*/ \
	add		d,d,w1;		/* 2: d += W			*/ \
	rotlwi		a,a,30;		/* 2: a = a rotl 30		*/ \
	add		d,d,rT0;	/* 2: d += e rotl 5		*/ \
	evmergelo	w1,w1,w0;	/*    pack both words into w1	*/ \
	add		d,d,rT2		/* 2: d += F			*/
/*
 * R_16_19 - two interleaved rounds with on-the-fly message expansion,
 * using the same F as R_00_15: (b & c) | (d & ~b).
 *
 *   a..e    working registers, A,B,C,D,E order for step 1; step 2 uses
 *           (e, a, b, c, d) in those roles
 *   w0      in: W[-16] pair, out: the newly expanded pair
 *   w1      W[-14] pair
 *   w4      W[-8] pair
 *   w6, w7  neighbouring pairs from which evmergelohi assembles W[-3]
 *   k       LOAD_K selector (0 = keep rK)
 *
 * Both new words are produced at once with 64 bit SPE operations:
 *   w0 = (w0 ^ W[-3] ^ w4 ^ w1) rotl 1,  rT0 = w0 + rK
 * evmergehi then copies the high word of rT0 into the low word of rT1
 * so that plain 32 bit adds can consume one W+K value per step.
 *
 * LOAD_K sits after the W+K sum, so a non-zero k only affects the
 * macro invocations that follow this one.
 *
 * Clobbers rT0, rT1, rT2. The instruction order is hand scheduled.
 */
#define R_16_19(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
	and		rT2,b,c;	/* 1: b & c			*/ \
	evmergelohi	rT0,w7,w6;	/*    W[-3]			*/ \
	andc		rT1,d,b;	/* 1: d & ~b			*/ \
	evxor		w0,w0,rT0;	/*    W = W[-16] ^ W[-3]	*/ \
	or		rT1,rT1,rT2;	/* 1: F				*/ \
	evxor		w0,w0,w4;	/*    W ^= W[-8]		*/ \
	add		e,e,rT1;	/* 1: e += F			*/ \
	evxor		w0,w0,w1;	/*    W ^= W[-14]		*/ \
	rotlwi		rT2,a,5;	/* 1: a rotl 5			*/ \
	evrlwi		w0,w0,1;	/*    W = W rotl 1		*/ \
	add		e,e,rT2;	/* 1: e += a rotl 5		*/ \
	evaddw		rT0,w0,rK;	/*    WK = W + K		*/ \
	rotlwi		b,b,30;		/* 1: b = b rotl 30		*/ \
	LOAD_K##k##1							   \
	evmergehi	rT1,rT1,rT0;	/*    split WK for the 2 steps	*/ \
	add		e,e,rT0;	/* 1: e += WK			*/ \
	add		d,d,rT1;	/* 2: d += WK			*/ \
	and		rT2,a,b;	/* 2: a & b			*/ \
	andc		rT1,c,a;	/* 2: c & ~a			*/ \
	rotlwi		rT0,e,5;	/* 2: e rotl 5			*/ \
	or		rT1,rT1,rT2;	/* 2: F				*/ \
	add		d,d,rT0;	/* 2: d += e rotl 5		*/ \
	rotlwi		a,a,30;		/* 2: a = a rotl 30		*/ \
	add		d,d,rT1		/* 2: d += F			*/
/*
 * R_20_39 - two interleaved rounds with message expansion and the
 * parity function F = b ^ c ^ d.
 *
 *   a..e    working registers, A,B,C,D,E order for step 1; step 2 uses
 *           (e, a, b, c, d) in those roles
 *   w0      in: W[-16] pair, out: the newly expanded pair
 *   w1      W[-14] pair
 *   w4      W[-8] pair
 *   w6, w7  neighbouring pairs from which evmergelohi assembles W[-3]
 *   k       LOAD_K selector (0 = keep rK)
 *
 * The expansion runs on 64 bit SPE registers:
 *   w0 = (w0 ^ W[-3] ^ w4 ^ w1) rotl 1,  rT0 = w0 + rK
 * evmergehi copies the high word of rT0 into the low word of rT1 so
 * each step can add its own W+K value with a 32 bit add.
 *
 * LOAD_K sits after the W+K sum, so a non-zero k only affects the
 * macro invocations that follow this one.
 *
 * Clobbers rT0, rT1, rT2. The instruction order is hand scheduled.
 */
#define R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
	evmergelohi	rT0,w7,w6;	/*    W[-3]			*/ \
	xor		rT2,b,c;	/* 1: b ^ c			*/ \
	evxor		w0,w0,rT0;	/*    W = W[-16] ^ W[-3]	*/ \
	xor		rT2,rT2,d;	/* 1: F = b ^ c ^ d		*/ \
	evxor		w0,w0,w4;	/*    W ^= W[-8]		*/ \
	add		e,e,rT2;	/* 1: e += F			*/ \
	evxor		w0,w0,w1;	/*    W ^= W[-14]		*/ \
	rotlwi		rT2,a,5;	/* 1: a rotl 5			*/ \
	evrlwi		w0,w0,1;	/*    W = W rotl 1		*/ \
	add		e,e,rT2;	/* 1: e += a rotl 5		*/ \
	evaddw		rT0,w0,rK;	/*    WK = W + K		*/ \
	rotlwi		b,b,30;		/* 1: b = b rotl 30		*/ \
	LOAD_K##k##1							   \
	evmergehi	rT1,rT1,rT0;	/*    split WK for the 2 steps	*/ \
	add		e,e,rT0;	/* 1: e += WK			*/ \
	xor		rT2,a,b;	/* 2: a ^ b			*/ \
	add		d,d,rT1;	/* 2: d += WK			*/ \
	xor		rT2,rT2,c;	/* 2: F = a ^ b ^ c		*/ \
	rotlwi		rT0,e,5;	/* 2: e rotl 5			*/ \
	add		d,d,rT2;	/* 2: d += F			*/ \
	rotlwi		a,a,30;		/* 2: a = a rotl 30		*/ \
	add		d,d,rT0		/* 2: d += e rotl 5		*/
/*
 * R_40_59 - two interleaved rounds with message expansion and the
 * majority function, computed as F = (b & c) | (d & (b | c)).
 *
 *   a..e    working registers, A,B,C,D,E order for step 1; step 2 uses
 *           (e, a, b, c, d) in those roles
 *   w0      in: W[-16] pair, out: the newly expanded pair
 *   w1      W[-14] pair
 *   w4      W[-8] pair
 *   w6, w7  neighbouring pairs from which evmergelohi assembles W[-3]
 *   k       LOAD_K selector (0 = keep rK)
 *
 * The expansion runs on 64 bit SPE registers:
 *   w0 = (w0 ^ W[-3] ^ w4 ^ w1) rotl 1,  rT0 = w0 + rK
 * evmergehi copies the high word of rT0 into the low word of rT1 so
 * each step can add its own W+K value with a 32 bit add. Note that
 * step 2 reuses rT0 as a 32 bit scratch once step 1 has consumed WK.
 *
 * LOAD_K sits after the W+K sum, so a non-zero k only affects the
 * macro invocations that follow this one.
 *
 * Clobbers rT0, rT1, rT2. The instruction order is hand scheduled.
 */
#define R_40_59(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
	and		rT2,b,c;	/* 1: b & c			*/ \
	evmergelohi	rT0,w7,w6;	/*    W[-3]			*/ \
	or		rT1,b,c;	/* 1: b | c			*/ \
	evxor		w0,w0,rT0;	/*    W = W[-16] ^ W[-3]	*/ \
	and		rT1,d,rT1;	/* 1: d & (b | c)		*/ \
	evxor		w0,w0,w4;	/*    W ^= W[-8]		*/ \
	or		rT2,rT2,rT1;	/* 1: F				*/ \
	evxor		w0,w0,w1;	/*    W ^= W[-14]		*/ \
	add		e,e,rT2;	/* 1: e += F			*/ \
	evrlwi		w0,w0,1;	/*    W = W rotl 1		*/ \
	rotlwi		rT2,a,5;	/* 1: a rotl 5			*/ \
	evaddw		rT0,w0,rK;	/*    WK = W + K		*/ \
	add		e,e,rT2;	/* 1: e += a rotl 5		*/ \
	LOAD_K##k##1							   \
	evmergehi	rT1,rT1,rT0;	/*    split WK for the 2 steps	*/ \
	rotlwi		b,b,30;		/* 1: b = b rotl 30		*/ \
	add		e,e,rT0;	/* 1: e += WK			*/ \
	and		rT2,a,b;	/* 2: a & b			*/ \
	or		rT0,a,b;	/* 2: a | b			*/ \
	add		d,d,rT1;	/* 2: d += WK			*/ \
	and		rT0,c,rT0;	/* 2: c & (a | b)		*/ \
	rotlwi		a,a,30;		/* 2: a = a rotl 30		*/ \
	or		rT2,rT2,rT0;	/* 2: F				*/ \
	rotlwi		rT0,e,5;	/* 2: e rotl 5			*/ \
	add		d,d,rT2;	/* 2: d += F			*/ \
	add		d,d,rT0		/* 2: d += e rotl 5		*/
/*
 * R_60_79 - the last group of rounds. It is a straight alias for
 * R_20_39; only the constant held in rK differs, and that is selected
 * by the caller through the "k" arguments of earlier invocations.
 */
#define R_60_79(a, b, c, d, e, w0, w1, w4, w6, w7, k) \
	R_20_39(a, b, c, d, e, w0, w1, w4, w6, w7, k)
/*
 * ppc_spe_sha1_transform
 *
 * In:   r3 (rHP)  pointer to the five 32 bit hash words, updated in place
 *       r4 (rWP)  pointer to the input data
 *       r5        loop count; moved to CTR, then r5 is reused as rKP
 *
 * Each pass through ppc_spe_sha1_main loads 16 input words (64 bytes),
 * runs 40 two-step round macros (80 rounds), adds the result to the
 * hash words in memory and stores them back.
 *
 * Presumably called from C as (state, data, number of 64 byte blocks) -
 * TODO confirm against the glue code.
 *
 * NOTE(review): a count of 0 is not special-cased; bdnz would wrap CTR
 * and keep looping. Callers presumably always pass at least 1.
 *
 * Non-volatile r14..r23 are saved and restored by INITIALIZE/FINALIZE.
 * r0 and r5..r12 are used as scratch/working registers and CTR is
 * consumed.
 */
_GLOBAL(ppc_spe_sha1_transform)
	INITIALIZE

	/* Fetch the hash words; CTR and rKP setup is interleaved. */
	lwz		rH0,0(rHP)
	lwz		rH1,4(rHP)
	mtctr		r5			/* loop count, frees r5	*/
	lwz		rH2,8(rHP)
	lis		rKP,PPC_SPE_SHA1_K@h	/* rKP = &table (high)	*/
	lwz		rH3,12(rHP)
	ori		rKP,rKP,PPC_SPE_SHA1_K@l	/*      ... (low)	*/
	lwz		rH4,16(rHP)

ppc_spe_sha1_main:
	/*
	 * Rounds 0-15: input words. The first invocation loads the first
	 * constant (k = 1). The last two use rT3 as the throw-away w0.
	 */
	R_00_15(rH0, rH1, rH2, rH3, rH4, rW1, rW0, 1, 0)
	R_00_15(rH3, rH4, rH0, rH1, rH2, rW2, rW1, 0, 8)
	R_00_15(rH1, rH2, rH3, rH4, rH0, rW3, rW2, 0, 16)
	R_00_15(rH4, rH0, rH1, rH2, rH3, rW4, rW3, 0, 24)
	R_00_15(rH2, rH3, rH4, rH0, rH1, rW5, rW4, 0, 32)
	R_00_15(rH0, rH1, rH2, rH3, rH4, rW6, rW5, 0, 40)
	R_00_15(rH3, rH4, rH0, rH1, rH2, rT3, rW6, 0, 48)
	R_00_15(rH1, rH2, rH3, rH4, rH0, rT3, rW7, 0, 56)

	/* Rounds 16-19; the second invocation switches rK (k = 2). */
	R_16_19(rH4, rH0, rH1, rH2, rH3, rW0, rW1, rW4, rW6, rW7, 0)
	R_16_19(rH2, rH3, rH4, rH0, rH1, rW1, rW2, rW5, rW7, rW0, 2)

	/* Rounds 20-39; the last invocation switches rK (k = 3). */
	R_20_39(rH0, rH1, rH2, rH3, rH4, rW2, rW3, rW6, rW0, rW1, 0)
	R_20_39(rH3, rH4, rH0, rH1, rH2, rW3, rW4, rW7, rW1, rW2, 0)
	R_20_39(rH1, rH2, rH3, rH4, rH0, rW4, rW5, rW0, rW2, rW3, 0)
	R_20_39(rH4, rH0, rH1, rH2, rH3, rW5, rW6, rW1, rW3, rW4, 0)
	R_20_39(rH2, rH3, rH4, rH0, rH1, rW6, rW7, rW2, rW4, rW5, 0)
	R_20_39(rH0, rH1, rH2, rH3, rH4, rW7, rW0, rW3, rW5, rW6, 0)
	R_20_39(rH3, rH4, rH0, rH1, rH2, rW0, rW1, rW4, rW6, rW7, 0)
	R_20_39(rH1, rH2, rH3, rH4, rH0, rW1, rW2, rW5, rW7, rW0, 0)
	R_20_39(rH4, rH0, rH1, rH2, rH3, rW2, rW3, rW6, rW0, rW1, 0)
	R_20_39(rH2, rH3, rH4, rH0, rH1, rW3, rW4, rW7, rW1, rW2, 3)

	/* Rounds 40-59; the last invocation switches rK (k = 4). */
	R_40_59(rH0, rH1, rH2, rH3, rH4, rW4, rW5, rW0, rW2, rW3, 0)
	R_40_59(rH3, rH4, rH0, rH1, rH2, rW5, rW6, rW1, rW3, rW4, 0)
	R_40_59(rH1, rH2, rH3, rH4, rH0, rW6, rW7, rW2, rW4, rW5, 0)
	R_40_59(rH4, rH0, rH1, rH2, rH3, rW7, rW0, rW3, rW5, rW6, 0)
	R_40_59(rH2, rH3, rH4, rH0, rH1, rW0, rW1, rW4, rW6, rW7, 0)
	R_40_59(rH0, rH1, rH2, rH3, rH4, rW1, rW2, rW5, rW7, rW0, 0)
	R_40_59(rH3, rH4, rH0, rH1, rH2, rW2, rW3, rW6, rW0, rW1, 0)
	R_40_59(rH1, rH2, rH3, rH4, rH0, rW3, rW4, rW7, rW1, rW2, 0)
	R_40_59(rH4, rH0, rH1, rH2, rH3, rW4, rW5, rW0, rW2, rW3, 0)
	R_40_59(rH2, rH3, rH4, rH0, rH1, rW5, rW6, rW1, rW3, rW4, 4)

	/*
	 * Rounds 60-79. The previous hash words are reloaded between the
	 * final invocations into rT3 and rW1..rW4, each register being
	 * written only after its last use as a round word.
	 */
	R_60_79(rH0, rH1, rH2, rH3, rH4, rW6, rW7, rW2, rW4, rW5, 0)
	R_60_79(rH3, rH4, rH0, rH1, rH2, rW7, rW0, rW3, rW5, rW6, 0)
	R_60_79(rH1, rH2, rH3, rH4, rH0, rW0, rW1, rW4, rW6, rW7, 0)
	R_60_79(rH4, rH0, rH1, rH2, rH3, rW1, rW2, rW5, rW7, rW0, 0)
	R_60_79(rH2, rH3, rH4, rH0, rH1, rW2, rW3, rW6, rW0, rW1, 0)
	R_60_79(rH0, rH1, rH2, rH3, rH4, rW3, rW4, rW7, rW1, rW2, 0)
	R_60_79(rH3, rH4, rH0, rH1, rH2, rW4, rW5, rW0, rW2, rW3, 0)
	lwz		rT3,0(rHP)		/* old hash word 0	*/
	R_60_79(rH1, rH2, rH3, rH4, rH0, rW5, rW6, rW1, rW3, rW4, 0)
	lwz		rW1,4(rHP)		/* old hash word 1	*/
	R_60_79(rH4, rH0, rH1, rH2, rH3, rW6, rW7, rW2, rW4, rW5, 0)
	lwz		rW2,8(rHP)		/* old hash word 2	*/
	R_60_79(rH2, rH3, rH4, rH0, rH1, rW7, rW0, rW3, rW5, rW6, 0)
	lwz		rW3,12(rHP)		/* old hash word 3	*/
	NEXT_BLOCK
	lwz		rW4,16(rHP)		/* old hash word 4	*/

	/* Add the old hash words and write the new state back. */
	add		rH0,rH0,rT3
	stw		rH0,0(rHP)
	add		rH1,rH1,rW1
	stw		rH1,4(rHP)
	add		rH2,rH2,rW2
	stw		rH2,8(rHP)
	add		rH3,rH3,rW3
	stw		rH3,12(rHP)
	add		rH4,rH4,rW4
	stw		rH4,16(rHP)

	bdnz		ppc_spe_sha1_main	/* next block, if any	*/

	FINALIZE
	blr
/*
 * The four round constants, in the order in which LOAD_K11..LOAD_K41
 * fetch them (offsets 0, 4, 8 and 12 from rKP).
 */
.data
.align 4
PPC_SPE_SHA1_K:
	.long 0x5A827999	/* offset  0, LOAD_K11 */
	.long 0x6ED9EBA1	/* offset  4, LOAD_K21 */
	.long 0x8F1BBCDC	/* offset  8, LOAD_K31 */
	.long 0xCA62C1D6	/* offset 12, LOAD_K41 */