2017-01-11 19:41:49 +03:00
/*
 * ChaCha/XChaCha NEON helper functions
 *
 * Copyright (C) 2016-2018 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Originally based on:
 * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */
# include < l i n u x / l i n k a g e . h >
2018-12-04 16:13:32 +03:00
# include < a s m / a s s e m b l e r . h >
# include < a s m / c a c h e . h >
2017-01-11 19:41:49 +03:00
.text
.align 6
2018-12-04 06:52:50 +03:00
/*
 * chacha_permute - permute one block
 *
 * Permute one 64-byte block where the state matrix is stored in the four NEON
 * registers v0-v3 (one 4x32-bit row per register).  It performs matrix
 * operations on four words in parallel, but requires shuffling to rearrange
 * the words after each round.
 *
 * In:
 *   v0-v3	state rows 0..3
 *   w3		round count; each loop iteration performs two rounds and the
 *		loop only exits when the counter reaches exactly zero, so the
 *		count must be a non-zero even number
 *
 * Out:
 *   v0-v3	permuted state (no feed-forward addition is applied here)
 *
 * Clobbers: w3, x10, v4, v12, condition flags
 */
SYM_FUNC_START_LOCAL(chacha_permute)

	// v12 = byte-shuffle table used by tbl below for the rotate-by-8 steps
	adr_l		x10, ROT8
	ld1		{v12.4s}, [x10]

.Ldoubleround:
	/*
	 * First round of the pair: rows are in their natural alignment.
	 */

	// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
	add		v0.4s, v0.4s, v1.4s
	eor		v3.16b, v3.16b, v0.16b
	rev32		v3.8h, v3.8h		// swap 16-bit halves == rotate by 16

	// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
	add		v2.4s, v2.4s, v3.4s
	eor		v4.16b, v1.16b, v2.16b
	shl		v1.4s, v4.4s, #12	// high part of the rotate ...
	sri		v1.4s, v4.4s, #20	// ... low part inserted from v4

	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
	add		v0.4s, v0.4s, v1.4s
	eor		v3.16b, v3.16b, v0.16b
	tbl		v3.16b, {v3.16b}, v12.16b

	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
	add		v2.4s, v2.4s, v3.4s
	eor		v4.16b, v1.16b, v2.16b
	shl		v1.4s, v4.4s, #7
	sri		v1.4s, v4.4s, #25

	/*
	 * Rotate rows 1..3 by one, two and three 32-bit lanes so that the
	 * second round of the pair can reuse the same lane-wise code.
	 */

	// x1 = shuffle32(x1, MASK(0, 3, 2, 1))
	ext		v1.16b, v1.16b, v1.16b, #4
	// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
	ext		v2.16b, v2.16b, v2.16b, #8
	// x3 = shuffle32(x3, MASK(2, 1, 0, 3))
	ext		v3.16b, v3.16b, v3.16b, #12

	/*
	 * Second round of the pair: same operations on the shuffled rows.
	 */

	// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
	add		v0.4s, v0.4s, v1.4s
	eor		v3.16b, v3.16b, v0.16b
	rev32		v3.8h, v3.8h

	// x2 += x3, x1 = rotl32(x1 ^ x2, 12)
	add		v2.4s, v2.4s, v3.4s
	eor		v4.16b, v1.16b, v2.16b
	shl		v1.4s, v4.4s, #12
	sri		v1.4s, v4.4s, #20

	// x0 += x1, x3 = rotl32(x3 ^ x0, 8)
	add		v0.4s, v0.4s, v1.4s
	eor		v3.16b, v3.16b, v0.16b
	tbl		v3.16b, {v3.16b}, v12.16b

	// x2 += x3, x1 = rotl32(x1 ^ x2, 7)
	add		v2.4s, v2.4s, v3.4s
	eor		v4.16b, v1.16b, v2.16b
	shl		v1.4s, v4.4s, #7
	sri		v1.4s, v4.4s, #25

	/*
	 * Undo the lane rotation (the byte offsets are the complements of the
	 * ones used above) so the rows are back in their natural alignment.
	 */

	// x1 = shuffle32(x1, MASK(2, 1, 0, 3))
	ext		v1.16b, v1.16b, v1.16b, #12
	// x2 = shuffle32(x2, MASK(1, 0, 3, 2))
	ext		v2.16b, v2.16b, v2.16b, #8
	// x3 = shuffle32(x3, MASK(0, 3, 2, 1))
	ext		v3.16b, v3.16b, v3.16b, #4

	// two rounds done; loop until the round counter hits zero
	subs		w3, w3, #2
	b.ne		.Ldoubleround

	ret
SYM_FUNC_END(chacha_permute)
2018-12-04 06:52:50 +03:00
2019-12-13 18:49:10 +03:00
/*
 * chacha_block_xor_neon - generate one keystream block and XOR it with input
 *
 * In:
 *   x0: Input state matrix, s (64 bytes, read only)
 *   x1: 1 data block output, o (64 bytes written)
 *   x2: 1 data block input, i (64 bytes read)
 *   w3: nrounds (passed through unchanged to chacha_permute)
 *
 * Computes o = i ^ (permute(s) + s), one 16-byte row at a time.
 *
 * Clobbers: everything chacha_permute clobbers, plus v0-v11.
 * NOTE(review): v8-v11 are overwritten and not restored here; presumably the
 * callers run this inside a kernel-mode NEON section where that is
 * acceptable - confirm against the C glue code.
 */
SYM_FUNC_START(chacha_block_xor_neon)
	// non-leaf: save frame pointer and link register around the bl below
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	// x0..3 = s0..3 (working copy that chacha_permute transforms in place)
	ld1		{v0.4s-v3.4s}, [x0]
	// v8..11 = s0..3 (untouched copy for the feed-forward addition)
	ld1		{v8.4s-v11.4s}, [x0]

	bl		chacha_permute

	// load the input only now: chacha_permute uses v4 as scratch
	ld1		{v4.16b-v7.16b}, [x2]

	// o0 = i0 ^ (x0 + s0)
	add		v0.4s, v0.4s, v8.4s
	eor		v0.16b, v0.16b, v4.16b

	// o1 = i1 ^ (x1 + s1)
	add		v1.4s, v1.4s, v9.4s
	eor		v1.16b, v1.16b, v5.16b

	// o2 = i2 ^ (x2 + s2)
	add		v2.4s, v2.4s, v10.4s
	eor		v2.16b, v2.16b, v6.16b

	// o3 = i3 ^ (x3 + s3)
	add		v3.4s, v3.4s, v11.4s
	eor		v3.16b, v3.16b, v7.16b

	st1		{v0.16b-v3.16b}, [x1]

	ldp		x29, x30, [sp], #16
	ret
SYM_FUNC_END(chacha_block_xor_neon)
2017-01-11 19:41:49 +03:00
2019-12-13 18:49:10 +03:00
/*
 * hchacha_block_neon - permute the state and emit rows 0 and 3
 *
 * In:
 *   x0: Input state matrix, s (64 bytes, read only)
 *   x1: output (8 32-bit words)
 *   w2: nrounds
 *
 * The state is run through chacha_permute and rows 0 and 3 of the result
 * are stored to the output; unlike chacha_block_xor_neon, the original
 * state is not added back.
 *
 * Clobbers: everything chacha_permute clobbers, plus v0-v3; x1 is advanced
 * by 16 by the post-indexed store.
 */
SYM_FUNC_START(hchacha_block_neon)
	// non-leaf: save frame pointer and link register around the bl below
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	// v0..3 = s0..3
	ld1		{v0.4s-v3.4s}, [x0]

	// chacha_permute takes its round count in w3
	mov		w3, w2
	bl		chacha_permute

	// output words 0..3 = row 0, words 4..7 = row 3
	st1		{v0.4s}, [x1], #16
	st1		{v3.4s}, [x1]

	ldp		x29, x30, [sp], #16
	ret
SYM_FUNC_END(hchacha_block_neon)
2018-12-04 06:52:50 +03:00
2018-12-04 16:13:33 +03:00
a0 . r e q w12
a1 . r e q w13
a2 . r e q w14
a3 . r e q w15
a4 . r e q w16
a5 . r e q w17
a6 . r e q w19
a7 . r e q w20
a8 . r e q w21
a9 . r e q w22
a1 0 . r e q w23
a1 1 . r e q w24
a1 2 . r e q w25
a1 3 . r e q w26
a1 4 . r e q w27
a1 5 . r e q w28
2017-01-11 19:41:49 +03:00
.align 6
2019-12-13 18:49:10 +03:00
SYM_ F U N C _ S T A R T ( c h a c h a _ 4 b l o c k _ x o r _ n e o n )
2018-12-04 16:13:33 +03:00
frame_ p u s h 1 0
2017-01-11 19:41:49 +03:00
/ / x0 : Input s t a t e m a t r i x , s
/ / x1 : 4 data b l o c k s o u t p u t , o
/ / x2 : 4 data b l o c k s i n p u t , i
2018-12-04 06:52:51 +03:00
/ / w3 : nrounds
2018-12-04 16:13:32 +03:00
/ / x4 : byte c o u n t
adr_ l x10 , . L p e r m u t e
and x5 , x4 , #63
add x10 , x10 , x5
add x11 , x10 , #64
2017-01-11 19:41:49 +03:00
/ /
2018-12-04 06:52:51 +03:00
/ / This f u n c t i o n e n c r y p t s f o u r c o n s e c u t i v e C h a C h a b l o c k s b y l o a d i n g
2017-01-11 19:41:49 +03:00
/ / the s t a t e m a t r i x i n N E O N r e g i s t e r s f o u r t i m e s . T h e a l g o r i t h m p e r f o r m s
/ / each o p e r a t i o n o n t h e c o r r e s p o n d i n g w o r d o f e a c h s t a t e m a t r i x , h e n c e
/ / requires n o w o r d s h u f f l i n g . F o r f i n a l X O R i n g s t e p w e t r a n s p o s e t h e
/ / matrix b y i n t e r l e a v i n g 3 2 - a n d t h e n 6 4 - b i t w o r d s , w h i c h a l l o w s u s t o
/ / do X O R i n N E O N r e g i s t e r s .
/ /
2018-12-04 16:13:33 +03:00
/ / At t h e s a m e t i m e , a f i f t h b l o c k i s e n c r y p t e d i n p a r a l l e l u s i n g
/ / scalar r e g i s t e r s
/ /
2018-12-04 16:13:32 +03:00
adr_ l x9 , C T R I N C / / . . . a n d R O T 8
2018-12-04 06:52:51 +03:00
ld1 { v30 . 4 s - v31 . 4 s } , [ x9 ]
2017-01-11 19:41:49 +03:00
/ / x0 . . 1 5 [ 0 - 3 ] = s0 . . 3 [ 0 . . 3 ]
2018-12-04 16:13:32 +03:00
add x8 , x0 , #16
ld4 r { v0 . 4 s - v3 . 4 s } , [ x0 ]
ld4 r { v4 . 4 s - v7 . 4 s } , [ x8 ] , #16
ld4 r { v8 . 4 s - v11 . 4 s } , [ x8 ] , #16
ld4 r { v12 . 4 s - v15 . 4 s } , [ x8 ]
2017-01-11 19:41:49 +03:00
2018-12-04 16:13:33 +03:00
mov a0 , v0 . s [ 0 ]
mov a1 , v1 . s [ 0 ]
mov a2 , v2 . s [ 0 ]
mov a3 , v3 . s [ 0 ]
mov a4 , v4 . s [ 0 ]
mov a5 , v5 . s [ 0 ]
mov a6 , v6 . s [ 0 ]
mov a7 , v7 . s [ 0 ]
mov a8 , v8 . s [ 0 ]
mov a9 , v9 . s [ 0 ]
mov a10 , v10 . s [ 0 ]
mov a11 , v11 . s [ 0 ]
mov a12 , v12 . s [ 0 ]
mov a13 , v13 . s [ 0 ]
mov a14 , v14 . s [ 0 ]
mov a15 , v15 . s [ 0 ]
/ / x1 2 + = c o u n t e r v a l u e s 1 - 4
2017-01-11 19:41:49 +03:00
add v12 . 4 s , v12 . 4 s , v30 . 4 s
.Ldoubleround4 :
/ / x0 + = x4 , x12 = r o t l 3 2 ( x12 ^ x0 , 1 6 )
/ / x1 + = x5 , x13 = r o t l 3 2 ( x13 ^ x1 , 1 6 )
/ / x2 + = x6 , x14 = r o t l 3 2 ( x14 ^ x2 , 1 6 )
/ / x3 + = x7 , x15 = r o t l 3 2 ( x15 ^ x3 , 1 6 )
add v0 . 4 s , v0 . 4 s , v4 . 4 s
2018-12-04 16:13:33 +03:00
add a0 , a0 , a4
2017-01-11 19:41:49 +03:00
add v1 . 4 s , v1 . 4 s , v5 . 4 s
2018-12-04 16:13:33 +03:00
add a1 , a1 , a5
2017-01-11 19:41:49 +03:00
add v2 . 4 s , v2 . 4 s , v6 . 4 s
2018-12-04 16:13:33 +03:00
add a2 , a2 , a6
2017-01-11 19:41:49 +03:00
add v3 . 4 s , v3 . 4 s , v7 . 4 s
2018-12-04 16:13:33 +03:00
add a3 , a3 , a7
2017-01-11 19:41:49 +03:00
eor v12 . 1 6 b , v12 . 1 6 b , v0 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a12 , a12 , a0
2017-01-11 19:41:49 +03:00
eor v13 . 1 6 b , v13 . 1 6 b , v1 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a13 , a13 , a1
2017-01-11 19:41:49 +03:00
eor v14 . 1 6 b , v14 . 1 6 b , v2 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a14 , a14 , a2
2017-01-11 19:41:49 +03:00
eor v15 . 1 6 b , v15 . 1 6 b , v3 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a15 , a15 , a3
2017-01-11 19:41:49 +03:00
rev3 2 v12 . 8 h , v12 . 8 h
2018-12-04 16:13:33 +03:00
ror a12 , a12 , #16
2017-01-11 19:41:49 +03:00
rev3 2 v13 . 8 h , v13 . 8 h
2018-12-04 16:13:33 +03:00
ror a13 , a13 , #16
2017-01-11 19:41:49 +03:00
rev3 2 v14 . 8 h , v14 . 8 h
2018-12-04 16:13:33 +03:00
ror a14 , a14 , #16
2017-01-11 19:41:49 +03:00
rev3 2 v15 . 8 h , v15 . 8 h
2018-12-04 16:13:33 +03:00
ror a15 , a15 , #16
2017-01-11 19:41:49 +03:00
/ / x8 + = x12 , x4 = r o t l 3 2 ( x4 ^ x8 , 1 2 )
/ / x9 + = x13 , x5 = r o t l 3 2 ( x5 ^ x9 , 1 2 )
/ / x1 0 + = x14 , x6 = r o t l 3 2 ( x6 ^ x10 , 1 2 )
/ / x1 1 + = x15 , x7 = r o t l 3 2 ( x7 ^ x11 , 1 2 )
add v8 . 4 s , v8 . 4 s , v12 . 4 s
2018-12-04 16:13:33 +03:00
add a8 , a8 , a12
2017-01-11 19:41:49 +03:00
add v9 . 4 s , v9 . 4 s , v13 . 4 s
2018-12-04 16:13:33 +03:00
add a9 , a9 , a13
2017-01-11 19:41:49 +03:00
add v10 . 4 s , v10 . 4 s , v14 . 4 s
2018-12-04 16:13:33 +03:00
add a10 , a10 , a14
2017-01-11 19:41:49 +03:00
add v11 . 4 s , v11 . 4 s , v15 . 4 s
2018-12-04 16:13:33 +03:00
add a11 , a11 , a15
2017-01-11 19:41:49 +03:00
eor v16 . 1 6 b , v4 . 1 6 b , v8 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a4 , a4 , a8
2017-01-11 19:41:49 +03:00
eor v17 . 1 6 b , v5 . 1 6 b , v9 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a5 , a5 , a9
2017-01-11 19:41:49 +03:00
eor v18 . 1 6 b , v6 . 1 6 b , v10 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a6 , a6 , a10
2017-01-11 19:41:49 +03:00
eor v19 . 1 6 b , v7 . 1 6 b , v11 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a7 , a7 , a11
2017-01-11 19:41:49 +03:00
shl v4 . 4 s , v16 . 4 s , #12
shl v5 . 4 s , v17 . 4 s , #12
shl v6 . 4 s , v18 . 4 s , #12
shl v7 . 4 s , v19 . 4 s , #12
sri v4 . 4 s , v16 . 4 s , #20
2018-12-04 16:13:33 +03:00
ror a4 , a4 , #20
2017-01-11 19:41:49 +03:00
sri v5 . 4 s , v17 . 4 s , #20
2018-12-04 16:13:33 +03:00
ror a5 , a5 , #20
2017-01-11 19:41:49 +03:00
sri v6 . 4 s , v18 . 4 s , #20
2018-12-04 16:13:33 +03:00
ror a6 , a6 , #20
2017-01-11 19:41:49 +03:00
sri v7 . 4 s , v19 . 4 s , #20
2018-12-04 16:13:33 +03:00
ror a7 , a7 , #20
2017-01-11 19:41:49 +03:00
/ / x0 + = x4 , x12 = r o t l 3 2 ( x12 ^ x0 , 8 )
/ / x1 + = x5 , x13 = r o t l 3 2 ( x13 ^ x1 , 8 )
/ / x2 + = x6 , x14 = r o t l 3 2 ( x14 ^ x2 , 8 )
/ / x3 + = x7 , x15 = r o t l 3 2 ( x15 ^ x3 , 8 )
add v0 . 4 s , v0 . 4 s , v4 . 4 s
2018-12-04 16:13:33 +03:00
add a0 , a0 , a4
2017-01-11 19:41:49 +03:00
add v1 . 4 s , v1 . 4 s , v5 . 4 s
2018-12-04 16:13:33 +03:00
add a1 , a1 , a5
2017-01-11 19:41:49 +03:00
add v2 . 4 s , v2 . 4 s , v6 . 4 s
2018-12-04 16:13:33 +03:00
add a2 , a2 , a6
2017-01-11 19:41:49 +03:00
add v3 . 4 s , v3 . 4 s , v7 . 4 s
2018-12-04 16:13:33 +03:00
add a3 , a3 , a7
2017-01-11 19:41:49 +03:00
eor v12 . 1 6 b , v12 . 1 6 b , v0 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a12 , a12 , a0
2017-01-11 19:41:49 +03:00
eor v13 . 1 6 b , v13 . 1 6 b , v1 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a13 , a13 , a1
2017-01-11 19:41:49 +03:00
eor v14 . 1 6 b , v14 . 1 6 b , v2 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a14 , a14 , a2
2017-01-11 19:41:49 +03:00
eor v15 . 1 6 b , v15 . 1 6 b , v3 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a15 , a15 , a3
2017-01-11 19:41:49 +03:00
tbl v12 . 1 6 b , { v12 . 1 6 b } , v31 . 1 6 b
2018-12-04 16:13:33 +03:00
ror a12 , a12 , #24
2017-01-11 19:41:49 +03:00
tbl v13 . 1 6 b , { v13 . 1 6 b } , v31 . 1 6 b
2018-12-04 16:13:33 +03:00
ror a13 , a13 , #24
2017-01-11 19:41:49 +03:00
tbl v14 . 1 6 b , { v14 . 1 6 b } , v31 . 1 6 b
2018-12-04 16:13:33 +03:00
ror a14 , a14 , #24
2017-01-11 19:41:49 +03:00
tbl v15 . 1 6 b , { v15 . 1 6 b } , v31 . 1 6 b
2018-12-04 16:13:33 +03:00
ror a15 , a15 , #24
2017-01-11 19:41:49 +03:00
/ / x8 + = x12 , x4 = r o t l 3 2 ( x4 ^ x8 , 7 )
/ / x9 + = x13 , x5 = r o t l 3 2 ( x5 ^ x9 , 7 )
/ / x1 0 + = x14 , x6 = r o t l 3 2 ( x6 ^ x10 , 7 )
/ / x1 1 + = x15 , x7 = r o t l 3 2 ( x7 ^ x11 , 7 )
add v8 . 4 s , v8 . 4 s , v12 . 4 s
2018-12-04 16:13:33 +03:00
add a8 , a8 , a12
2017-01-11 19:41:49 +03:00
add v9 . 4 s , v9 . 4 s , v13 . 4 s
2018-12-04 16:13:33 +03:00
add a9 , a9 , a13
2017-01-11 19:41:49 +03:00
add v10 . 4 s , v10 . 4 s , v14 . 4 s
2018-12-04 16:13:33 +03:00
add a10 , a10 , a14
2017-01-11 19:41:49 +03:00
add v11 . 4 s , v11 . 4 s , v15 . 4 s
2018-12-04 16:13:33 +03:00
add a11 , a11 , a15
2017-01-11 19:41:49 +03:00
eor v16 . 1 6 b , v4 . 1 6 b , v8 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a4 , a4 , a8
2017-01-11 19:41:49 +03:00
eor v17 . 1 6 b , v5 . 1 6 b , v9 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a5 , a5 , a9
2017-01-11 19:41:49 +03:00
eor v18 . 1 6 b , v6 . 1 6 b , v10 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a6 , a6 , a10
2017-01-11 19:41:49 +03:00
eor v19 . 1 6 b , v7 . 1 6 b , v11 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a7 , a7 , a11
2017-01-11 19:41:49 +03:00
shl v4 . 4 s , v16 . 4 s , #7
shl v5 . 4 s , v17 . 4 s , #7
shl v6 . 4 s , v18 . 4 s , #7
shl v7 . 4 s , v19 . 4 s , #7
sri v4 . 4 s , v16 . 4 s , #25
2018-12-04 16:13:33 +03:00
ror a4 , a4 , #25
2017-01-11 19:41:49 +03:00
sri v5 . 4 s , v17 . 4 s , #25
2018-12-04 16:13:33 +03:00
ror a5 , a5 , #25
2017-01-11 19:41:49 +03:00
sri v6 . 4 s , v18 . 4 s , #25
2018-12-04 16:13:33 +03:00
ror a6 , a6 , #25
2017-01-11 19:41:49 +03:00
sri v7 . 4 s , v19 . 4 s , #25
2018-12-04 16:13:33 +03:00
ror a7 , a7 , #25
2017-01-11 19:41:49 +03:00
/ / x0 + = x5 , x15 = r o t l 3 2 ( x15 ^ x0 , 1 6 )
/ / x1 + = x6 , x12 = r o t l 3 2 ( x12 ^ x1 , 1 6 )
/ / x2 + = x7 , x13 = r o t l 3 2 ( x13 ^ x2 , 1 6 )
/ / x3 + = x4 , x14 = r o t l 3 2 ( x14 ^ x3 , 1 6 )
add v0 . 4 s , v0 . 4 s , v5 . 4 s
2018-12-04 16:13:33 +03:00
add a0 , a0 , a5
2017-01-11 19:41:49 +03:00
add v1 . 4 s , v1 . 4 s , v6 . 4 s
2018-12-04 16:13:33 +03:00
add a1 , a1 , a6
2017-01-11 19:41:49 +03:00
add v2 . 4 s , v2 . 4 s , v7 . 4 s
2018-12-04 16:13:33 +03:00
add a2 , a2 , a7
2017-01-11 19:41:49 +03:00
add v3 . 4 s , v3 . 4 s , v4 . 4 s
2018-12-04 16:13:33 +03:00
add a3 , a3 , a4
2017-01-11 19:41:49 +03:00
eor v15 . 1 6 b , v15 . 1 6 b , v0 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a15 , a15 , a0
2017-01-11 19:41:49 +03:00
eor v12 . 1 6 b , v12 . 1 6 b , v1 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a12 , a12 , a1
2017-01-11 19:41:49 +03:00
eor v13 . 1 6 b , v13 . 1 6 b , v2 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a13 , a13 , a2
2017-01-11 19:41:49 +03:00
eor v14 . 1 6 b , v14 . 1 6 b , v3 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a14 , a14 , a3
2017-01-11 19:41:49 +03:00
rev3 2 v15 . 8 h , v15 . 8 h
2018-12-04 16:13:33 +03:00
ror a15 , a15 , #16
2017-01-11 19:41:49 +03:00
rev3 2 v12 . 8 h , v12 . 8 h
2018-12-04 16:13:33 +03:00
ror a12 , a12 , #16
2017-01-11 19:41:49 +03:00
rev3 2 v13 . 8 h , v13 . 8 h
2018-12-04 16:13:33 +03:00
ror a13 , a13 , #16
2017-01-11 19:41:49 +03:00
rev3 2 v14 . 8 h , v14 . 8 h
2018-12-04 16:13:33 +03:00
ror a14 , a14 , #16
2017-01-11 19:41:49 +03:00
/ / x1 0 + = x15 , x5 = r o t l 3 2 ( x5 ^ x10 , 1 2 )
/ / x1 1 + = x12 , x6 = r o t l 3 2 ( x6 ^ x11 , 1 2 )
/ / x8 + = x13 , x7 = r o t l 3 2 ( x7 ^ x8 , 1 2 )
/ / x9 + = x14 , x4 = r o t l 3 2 ( x4 ^ x9 , 1 2 )
add v10 . 4 s , v10 . 4 s , v15 . 4 s
2018-12-04 16:13:33 +03:00
add a10 , a10 , a15
2017-01-11 19:41:49 +03:00
add v11 . 4 s , v11 . 4 s , v12 . 4 s
2018-12-04 16:13:33 +03:00
add a11 , a11 , a12
2017-01-11 19:41:49 +03:00
add v8 . 4 s , v8 . 4 s , v13 . 4 s
2018-12-04 16:13:33 +03:00
add a8 , a8 , a13
2017-01-11 19:41:49 +03:00
add v9 . 4 s , v9 . 4 s , v14 . 4 s
2018-12-04 16:13:33 +03:00
add a9 , a9 , a14
2017-01-11 19:41:49 +03:00
eor v16 . 1 6 b , v5 . 1 6 b , v10 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a5 , a5 , a10
2017-01-11 19:41:49 +03:00
eor v17 . 1 6 b , v6 . 1 6 b , v11 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a6 , a6 , a11
2017-01-11 19:41:49 +03:00
eor v18 . 1 6 b , v7 . 1 6 b , v8 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a7 , a7 , a8
2017-01-11 19:41:49 +03:00
eor v19 . 1 6 b , v4 . 1 6 b , v9 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a4 , a4 , a9
2017-01-11 19:41:49 +03:00
shl v5 . 4 s , v16 . 4 s , #12
shl v6 . 4 s , v17 . 4 s , #12
shl v7 . 4 s , v18 . 4 s , #12
shl v4 . 4 s , v19 . 4 s , #12
sri v5 . 4 s , v16 . 4 s , #20
2018-12-04 16:13:33 +03:00
ror a5 , a5 , #20
2017-01-11 19:41:49 +03:00
sri v6 . 4 s , v17 . 4 s , #20
2018-12-04 16:13:33 +03:00
ror a6 , a6 , #20
2017-01-11 19:41:49 +03:00
sri v7 . 4 s , v18 . 4 s , #20
2018-12-04 16:13:33 +03:00
ror a7 , a7 , #20
2017-01-11 19:41:49 +03:00
sri v4 . 4 s , v19 . 4 s , #20
2018-12-04 16:13:33 +03:00
ror a4 , a4 , #20
2017-01-11 19:41:49 +03:00
/ / x0 + = x5 , x15 = r o t l 3 2 ( x15 ^ x0 , 8 )
/ / x1 + = x6 , x12 = r o t l 3 2 ( x12 ^ x1 , 8 )
/ / x2 + = x7 , x13 = r o t l 3 2 ( x13 ^ x2 , 8 )
/ / x3 + = x4 , x14 = r o t l 3 2 ( x14 ^ x3 , 8 )
add v0 . 4 s , v0 . 4 s , v5 . 4 s
2018-12-04 16:13:33 +03:00
add a0 , a0 , a5
2017-01-11 19:41:49 +03:00
add v1 . 4 s , v1 . 4 s , v6 . 4 s
2018-12-04 16:13:33 +03:00
add a1 , a1 , a6
2017-01-11 19:41:49 +03:00
add v2 . 4 s , v2 . 4 s , v7 . 4 s
2018-12-04 16:13:33 +03:00
add a2 , a2 , a7
2017-01-11 19:41:49 +03:00
add v3 . 4 s , v3 . 4 s , v4 . 4 s
2018-12-04 16:13:33 +03:00
add a3 , a3 , a4
2017-01-11 19:41:49 +03:00
eor v15 . 1 6 b , v15 . 1 6 b , v0 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a15 , a15 , a0
2017-01-11 19:41:49 +03:00
eor v12 . 1 6 b , v12 . 1 6 b , v1 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a12 , a12 , a1
2017-01-11 19:41:49 +03:00
eor v13 . 1 6 b , v13 . 1 6 b , v2 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a13 , a13 , a2
2017-01-11 19:41:49 +03:00
eor v14 . 1 6 b , v14 . 1 6 b , v3 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a14 , a14 , a3
2017-01-11 19:41:49 +03:00
tbl v15 . 1 6 b , { v15 . 1 6 b } , v31 . 1 6 b
2018-12-04 16:13:33 +03:00
ror a15 , a15 , #24
2017-01-11 19:41:49 +03:00
tbl v12 . 1 6 b , { v12 . 1 6 b } , v31 . 1 6 b
2018-12-04 16:13:33 +03:00
ror a12 , a12 , #24
2017-01-11 19:41:49 +03:00
tbl v13 . 1 6 b , { v13 . 1 6 b } , v31 . 1 6 b
2018-12-04 16:13:33 +03:00
ror a13 , a13 , #24
2017-01-11 19:41:49 +03:00
tbl v14 . 1 6 b , { v14 . 1 6 b } , v31 . 1 6 b
2018-12-04 16:13:33 +03:00
ror a14 , a14 , #24
2017-01-11 19:41:49 +03:00
/ / x1 0 + = x15 , x5 = r o t l 3 2 ( x5 ^ x10 , 7 )
/ / x1 1 + = x12 , x6 = r o t l 3 2 ( x6 ^ x11 , 7 )
/ / x8 + = x13 , x7 = r o t l 3 2 ( x7 ^ x8 , 7 )
/ / x9 + = x14 , x4 = r o t l 3 2 ( x4 ^ x9 , 7 )
add v10 . 4 s , v10 . 4 s , v15 . 4 s
2018-12-04 16:13:33 +03:00
add a10 , a10 , a15
2017-01-11 19:41:49 +03:00
add v11 . 4 s , v11 . 4 s , v12 . 4 s
2018-12-04 16:13:33 +03:00
add a11 , a11 , a12
2017-01-11 19:41:49 +03:00
add v8 . 4 s , v8 . 4 s , v13 . 4 s
2018-12-04 16:13:33 +03:00
add a8 , a8 , a13
2017-01-11 19:41:49 +03:00
add v9 . 4 s , v9 . 4 s , v14 . 4 s
2018-12-04 16:13:33 +03:00
add a9 , a9 , a14
2017-01-11 19:41:49 +03:00
eor v16 . 1 6 b , v5 . 1 6 b , v10 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a5 , a5 , a10
2017-01-11 19:41:49 +03:00
eor v17 . 1 6 b , v6 . 1 6 b , v11 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a6 , a6 , a11
2017-01-11 19:41:49 +03:00
eor v18 . 1 6 b , v7 . 1 6 b , v8 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a7 , a7 , a8
2017-01-11 19:41:49 +03:00
eor v19 . 1 6 b , v4 . 1 6 b , v9 . 1 6 b
2018-12-04 16:13:33 +03:00
eor a4 , a4 , a9
2017-01-11 19:41:49 +03:00
shl v5 . 4 s , v16 . 4 s , #7
shl v6 . 4 s , v17 . 4 s , #7
shl v7 . 4 s , v18 . 4 s , #7
shl v4 . 4 s , v19 . 4 s , #7
sri v5 . 4 s , v16 . 4 s , #25
2018-12-04 16:13:33 +03:00
ror a5 , a5 , #25
2017-01-11 19:41:49 +03:00
sri v6 . 4 s , v17 . 4 s , #25
2018-12-04 16:13:33 +03:00
ror a6 , a6 , #25
2017-01-11 19:41:49 +03:00
sri v7 . 4 s , v18 . 4 s , #25
2018-12-04 16:13:33 +03:00
ror a7 , a7 , #25
2017-01-11 19:41:49 +03:00
sri v4 . 4 s , v19 . 4 s , #25
2018-12-04 16:13:33 +03:00
ror a4 , a4 , #25
2017-01-11 19:41:49 +03:00
2018-12-04 06:52:51 +03:00
subs w3 , w3 , #2
2017-01-11 19:41:49 +03:00
b. n e . L d o u b l e r o u n d4
ld4 r { v16 . 4 s - v19 . 4 s } , [ x0 ] , #16
ld4 r { v20 . 4 s - v23 . 4 s } , [ x0 ] , #16
/ / x1 2 + = c o u n t e r v a l u e s 0 - 3
add v12 . 4 s , v12 . 4 s , v30 . 4 s
/ / x0 [ 0 - 3 ] + = s0 [ 0 ]
/ / x1 [ 0 - 3 ] + = s0 [ 1 ]
/ / x2 [ 0 - 3 ] + = s0 [ 2 ]
/ / x3 [ 0 - 3 ] + = s0 [ 3 ]
add v0 . 4 s , v0 . 4 s , v16 . 4 s
2018-12-04 16:13:33 +03:00
mov w6 , v16 . s [ 0 ]
mov w7 , v17 . s [ 0 ]
2017-01-11 19:41:49 +03:00
add v1 . 4 s , v1 . 4 s , v17 . 4 s
2018-12-04 16:13:33 +03:00
mov w8 , v18 . s [ 0 ]
mov w9 , v19 . s [ 0 ]
2017-01-11 19:41:49 +03:00
add v2 . 4 s , v2 . 4 s , v18 . 4 s
2018-12-04 16:13:33 +03:00
add a0 , a0 , w6
add a1 , a1 , w7
2017-01-11 19:41:49 +03:00
add v3 . 4 s , v3 . 4 s , v19 . 4 s
2018-12-04 16:13:33 +03:00
add a2 , a2 , w8
add a3 , a3 , w9
2019-02-23 09:54:07 +03:00
CPU_ B E ( r e v a0 , a0 )
CPU_ B E ( r e v a1 , a1 )
CPU_ B E ( r e v a2 , a2 )
CPU_ B E ( r e v a3 , a3 )
2017-01-11 19:41:49 +03:00
ld4 r { v24 . 4 s - v27 . 4 s } , [ x0 ] , #16
ld4 r { v28 . 4 s - v31 . 4 s } , [ x0 ]
/ / x4 [ 0 - 3 ] + = s1 [ 0 ]
/ / x5 [ 0 - 3 ] + = s1 [ 1 ]
/ / x6 [ 0 - 3 ] + = s1 [ 2 ]
/ / x7 [ 0 - 3 ] + = s1 [ 3 ]
add v4 . 4 s , v4 . 4 s , v20 . 4 s
2018-12-04 16:13:33 +03:00
mov w6 , v20 . s [ 0 ]
mov w7 , v21 . s [ 0 ]
2017-01-11 19:41:49 +03:00
add v5 . 4 s , v5 . 4 s , v21 . 4 s
2018-12-04 16:13:33 +03:00
mov w8 , v22 . s [ 0 ]
mov w9 , v23 . s [ 0 ]
2017-01-11 19:41:49 +03:00
add v6 . 4 s , v6 . 4 s , v22 . 4 s
2018-12-04 16:13:33 +03:00
add a4 , a4 , w6
add a5 , a5 , w7
2017-01-11 19:41:49 +03:00
add v7 . 4 s , v7 . 4 s , v23 . 4 s
2018-12-04 16:13:33 +03:00
add a6 , a6 , w8
add a7 , a7 , w9
2019-02-23 09:54:07 +03:00
CPU_ B E ( r e v a4 , a4 )
CPU_ B E ( r e v a5 , a5 )
CPU_ B E ( r e v a6 , a6 )
CPU_ B E ( r e v a7 , a7 )
2017-01-11 19:41:49 +03:00
/ / x8 [ 0 - 3 ] + = s2 [ 0 ]
/ / x9 [ 0 - 3 ] + = s2 [ 1 ]
/ / x1 0 [ 0 - 3 ] + = s2 [ 2 ]
/ / x1 1 [ 0 - 3 ] + = s2 [ 3 ]
add v8 . 4 s , v8 . 4 s , v24 . 4 s
2018-12-04 16:13:33 +03:00
mov w6 , v24 . s [ 0 ]
mov w7 , v25 . s [ 0 ]
2017-01-11 19:41:49 +03:00
add v9 . 4 s , v9 . 4 s , v25 . 4 s
2018-12-04 16:13:33 +03:00
mov w8 , v26 . s [ 0 ]
mov w9 , v27 . s [ 0 ]
2017-01-11 19:41:49 +03:00
add v10 . 4 s , v10 . 4 s , v26 . 4 s
2018-12-04 16:13:33 +03:00
add a8 , a8 , w6
add a9 , a9 , w7
2017-01-11 19:41:49 +03:00
add v11 . 4 s , v11 . 4 s , v27 . 4 s
2018-12-04 16:13:33 +03:00
add a10 , a10 , w8
add a11 , a11 , w9
2019-02-23 09:54:07 +03:00
CPU_ B E ( r e v a8 , a8 )
CPU_ B E ( r e v a9 , a9 )
CPU_ B E ( r e v a10 , a10 )
CPU_ B E ( r e v a11 , a11 )
2017-01-11 19:41:49 +03:00
/ / x1 2 [ 0 - 3 ] + = s3 [ 0 ]
/ / x1 3 [ 0 - 3 ] + = s3 [ 1 ]
/ / x1 4 [ 0 - 3 ] + = s3 [ 2 ]
/ / x1 5 [ 0 - 3 ] + = s3 [ 3 ]
add v12 . 4 s , v12 . 4 s , v28 . 4 s
2018-12-04 16:13:33 +03:00
mov w6 , v28 . s [ 0 ]
mov w7 , v29 . s [ 0 ]
2017-01-11 19:41:49 +03:00
add v13 . 4 s , v13 . 4 s , v29 . 4 s
2018-12-04 16:13:33 +03:00
mov w8 , v30 . s [ 0 ]
mov w9 , v31 . s [ 0 ]
2017-01-11 19:41:49 +03:00
add v14 . 4 s , v14 . 4 s , v30 . 4 s
2018-12-04 16:13:33 +03:00
add a12 , a12 , w6
add a13 , a13 , w7
2017-01-11 19:41:49 +03:00
add v15 . 4 s , v15 . 4 s , v31 . 4 s
2018-12-04 16:13:33 +03:00
add a14 , a14 , w8
add a15 , a15 , w9
2019-02-23 09:54:07 +03:00
CPU_ B E ( r e v a12 , a12 )
CPU_ B E ( r e v a13 , a13 )
CPU_ B E ( r e v a14 , a14 )
CPU_ B E ( r e v a15 , a15 )
2017-01-11 19:41:49 +03:00
/ / interleave 3 2 - b i t w o r d s i n s t a t e n , n + 1
2018-12-04 16:13:33 +03:00
ldp w6 , w7 , [ x2 ] , #64
2017-01-11 19:41:49 +03:00
zip1 v16 . 4 s , v0 . 4 s , v1 . 4 s
2018-12-04 16:13:33 +03:00
ldp w8 , w9 , [ x2 , #- 56 ]
eor a0 , a0 , w6
2017-01-11 19:41:49 +03:00
zip2 v17 . 4 s , v0 . 4 s , v1 . 4 s
2018-12-04 16:13:33 +03:00
eor a1 , a1 , w7
2017-01-11 19:41:49 +03:00
zip1 v18 . 4 s , v2 . 4 s , v3 . 4 s
2018-12-04 16:13:33 +03:00
eor a2 , a2 , w8
2017-01-11 19:41:49 +03:00
zip2 v19 . 4 s , v2 . 4 s , v3 . 4 s
2018-12-04 16:13:33 +03:00
eor a3 , a3 , w9
ldp w6 , w7 , [ x2 , #- 48 ]
2017-01-11 19:41:49 +03:00
zip1 v20 . 4 s , v4 . 4 s , v5 . 4 s
2018-12-04 16:13:33 +03:00
ldp w8 , w9 , [ x2 , #- 40 ]
eor a4 , a4 , w6
2017-01-11 19:41:49 +03:00
zip2 v21 . 4 s , v4 . 4 s , v5 . 4 s
2018-12-04 16:13:33 +03:00
eor a5 , a5 , w7
2017-01-11 19:41:49 +03:00
zip1 v22 . 4 s , v6 . 4 s , v7 . 4 s
2018-12-04 16:13:33 +03:00
eor a6 , a6 , w8
2017-01-11 19:41:49 +03:00
zip2 v23 . 4 s , v6 . 4 s , v7 . 4 s
2018-12-04 16:13:33 +03:00
eor a7 , a7 , w9
ldp w6 , w7 , [ x2 , #- 32 ]
2017-01-11 19:41:49 +03:00
zip1 v24 . 4 s , v8 . 4 s , v9 . 4 s
2018-12-04 16:13:33 +03:00
ldp w8 , w9 , [ x2 , #- 24 ]
eor a8 , a8 , w6
2017-01-11 19:41:49 +03:00
zip2 v25 . 4 s , v8 . 4 s , v9 . 4 s
2018-12-04 16:13:33 +03:00
eor a9 , a9 , w7
2017-01-11 19:41:49 +03:00
zip1 v26 . 4 s , v10 . 4 s , v11 . 4 s
2018-12-04 16:13:33 +03:00
eor a10 , a10 , w8
2017-01-11 19:41:49 +03:00
zip2 v27 . 4 s , v10 . 4 s , v11 . 4 s
2018-12-04 16:13:33 +03:00
eor a11 , a11 , w9
ldp w6 , w7 , [ x2 , #- 16 ]
2017-01-11 19:41:49 +03:00
zip1 v28 . 4 s , v12 . 4 s , v13 . 4 s
2018-12-04 16:13:33 +03:00
ldp w8 , w9 , [ x2 , #- 8 ]
eor a12 , a12 , w6
2017-01-11 19:41:49 +03:00
zip2 v29 . 4 s , v12 . 4 s , v13 . 4 s
2018-12-04 16:13:33 +03:00
eor a13 , a13 , w7
2017-01-11 19:41:49 +03:00
zip1 v30 . 4 s , v14 . 4 s , v15 . 4 s
2018-12-04 16:13:33 +03:00
eor a14 , a14 , w8
2017-01-11 19:41:49 +03:00
zip2 v31 . 4 s , v14 . 4 s , v15 . 4 s
2018-12-04 16:13:33 +03:00
eor a15 , a15 , w9
2017-01-11 19:41:49 +03:00
2018-12-04 16:13:32 +03:00
mov x3 , #64
2018-12-04 16:13:33 +03:00
subs x5 , x4 , #128
2018-12-04 16:13:32 +03:00
add x6 , x5 , x2
csel x3 , x3 , x z r , g e
csel x2 , x2 , x6 , g e
2017-01-11 19:41:49 +03:00
/ / interleave 6 4 - b i t w o r d s i n s t a t e n , n + 2
zip1 v0 . 2 d , v16 . 2 d , v18 . 2 d
zip2 v4 . 2 d , v16 . 2 d , v18 . 2 d
2018-12-04 16:13:33 +03:00
stp a0 , a1 , [ x1 ] , #64
2017-01-11 19:41:49 +03:00
zip1 v8 . 2 d , v17 . 2 d , v19 . 2 d
zip2 v12 . 2 d , v17 . 2 d , v19 . 2 d
2018-12-04 16:13:33 +03:00
stp a2 , a3 , [ x1 , #- 56 ]
2018-12-04 16:13:32 +03:00
ld1 { v16 . 1 6 b - v19 . 1 6 b } , [ x2 ] , x3
2018-12-04 16:13:33 +03:00
subs x6 , x4 , #192
2018-12-04 16:13:32 +03:00
ccmp x3 , x z r , #4 , l t
add x7 , x6 , x2
csel x3 , x3 , x z r , e q
csel x2 , x2 , x7 , e q
2017-01-11 19:41:49 +03:00
zip1 v1 . 2 d , v20 . 2 d , v22 . 2 d
zip2 v5 . 2 d , v20 . 2 d , v22 . 2 d
2018-12-04 16:13:33 +03:00
stp a4 , a5 , [ x1 , #- 48 ]
2017-01-11 19:41:49 +03:00
zip1 v9 . 2 d , v21 . 2 d , v23 . 2 d
zip2 v13 . 2 d , v21 . 2 d , v23 . 2 d
2018-12-04 16:13:33 +03:00
stp a6 , a7 , [ x1 , #- 40 ]
2018-12-04 16:13:32 +03:00
ld1 { v20 . 1 6 b - v23 . 1 6 b } , [ x2 ] , x3
2018-12-04 16:13:33 +03:00
subs x7 , x4 , #256
2018-12-04 16:13:32 +03:00
ccmp x3 , x z r , #4 , l t
add x8 , x7 , x2
csel x3 , x3 , x z r , e q
csel x2 , x2 , x8 , e q
2017-01-11 19:41:49 +03:00
zip1 v2 . 2 d , v24 . 2 d , v26 . 2 d
zip2 v6 . 2 d , v24 . 2 d , v26 . 2 d
2018-12-04 16:13:33 +03:00
stp a8 , a9 , [ x1 , #- 32 ]
2017-01-11 19:41:49 +03:00
zip1 v10 . 2 d , v25 . 2 d , v27 . 2 d
zip2 v14 . 2 d , v25 . 2 d , v27 . 2 d
2018-12-04 16:13:33 +03:00
stp a10 , a11 , [ x1 , #- 24 ]
2018-12-04 16:13:32 +03:00
ld1 { v24 . 1 6 b - v27 . 1 6 b } , [ x2 ] , x3
2018-12-04 16:13:33 +03:00
subs x8 , x4 , #320
2018-12-04 16:13:32 +03:00
ccmp x3 , x z r , #4 , l t
add x9 , x8 , x2
csel x2 , x2 , x9 , e q
2017-01-11 19:41:49 +03:00
zip1 v3 . 2 d , v28 . 2 d , v30 . 2 d
zip2 v7 . 2 d , v28 . 2 d , v30 . 2 d
2018-12-04 16:13:33 +03:00
stp a12 , a13 , [ x1 , #- 16 ]
2017-01-11 19:41:49 +03:00
zip1 v11 . 2 d , v29 . 2 d , v31 . 2 d
zip2 v15 . 2 d , v29 . 2 d , v31 . 2 d
2018-12-04 16:13:33 +03:00
stp a14 , a15 , [ x1 , #- 8 ]
2017-01-11 19:41:49 +03:00
ld1 { v28 . 1 6 b - v31 . 1 6 b } , [ x2 ]
/ / xor w i t h c o r r e s p o n d i n g i n p u t , w r i t e t o o u t p u t
2018-12-04 16:13:32 +03:00
tbnz x5 , #63 , 0 f
2017-01-11 19:41:49 +03:00
eor v16 . 1 6 b , v16 . 1 6 b , v0 . 1 6 b
eor v17 . 1 6 b , v17 . 1 6 b , v1 . 1 6 b
eor v18 . 1 6 b , v18 . 1 6 b , v2 . 1 6 b
eor v19 . 1 6 b , v19 . 1 6 b , v3 . 1 6 b
2018-12-04 16:13:32 +03:00
st1 { v16 . 1 6 b - v19 . 1 6 b } , [ x1 ] , #64
2018-12-04 16:13:33 +03:00
cbz x5 , . L o u t
2018-12-04 16:13:32 +03:00
tbnz x6 , #63 , 1 f
2017-01-11 19:41:49 +03:00
eor v20 . 1 6 b , v20 . 1 6 b , v4 . 1 6 b
eor v21 . 1 6 b , v21 . 1 6 b , v5 . 1 6 b
eor v22 . 1 6 b , v22 . 1 6 b , v6 . 1 6 b
eor v23 . 1 6 b , v23 . 1 6 b , v7 . 1 6 b
2018-12-04 16:13:32 +03:00
st1 { v20 . 1 6 b - v23 . 1 6 b } , [ x1 ] , #64
2018-12-04 16:13:33 +03:00
cbz x6 , . L o u t
2018-12-04 16:13:32 +03:00
tbnz x7 , #63 , 2 f
2017-01-11 19:41:49 +03:00
eor v24 . 1 6 b , v24 . 1 6 b , v8 . 1 6 b
eor v25 . 1 6 b , v25 . 1 6 b , v9 . 1 6 b
eor v26 . 1 6 b , v26 . 1 6 b , v10 . 1 6 b
eor v27 . 1 6 b , v27 . 1 6 b , v11 . 1 6 b
st1 { v24 . 1 6 b - v27 . 1 6 b } , [ x1 ] , #64
2018-12-04 16:13:33 +03:00
cbz x7 , . L o u t
2018-12-04 16:13:32 +03:00
tbnz x8 , #63 , 3 f
eor v28 . 1 6 b , v28 . 1 6 b , v12 . 1 6 b
2017-01-11 19:41:49 +03:00
eor v29 . 1 6 b , v29 . 1 6 b , v13 . 1 6 b
eor v30 . 1 6 b , v30 . 1 6 b , v14 . 1 6 b
eor v31 . 1 6 b , v31 . 1 6 b , v15 . 1 6 b
st1 { v28 . 1 6 b - v31 . 1 6 b } , [ x1 ]
2018-12-04 16:13:33 +03:00
.Lout : frame_ p o p
2017-01-11 19:41:49 +03:00
ret
2018-12-04 16:13:32 +03:00
2018-12-04 16:13:33 +03:00
/ / fewer t h a n 1 2 8 b y t e s o f i n / o u t p u t
2018-12-04 16:13:32 +03:00
0 : ld1 { v8 . 1 6 b } , [ x10 ]
ld1 { v9 . 1 6 b } , [ x11 ]
movi v10 . 1 6 b , #16
sub x2 , x1 , #64
add x1 , x1 , x5
ld1 { v16 . 1 6 b - v19 . 1 6 b } , [ x2 ]
tbl v4 . 1 6 b , { v0 . 1 6 b - v3 . 1 6 b } , v8 . 1 6 b
tbx v20 . 1 6 b , { v16 . 1 6 b - v19 . 1 6 b } , v9 . 1 6 b
add v8 . 1 6 b , v8 . 1 6 b , v10 . 1 6 b
add v9 . 1 6 b , v9 . 1 6 b , v10 . 1 6 b
tbl v5 . 1 6 b , { v0 . 1 6 b - v3 . 1 6 b } , v8 . 1 6 b
tbx v21 . 1 6 b , { v16 . 1 6 b - v19 . 1 6 b } , v9 . 1 6 b
add v8 . 1 6 b , v8 . 1 6 b , v10 . 1 6 b
add v9 . 1 6 b , v9 . 1 6 b , v10 . 1 6 b
tbl v6 . 1 6 b , { v0 . 1 6 b - v3 . 1 6 b } , v8 . 1 6 b
tbx v22 . 1 6 b , { v16 . 1 6 b - v19 . 1 6 b } , v9 . 1 6 b
add v8 . 1 6 b , v8 . 1 6 b , v10 . 1 6 b
add v9 . 1 6 b , v9 . 1 6 b , v10 . 1 6 b
tbl v7 . 1 6 b , { v0 . 1 6 b - v3 . 1 6 b } , v8 . 1 6 b
tbx v23 . 1 6 b , { v16 . 1 6 b - v19 . 1 6 b } , v9 . 1 6 b
eor v20 . 1 6 b , v20 . 1 6 b , v4 . 1 6 b
eor v21 . 1 6 b , v21 . 1 6 b , v5 . 1 6 b
eor v22 . 1 6 b , v22 . 1 6 b , v6 . 1 6 b
eor v23 . 1 6 b , v23 . 1 6 b , v7 . 1 6 b
st1 { v20 . 1 6 b - v23 . 1 6 b } , [ x1 ]
2018-12-04 16:13:33 +03:00
b . L o u t
2018-12-04 16:13:32 +03:00
2018-12-04 16:13:33 +03:00
// fewer than 192 bytes of in/output
2018-12-04 16:13:32 +03:00
1 : ld1 { v8 . 1 6 b } , [ x10 ]
ld1 { v9 . 1 6 b } , [ x11 ]
movi v10 . 1 6 b , #16
add x1 , x1 , x6
tbl v0 . 1 6 b , { v4 . 1 6 b - v7 . 1 6 b } , v8 . 1 6 b
tbx v20 . 1 6 b , { v16 . 1 6 b - v19 . 1 6 b } , v9 . 1 6 b
add v8 . 1 6 b , v8 . 1 6 b , v10 . 1 6 b
add v9 . 1 6 b , v9 . 1 6 b , v10 . 1 6 b
tbl v1 . 1 6 b , { v4 . 1 6 b - v7 . 1 6 b } , v8 . 1 6 b
tbx v21 . 1 6 b , { v16 . 1 6 b - v19 . 1 6 b } , v9 . 1 6 b
add v8 . 1 6 b , v8 . 1 6 b , v10 . 1 6 b
add v9 . 1 6 b , v9 . 1 6 b , v10 . 1 6 b
tbl v2 . 1 6 b , { v4 . 1 6 b - v7 . 1 6 b } , v8 . 1 6 b
tbx v22 . 1 6 b , { v16 . 1 6 b - v19 . 1 6 b } , v9 . 1 6 b
add v8 . 1 6 b , v8 . 1 6 b , v10 . 1 6 b
add v9 . 1 6 b , v9 . 1 6 b , v10 . 1 6 b
tbl v3 . 1 6 b , { v4 . 1 6 b - v7 . 1 6 b } , v8 . 1 6 b
tbx v23 . 1 6 b , { v16 . 1 6 b - v19 . 1 6 b } , v9 . 1 6 b
eor v20 . 1 6 b , v20 . 1 6 b , v0 . 1 6 b
eor v21 . 1 6 b , v21 . 1 6 b , v1 . 1 6 b
eor v22 . 1 6 b , v22 . 1 6 b , v2 . 1 6 b
eor v23 . 1 6 b , v23 . 1 6 b , v3 . 1 6 b
st1 { v20 . 1 6 b - v23 . 1 6 b } , [ x1 ]
2018-12-04 16:13:33 +03:00
b . L o u t
2018-12-04 16:13:32 +03:00
2018-12-04 16:13:33 +03:00
// fewer than 256 bytes of in/output
2018-12-04 16:13:32 +03:00
2 : ld1 { v4 . 1 6 b } , [ x10 ]
ld1 { v5 . 1 6 b } , [ x11 ]
movi v6 . 1 6 b , #16
add x1 , x1 , x7
tbl v0 . 1 6 b , { v8 . 1 6 b - v11 . 1 6 b } , v4 . 1 6 b
tbx v24 . 1 6 b , { v20 . 1 6 b - v23 . 1 6 b } , v5 . 1 6 b
add v4 . 1 6 b , v4 . 1 6 b , v6 . 1 6 b
add v5 . 1 6 b , v5 . 1 6 b , v6 . 1 6 b
tbl v1 . 1 6 b , { v8 . 1 6 b - v11 . 1 6 b } , v4 . 1 6 b
tbx v25 . 1 6 b , { v20 . 1 6 b - v23 . 1 6 b } , v5 . 1 6 b
add v4 . 1 6 b , v4 . 1 6 b , v6 . 1 6 b
add v5 . 1 6 b , v5 . 1 6 b , v6 . 1 6 b
tbl v2 . 1 6 b , { v8 . 1 6 b - v11 . 1 6 b } , v4 . 1 6 b
tbx v26 . 1 6 b , { v20 . 1 6 b - v23 . 1 6 b } , v5 . 1 6 b
add v4 . 1 6 b , v4 . 1 6 b , v6 . 1 6 b
add v5 . 1 6 b , v5 . 1 6 b , v6 . 1 6 b
tbl v3 . 1 6 b , { v8 . 1 6 b - v11 . 1 6 b } , v4 . 1 6 b
tbx v27 . 1 6 b , { v20 . 1 6 b - v23 . 1 6 b } , v5 . 1 6 b
eor v24 . 1 6 b , v24 . 1 6 b , v0 . 1 6 b
eor v25 . 1 6 b , v25 . 1 6 b , v1 . 1 6 b
eor v26 . 1 6 b , v26 . 1 6 b , v2 . 1 6 b
eor v27 . 1 6 b , v27 . 1 6 b , v3 . 1 6 b
st1 { v24 . 1 6 b - v27 . 1 6 b } , [ x1 ]
2018-12-04 16:13:33 +03:00
b . L o u t
2018-12-04 16:13:32 +03:00
2018-12-04 16:13:33 +03:00
// fewer than 320 bytes of in/output
2018-12-04 16:13:32 +03:00
3 : ld1 { v4 . 1 6 b } , [ x10 ]
ld1 { v5 . 1 6 b } , [ x11 ]
movi v6 . 1 6 b , #16
add x1 , x1 , x8
tbl v0 . 1 6 b , { v12 . 1 6 b - v15 . 1 6 b } , v4 . 1 6 b
tbx v28 . 1 6 b , { v24 . 1 6 b - v27 . 1 6 b } , v5 . 1 6 b
add v4 . 1 6 b , v4 . 1 6 b , v6 . 1 6 b
add v5 . 1 6 b , v5 . 1 6 b , v6 . 1 6 b
tbl v1 . 1 6 b , { v12 . 1 6 b - v15 . 1 6 b } , v4 . 1 6 b
tbx v29 . 1 6 b , { v24 . 1 6 b - v27 . 1 6 b } , v5 . 1 6 b
add v4 . 1 6 b , v4 . 1 6 b , v6 . 1 6 b
add v5 . 1 6 b , v5 . 1 6 b , v6 . 1 6 b
tbl v2 . 1 6 b , { v12 . 1 6 b - v15 . 1 6 b } , v4 . 1 6 b
tbx v30 . 1 6 b , { v24 . 1 6 b - v27 . 1 6 b } , v5 . 1 6 b
add v4 . 1 6 b , v4 . 1 6 b , v6 . 1 6 b
add v5 . 1 6 b , v5 . 1 6 b , v6 . 1 6 b
tbl v3 . 1 6 b , { v12 . 1 6 b - v15 . 1 6 b } , v4 . 1 6 b
tbx v31 . 1 6 b , { v24 . 1 6 b - v27 . 1 6 b } , v5 . 1 6 b
eor v28 . 1 6 b , v28 . 1 6 b , v0 . 1 6 b
eor v29 . 1 6 b , v29 . 1 6 b , v1 . 1 6 b
eor v30 . 1 6 b , v30 . 1 6 b , v2 . 1 6 b
eor v31 . 1 6 b , v31 . 1 6 b , v3 . 1 6 b
st1 { v28 . 1 6 b - v31 . 1 6 b } , [ x1 ]
2018-12-04 16:13:33 +03:00
b . L o u t
2019-12-13 18:49:10 +03:00
SYM_ F U N C _ E N D ( c h a c h a _ 4 b l o c k _ x o r _ n e o n )
2017-01-11 19:41:49 +03:00
2018-12-04 16:13:32 +03:00
/*
 * Read-only constants.
 *
 * .Lpermute
 *	192 consecutive bytes; the byte at offset i holds (i - 64), i.e. the
 *	sequence -64 .. 127.  Only offsets 64..127 hold values in 0..63,
 *	which is the valid index range of a four-register (64-byte) tbl/tbx
 *	lookup.  Every other entry is out of range: tbl writes zero for such
 *	an index and tbx leaves the destination byte untouched.
 *	NOTE(review): the partial-block handlers of chacha_4block_xor_neon
 *	load their tbl/tbx index vectors through x10/x11; those pointers are
 *	set up outside this part of the file and presumably point into this
 *	table - confirm against the function prologue.
 *	The table is aligned to 2^L1_CACHE_SHIFT bytes (AArch64 gas treats
 *	the .align operand as a power-of-two exponent).  L1_CACHE_SHIFT is
 *	not defined in this file; presumably it comes from <asm/cache.h>.
 *
 * CTRINC
 *	Four 32-bit words 1, 2, 3, 4.
 *	NOTE(review): by its name, per-lane block counter increments; the
 *	code that loads it is not visible here - confirm.
 *
 * ROT8
 *	Four 32-bit words which, viewed as little-endian bytes, read
 *	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14.  Used as a tbl index
 *	vector, that pattern rotates every 32-bit lane left by 8 bits.
 *	The users of this constant are outside this part of the file.
 */
	.section	".rodata", "a", %progbits
	.align		L1_CACHE_SHIFT
.Lpermute:
	.set		.Li, 0
	.rept		192
	.byte		(.Li - 64)
	.set		.Li, .Li + 1
	.endr

CTRINC:	.word		1, 2, 3, 4
ROT8:	.word		0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f