2016-12-05 21:42:25 +03:00
/ /
// Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions
//
// Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License version 2 as
// published by the Free Software Foundation.
/ /
/ /
// Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
//
// Copyright (c) 2013, Intel Corporation
//
// Authors:
//     Erdinc Ozturk <erdinc.ozturk@intel.com>
//     Vinodh Gopal <vinodh.gopal@intel.com>
//     James Guilford <james.guilford@intel.com>
//     Tim Chen <tim.c.chen@linux.intel.com>
/ /
// This software is available to you under a choice of one of two
// licenses.  You may choose to be licensed under the terms of the GNU
// General Public License (GPL) Version 2, available from the file
// COPYING in the main directory of this source tree, or the
// OpenIB.org BSD license below:
/ /
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
//   notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
//   notice, this list of conditions and the following disclaimer in the
//   documentation and/or other materials provided with the
//   distribution.
//
// * Neither the name of the Intel Corporation nor the names of its
//   contributors may be used to endorse or promote products derived from
//   this software without specific prior written permission.
/ /
/ /
// THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/ /
// Function API:
//	UINT16 crc_t10dif_pmull(
//		UINT16 init_crc,		// initial CRC value, 16 bits
//		const unsigned char *buf,	// buffer pointer to calculate CRC on
//		UINT64 len			// buffer length in bytes (64-bit data)
//	);
/ /
// Reference paper titled "Fast CRC Computation for Generic
//	Polynomials Using PCLMULQDQ Instruction"
// URL: http://www.intel.com/content/dam/www/public/us/en/documents
//  /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
/ /
/ /
# include < l i n u x / l i n k a g e . h >
# include < a s m / a s s e m b l e r . h >
// Code section, target selection and register aliases used by
// crc_t10dif_pmull below.
	.text
	.cpu		generic+crypto

	// all-zero vector; (re)initialised with movi wherever it is needed
	vzr		.req	v13

	// working copies of the three incoming arguments (w0, x1, x2)
	arg1_low32	.req	w19	// initial CRC, kept shifted left by 16
	arg2		.req	x20	// current input pointer
	arg3		.req	x21	// biased count of remaining bytes
//
// crc_t10dif_pmull
//
// In:	w0 = initial CRC (the value is shifted left by 16 before use, so
//	     only its low 16 bits take part in the computation)
//	x1 = pointer to the input buffer
//	x2 = length of the buffer in bytes
// Out:	w0 = resulting CRC, scaled back down to 16 bits
//
// Uses x4 and x8 as scratch and v0-v13 as working vector registers.
// The arguments are copied into x19-x21 (arg1_low32/arg2/arg3).
//
// NOTE: the short-input path loads 16 bytes from the buffer before it
// checks whether 16 bytes are actually left, so for 0 < len < 16 the
// load reads past the end of the data.
// NOTE(review): callers are presumably expected to make that over-read
// safe (e.g. via buffer alignment) - confirm against the glue code.
//
ENTRY(crc_t10dif_pmull)
	// NOTE(review): frame_push comes from asm/assembler.h; presumably it
	// saves x19-x21 and reserves the 128 bytes of locals that the yield
	// path below uses to spill q0-q7 - confirm against the macro.
	frame_push	3, 128

	mov		arg1_low32, w0
	mov		arg2, x1
	mov		arg3, x2

	movi		vzr.16b, #0		// init zero register

	// adjust the 16-bit initial_crc value, scale it to 32 bits
	lsl		arg1_low32, arg1_low32, #16

	// inputs shorter than 256 bytes cannot use the 128-bytes-per-iteration
	// folding loop and take the short path instead
	cmp		arg3, #256
	b.lt		_less_than_128

	// load the initial crc value
	// crc value does not need to be byte-reflected, but it needs
	// to be moved to the high part of the register.
	// because data will be byte-reflected and will align with
	// initial crc at correct place.
	movi		v10.16b, #0
	mov		v10.s[3], arg1_low32		// initial crc

	// receive the initial 128 bytes of data, xor the initial crc value
	ldp		q0, q1, [arg2]
	ldp		q2, q3, [arg2, #0x20]
	ldp		q4, q5, [arg2, #0x40]
	ldp		q6, q7, [arg2, #0x60]
	add		arg2, arg2, #0x80

CPU_LE(	rev64		v0.16b, v0.16b			)
CPU_LE(	rev64		v1.16b, v1.16b			)
CPU_LE(	rev64		v2.16b, v2.16b			)
CPU_LE(	rev64		v3.16b, v3.16b			)
CPU_LE(	rev64		v4.16b, v4.16b			)
CPU_LE(	rev64		v5.16b, v5.16b			)
CPU_LE(	rev64		v6.16b, v6.16b			)
CPU_LE(	rev64		v7.16b, v7.16b			)

CPU_LE(	ext		v0.16b, v0.16b, v0.16b, #8	)
CPU_LE(	ext		v1.16b, v1.16b, v1.16b, #8	)
CPU_LE(	ext		v2.16b, v2.16b, v2.16b, #8	)
CPU_LE(	ext		v3.16b, v3.16b, v3.16b, #8	)
CPU_LE(	ext		v4.16b, v4.16b, v4.16b, #8	)
CPU_LE(	ext		v5.16b, v5.16b, v5.16b, #8	)
CPU_LE(	ext		v6.16b, v6.16b, v6.16b, #8	)
CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)

	// XOR the initial_crc value
	eor		v0.16b, v0.16b, v10.16b

	ldr_l		q10, rk3, x8	// v10 has rk3 and rk4
					// type of pmull instruction
					// will determine which constant to use

	//
	// we subtract 256 instead of 128 to save one instruction from the loop
	//
	sub		arg3, arg3, #256

	// Each pass through _fold_64_B_loop consumes another 128 bytes of
	// input and folds them into the eight accumulators v0-v7.  The loop
	// exits once fewer than 128 unread bytes remain; at that point
	// arg3 == (unread bytes) - 128.
_fold_64_B_loop:

	// Multiply both 64-bit halves of \reg1 and \reg2 by the constants
	// in v10 and XOR the products with 32 freshly loaded input bytes.
	// Clobbers v8, v9, v11, v12 and advances arg2 by 32.
	.macro		fold64, reg1, reg2
	ldp		q11, q12, [arg2], #0x20

	pmull2		v8.1q, \reg1\().2d, v10.2d
	pmull		\reg1\().1q, \reg1\().1d, v10.1d

CPU_LE(	rev64		v11.16b, v11.16b		)
CPU_LE(	rev64		v12.16b, v12.16b		)

	pmull2		v9.1q, \reg2\().2d, v10.2d
	pmull		\reg2\().1q, \reg2\().1d, v10.1d

CPU_LE(	ext		v11.16b, v11.16b, v11.16b, #8	)
CPU_LE(	ext		v12.16b, v12.16b, v12.16b, #8	)

	eor		\reg1\().16b, \reg1\().16b, v8.16b
	eor		\reg2\().16b, \reg2\().16b, v9.16b

	eor		\reg1\().16b, \reg1\().16b, v11.16b
	eor		\reg2\().16b, \reg2\().16b, v12.16b
	.endm

	fold64		v0, v1
	fold64		v2, v3
	fold64		v4, v5
	fold64		v6, v7

	subs		arg3, arg3, #128

	// check if there is another 128 bytes in the buffer to be able to fold
	b.lt		_fold_64_B_end

	// Possible yield point.  The code does not rely on any vector
	// register surviving it: the eight accumulators are spilled to the
	// frame beforehand and reloaded afterwards, together with the fold
	// constant in v10 and the zero register.
	if_will_cond_yield_neon
	stp		q0, q1, [sp, #.Lframe_local_offset]
	stp		q2, q3, [sp, #.Lframe_local_offset + 32]
	stp		q4, q5, [sp, #.Lframe_local_offset + 64]
	stp		q6, q7, [sp, #.Lframe_local_offset + 96]
	do_cond_yield_neon
	ldp		q0, q1, [sp, #.Lframe_local_offset]
	ldp		q2, q3, [sp, #.Lframe_local_offset + 32]
	ldp		q4, q5, [sp, #.Lframe_local_offset + 64]
	ldp		q6, q7, [sp, #.Lframe_local_offset + 96]
	ldr_l		q10, rk3, x8
	movi		vzr.16b, #0		// init zero register
	endif_yield_neon

	b		_fold_64_B_loop

_fold_64_B_end:
	// at this point, the buffer pointer is pointing at the last
	// (arg3 + 128) bytes of the buffer, and the 128 bytes of folded
	// data are in the 8 vector registers v0-v7

	// fold the 8 vector registers to 1 vector register (v7) with
	// different constants

	ldr_l		q10, rk9, x8

	// Fold \reg into v7 using the constants currently in v10.  If \rk is
	// given, v10 is then reloaded from it for the next invocation.
	// Clobbers v8 and \reg.
	.macro		fold16, reg, rk
	pmull		v8.1q, \reg\().1d, v10.1d
	pmull2		\reg\().1q, \reg\().2d, v10.2d
	.ifnb		\rk
	ldr_l		q10, \rk, x8
	.endif
	eor		v7.16b, v7.16b, v8.16b
	eor		v7.16b, v7.16b, \reg\().16b
	.endm

	fold16		v0, rk11
	fold16		v1, rk13
	fold16		v2, rk15
	fold16		v3, rk17
	fold16		v4, rk19
	fold16		v5, rk1
	fold16		v6

	// v10 now holds rk1/rk2, as needed by the 16-byte loop below.
	// Instead of adding back the full 128, we add 128 - 16 so that the
	// loop needs no separate cmp: arg3 becomes (unread bytes) - 16 and
	// the sign of the result tells whether a whole 16-byte block is left
	adds		arg3, arg3, #(128-16)
	b.lt		_final_reduction_for_128

	// now we have 16+y bytes left to reduce. 16 Bytes is in register v7
	// and the rest is in memory. We can fold 16 bytes at a time if y>=16
	// continue folding 16B at a time

_16B_reduction_loop:
	pmull		v8.1q, v7.1d, v10.1d
	pmull2		v7.1q, v7.2d, v10.2d
	eor		v7.16b, v7.16b, v8.16b

	ldr		q0, [arg2], #16
CPU_LE(	rev64		v0.16b, v0.16b			)
CPU_LE(	ext		v0.16b, v0.16b, v0.16b, #8	)
	eor		v7.16b, v7.16b, v0.16b
	subs		arg3, arg3, #16

	// instead of a cmp instruction, we utilize the flags set by subs
	// (equivalent of: cmp arg3, 16-16)
	// check if there is any more 16B in the buffer to be able to fold
	b.ge		_16B_reduction_loop

	// now we have 16+z bytes left to reduce, where 0<= z < 16.
	// first, we reduce the data in the v7 register

_final_reduction_for_128:
	// check if any more data to fold. If not, compute the CRC of
	// the final 128 bits
	adds		arg3, arg3, #16
	b.eq		_128_done

	// here we are getting data that is less than 16 bytes.
	// since we know that there was data before the pointer, we can
	// offset the input pointer before the actual point, to receive
	// exactly 16 bytes. after that the registers need to be adjusted.
_get_last_two_regs:
	add		arg2, arg2, arg3
	ldr		q1, [arg2, #-16]
CPU_LE(	rev64		v1.16b, v1.16b			)
CPU_LE(	ext		v1.16b, v1.16b, v1.16b, #8	)

	// get rid of the extra data that was loaded before
	// load the shift constant
	adr_l		x4, tbl_shf_table + 16
	sub		x4, x4, arg3
	ld1		{v0.16b}, [x4]

	// v2 = v7 shifted left by arg3 bytes
	tbl		v2.16b, {v7.16b}, v0.16b

	// shift v7 to the right by 16-arg3 bytes
	movi		v9.16b, #0x80
	eor		v0.16b, v0.16b, v9.16b
	tbl		v7.16b, {v7.16b}, v0.16b

	// blend
	sshr		v0.16b, v0.16b, #7	// convert to 8-bit mask
	bsl		v0.16b, v2.16b, v1.16b

	// fold 16 Bytes
	pmull		v8.1q, v7.1d, v10.1d
	pmull2		v7.1q, v7.2d, v10.2d
	eor		v7.16b, v7.16b, v8.16b
	eor		v7.16b, v7.16b, v0.16b

_128_done:
	// compute crc of a 128-bit value
	ldr_l		q10, rk5, x8		// rk5 and rk6 in v10

	// 64b fold
	ext		v0.16b, vzr.16b, v7.16b, #8
	mov		v7.d[0], v7.d[1]
	pmull		v7.1q, v7.1d, v10.1d
	eor		v7.16b, v7.16b, v0.16b

	// 32b fold
	ext		v0.16b, v7.16b, vzr.16b, #4
	mov		v7.s[3], vzr.s[0]
	pmull2		v0.1q, v0.2d, v10.2d
	eor		v7.16b, v7.16b, v0.16b

	// barrett reduction
_barrett:
	ldr_l		q10, rk7, x8		// rk7 and rk8 in v10
	mov		v0.d[0], v7.d[1]

	pmull		v0.1q, v0.1d, v10.1d
	ext		v0.16b, vzr.16b, v0.16b, #12
	pmull2		v0.1q, v0.2d, v10.2d
	ext		v0.16b, vzr.16b, v0.16b, #12
	eor		v7.16b, v7.16b, v0.16b
	mov		w0, v7.s[1]

_cleanup:
	// On every path into here w0 was just written with the CRC still
	// scaled to 32 bits (which also cleared the upper half of x0);
	// scale the result back to 16 bits
	lsr		x0, x0, #16
	frame_pop
	ret

_less_than_128:
	// Zero-length input: the result is the initial CRC.  _cleanup shifts
	// x0 right by 16, so hand it the scaled copy; the caller's w0 still
	// holds the unscaled value and would be shifted away to nothing.
	mov		w0, arg1_low32
	cbz		arg3, _cleanup

	movi		v0.16b, #0
	mov		v0.s[3], arg1_low32	// get the initial crc value

	// loads 16 bytes even when fewer than 16 bytes remain
	ldr		q7, [arg2], #0x10
CPU_LE(	rev64		v7.16b, v7.16b			)
CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
	eor		v7.16b, v7.16b, v0.16b	// xor the initial crc value

	cmp		arg3, #16
	b.eq		_128_done		// exactly 16 left
	b.lt		_less_than_16_left

	ldr_l		q10, rk1, x8		// rk1 and rk2 in v10

	// update the counter. subtract 32 instead of 16 to save one
	// instruction from the loop
	subs		arg3, arg3, #32
	b.ge		_16B_reduction_loop

	// fewer than 16 bytes follow the block already in v7:
	// restore arg3 to that byte count and handle the tail
	add		arg3, arg3, #16
	b		_get_last_two_regs

_less_than_16_left:
	// fewer than 16 valid bytes were loaded into v7: shift the
	// excess out using the tbl shuffle mask selected by arg3
	adr_l		x0, tbl_shf_table + 16
	sub		x0, x0, arg3
	ld1		{v0.16b}, [x0]
	movi		v9.16b, #0x80
	eor		v0.16b, v0.16b, v9.16b
	tbl		v7.16b, {v7.16b}, v0.16b
	b		_128_done
ENDPROC(crc_t10dif_pmull)
// precomputed constants
// these constants are precomputed from the poly:
// 0x8bb70000 (0x8bb7 scaled to 32 bits)
	.section	".rodata", "a"
	.align		4

// Q = 0x18BB70000
// rk1 = 2^(32*3) mod Q << 32
// rk2 = 2^(32*5) mod Q << 32
// rk3 = 2^(32*15) mod Q << 32
// rk4 = 2^(32*17) mod Q << 32
// rk5 = 2^(32*3) mod Q << 32
// rk6 = 2^(32*2) mod Q << 32
// rk7 = floor(2^64/Q)
// rk8 = Q
//
// Each .octa below packs a pair of constants: the labelled (odd-numbered)
// one in its low 64 bits and the following even-numbered one in its high
// 64 bits, so that a single 128-bit load fetches both.

rk1:	.octa		0x06df0000000000002d56000000000000
rk3:	.octa		0x7cf50000000000009d9d000000000000
rk5:	.octa		0x13680000000000002d56000000000000
rk7:	.octa		0x000000018bb7000000000001f65a57f8
rk9:	.octa		0xbfd6000000000000ceae000000000000
rk11:	.octa		0x713c0000000000001e16000000000000
rk13:	.octa		0x80a6000000000000f7f9000000000000
rk15:	.octa		0xe658000000000000044c000000000000
rk17:	.octa		0xa497000000000000ad18000000000000
rk19:	.octa		0xe7b50000000000006ee3000000000000

tbl_shf_table:
// use these values for shift constants for the tbl/tbx instruction
// different alignments result in values as shown:
//	DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
//	DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-2) / shr2
//	DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-3) / shr3
//	DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
//	DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
//	DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
//	DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9  (16-7) / shr7
//	DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8  (16-8) / shr8
//	DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7  (16-9) / shr9
//	DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6  (16-10) / shr10
//	DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5  (16-11) / shr11
//	DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4  (16-12) / shr12
//	DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3  (16-13) / shr13
//	DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2  (16-14) / shr14
//	DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1  (16-15) / shr15
//
// The code loads 16 bytes starting at (tbl_shf_table + 16 - n), so the
// table is 32 bytes long.

	.byte		0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
	.byte		0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x00