//
// Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions
//
// Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
// Copyright (C) 2019 Google LLC <ebiggers@google.com>
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License version 2 as
// published by the Free Software Foundation.
//

// Derived from the x86 version:
//
// Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
//
// Copyright (c) 2013, Intel Corporation
//
// Authors:
//     Erdinc Ozturk <erdinc.ozturk@intel.com>
//     Vinodh Gopal <vinodh.gopal@intel.com>
//     James Guilford <james.guilford@intel.com>
//     Tim Chen <tim.c.chen@linux.intel.com>
//
// This software is available to you under a choice of one of two
// licenses.  You may choose to be licensed under the terms of the GNU
// General Public License (GPL) Version 2, available from the file
// COPYING in the main directory of this source tree, or the
// OpenIB.org BSD license below:
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
//   notice, this list of conditions and the following disclaimer.
//
// * Redistributions in binary form must reproduce the above copyright
//   notice, this list of conditions and the following disclaimer in the
//   documentation and/or other materials provided with the
//   distribution.
//
// * Neither the name of the Intel Corporation nor the names of its
//   contributors may be used to endorse or promote products derived from
//   this software without specific prior written permission.
//
//
// THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Reference paper titled "Fast CRC Computation for Generic
// Polynomials Using PCLMULQDQ Instruction"
// URL: http://www.intel.com/content/dam/www/public/us/en/documents
// /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
//
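//
// For reference, the CRC computed by this file is equivalent to the plain
// bit-at-a-time C routine below (an illustrative sketch only, not part of
// the kernel sources; CRC-T10DIF uses polynomial 0x8bb7, initial value 0,
// no bit reflection and no final XOR):
//
//	u16 crc_t10dif_bitwise(u16 crc, const u8 *buf, size_t len)
//	{
//		int i;
//
//		while (len--) {
//			crc ^= (u16)*buf++ << 8;
//			for (i = 0; i < 8; i++)
//				crc = (crc << 1) ^ ((crc & 0x8000) ? 0x8bb7 : 0);
//		}
//		return crc;
//	}
//
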
#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch		armv8-a+crypto

	init_crc	.req	w0
	buf		.req	x1
	len		.req	x2
	fold_consts_ptr	.req	x3

	fold_consts	.req	v10

	ad		.req	v14

	k00_16		.req	v15
	k32_48		.req	v16

	t3		.req	v17
	t4		.req	v18
	t5		.req	v19
	t6		.req	v20
	t7		.req	v21
	t8		.req	v22
	t9		.req	v23

	perm1		.req	v24
	perm2		.req	v25
	perm3		.req	v26
	perm4		.req	v27

	bd1		.req	v28
	bd2		.req	v29
	bd3		.req	v30
	bd4		.req	v31

	.macro		__pmull_init_p64
	.endm

	.macro		__pmull_pre_p64, bd
	.endm
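
	// Note: the p64 variants need no setup because the Crypto Extensions
	// PMULL/PMULL2 instructions (the .1q forms used by __pmull_p64 below)
	// perform the full 64x64 -> 128 bit carryless multiply in hardware.
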
	.macro		__pmull_init_p8
	// k00_16 := 0x0000000000000000_000000000000ffff
	// k32_48 := 0x00000000ffffffff_0000ffffffffffff
	movi		k32_48.2d, #0xffffffff
	mov		k32_48.h[2], k32_48.h[0]
	ushr		k00_16.2d, k32_48.2d, #32

	// prepare the permutation vectors
	mov_q		x5, 0x080f0e0d0c0b0a09
	movi		perm4.8b, #8
	dup		perm1.2d, x5
	eor		perm1.16b, perm1.16b, perm4.16b
	ushr		perm2.2d, perm1.2d, #8
	ushr		perm3.2d, perm1.2d, #16
	ushr		perm4.2d, perm1.2d, #24
	sli		perm2.2d, perm1.2d, #56
	sli		perm3.2d, perm1.2d, #48
	sli		perm4.2d, perm1.2d, #40
	.endm

	.macro		__pmull_pre_p8, bd
	tbl		bd1.16b, {\bd\().16b}, perm1.16b
	tbl		bd2.16b, {\bd\().16b}, perm2.16b
	tbl		bd3.16b, {\bd\().16b}, perm3.16b
	tbl		bd4.16b, {\bd\().16b}, perm4.16b
	.endm
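
	// The p8 variants emulate a 64x64 -> 128 bit carryless multiply using
	// only the baseline AdvSIMD PMULL on 8-bit lanes.  Informally (an
	// illustrative C sketch only; clmul8() stands in for an 8x8 -> 16 bit
	// polynomial multiply):
	//
	//	u128 r = 0;
	//	for (i = 0; i < 8; i++)
	//		for (j = 0; j < 8; j++)
	//			r ^= (u128)clmul8((a >> 8*i) & 0xff,
	//					  (b >> 8*j) & 0xff) << 8*(i + j);
	//
	// __pmull_pre_p8 precomputes byte-rotated copies of the constant
	// operand (bd1..bd4) so that __pmull_p8_core can evaluate all of these
	// byte products with a handful of vector multiplies, then align and
	// combine the partial results with the masks, uzp/zip and ext
	// instructions below.
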
SYM_FUNC_START_LOCAL(__pmull_p8_core)
.L__pmull_p8_core:
	ext		t4.8b, ad.8b, ad.8b, #1			// A1
	ext		t5.8b, ad.8b, ad.8b, #2			// A2
	ext		t6.8b, ad.8b, ad.8b, #3			// A3

	pmull		t4.8h, t4.8b, fold_consts.8b		// F = A1*B
	pmull		t8.8h, ad.8b, bd1.8b			// E = A*B1
	pmull		t5.8h, t5.8b, fold_consts.8b		// H = A2*B
	pmull		t7.8h, ad.8b, bd2.8b			// G = A*B2
	pmull		t6.8h, t6.8b, fold_consts.8b		// J = A3*B
	pmull		t9.8h, ad.8b, bd3.8b			// I = A*B3
	pmull		t3.8h, ad.8b, bd4.8b			// K = A*B4
	b		0f

.L__pmull_p8_core2:
	tbl		t4.16b, {ad.16b}, perm1.16b		// A1
	tbl		t5.16b, {ad.16b}, perm2.16b		// A2
	tbl		t6.16b, {ad.16b}, perm3.16b		// A3

	pmull2		t4.8h, t4.16b, fold_consts.16b		// F = A1*B
	pmull2		t8.8h, ad.16b, bd1.16b			// E = A*B1
	pmull2		t5.8h, t5.16b, fold_consts.16b		// H = A2*B
	pmull2		t7.8h, ad.16b, bd2.16b			// G = A*B2
	pmull2		t6.8h, t6.16b, fold_consts.16b		// J = A3*B
	pmull2		t9.8h, ad.16b, bd3.16b			// I = A*B3
	pmull2		t3.8h, ad.16b, bd4.16b			// K = A*B4

0:	eor		t4.16b, t4.16b, t8.16b			// L = E + F
	eor		t5.16b, t5.16b, t7.16b			// M = G + H
	eor		t6.16b, t6.16b, t9.16b			// N = I + J

	uzp1		t8.2d, t4.2d, t5.2d
	uzp2		t4.2d, t4.2d, t5.2d
	uzp1		t7.2d, t6.2d, t3.2d
	uzp2		t6.2d, t6.2d, t3.2d

	// t4 = (L) (P0 + P1) << 8
	// t5 = (M) (P2 + P3) << 16
	eor		t8.16b, t8.16b, t4.16b
	and		t4.16b, t4.16b, k32_48.16b

	// t6 = (N) (P4 + P5) << 24
	// t7 = (K) (P6 + P7) << 32
	eor		t7.16b, t7.16b, t6.16b
	and		t6.16b, t6.16b, k00_16.16b

	eor		t8.16b, t8.16b, t4.16b
	eor		t7.16b, t7.16b, t6.16b

	zip2		t5.2d, t8.2d, t4.2d
	zip1		t4.2d, t8.2d, t4.2d
	zip2		t3.2d, t7.2d, t6.2d
	zip1		t6.2d, t7.2d, t6.2d

	ext		t4.16b, t4.16b, t4.16b, #15
	ext		t5.16b, t5.16b, t5.16b, #14
	ext		t6.16b, t6.16b, t6.16b, #13
	ext		t3.16b, t3.16b, t3.16b, #12

	eor		t4.16b, t4.16b, t5.16b
	eor		t6.16b, t6.16b, t3.16b
	ret
SYM_FUNC_END(__pmull_p8_core)
	.macro		__pmull_p8, rq, ad, bd, i
	.ifnc		\bd, fold_consts
	.err
	.endif
	mov		ad.16b, \ad\().16b
	.ifb		\i
	pmull		\rq\().8h, \ad\().8b, \bd\().8b		// D = A*B
	.else
	pmull2		\rq\().8h, \ad\().16b, \bd\().16b	// D = A*B
	.endif

	bl		.L__pmull_p8_core\i

	eor		\rq\().16b, \rq\().16b, t4.16b
	eor		\rq\().16b, \rq\().16b, t6.16b
	.endm

	// Fold reg1, reg2 into the next 32 data bytes, storing the result back
	// into reg1, reg2.
	.macro		fold_32_bytes, p, reg1, reg2
	ldp		q11, q12, [buf], #0x20

	__pmull_\p	v8, \reg1, fold_consts, 2
	__pmull_\p	\reg1, \reg1, fold_consts

CPU_LE(	rev64		v11.16b, v11.16b		)
CPU_LE(	rev64		v12.16b, v12.16b		)

	__pmull_\p	v9, \reg2, fold_consts, 2
	__pmull_\p	\reg2, \reg2, fold_consts

CPU_LE(	ext		v11.16b, v11.16b, v11.16b, #8	)
CPU_LE(	ext		v12.16b, v12.16b, v12.16b, #8	)

	eor		\reg1\().16b, \reg1\().16b, v8.16b
	eor		\reg2\().16b, \reg2\().16b, v9.16b
	eor		\reg1\().16b, \reg1\().16b, v11.16b
	eor		\reg2\().16b, \reg2\().16b, v12.16b
	.endm
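
	// The identity behind each fold step, informally: if R = R_hi*x^64 + R_lo
	// is a 128-bit value accumulated so far and N more data bytes follow it,
	// then R * x^(8*N) is congruent modulo G(x) to
	//
	//	R_hi * (x^(8*N+64) mod G(x)) + R_lo * (x^(8*N) mod G(x))
	//
	// where '*' is carryless multiplication and '+' is XOR.  Each fold thus
	// costs two pmulls with precomputed 16-bit constants (fold_consts.d[1]
	// and .d[0] respectively), XORed with the next block of data; the
	// constants live in .Lfold_across_*_bytes_consts below.
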
	// Fold src_reg into dst_reg, optionally loading the next fold constants
	.macro		fold_16_bytes, p, src_reg, dst_reg, load_next_consts
	__pmull_\p	v8, \src_reg, fold_consts
	__pmull_\p	\src_reg, \src_reg, fold_consts, 2
	.ifnb		\load_next_consts
	ld1		{fold_consts.2d}, [fold_consts_ptr], #16
	__pmull_pre_\p	fold_consts
	.endif
	eor		\dst_reg\().16b, \dst_reg\().16b, v8.16b
	eor		\dst_reg\().16b, \dst_reg\().16b, \src_reg\().16b
	.endm

	.macro		__pmull_p64, rd, rn, rm, n
	.ifb		\n
	pmull		\rd\().1q, \rn\().1d, \rm\().1d
	.else
	pmull2		\rd\().1q, \rn\().2d, \rm\().2d
	.endif
	.endm

	.macro		crc_t10dif_pmull, p
	__pmull_init_\p

	// For sizes less than 256 bytes, we can't fold 128 bytes at a time.
	cmp		len, #256
	b.lt		.Lless_than_256_bytes_\@

	adr_l		fold_consts_ptr, .Lfold_across_128_bytes_consts

	// Load the first 128 data bytes.  Byte swapping is necessary to make
	// the bit order match the polynomial coefficient order.
	ldp		q0, q1, [buf]
	ldp		q2, q3, [buf, #0x20]
	ldp		q4, q5, [buf, #0x40]
	ldp		q6, q7, [buf, #0x60]
	add		buf, buf, #0x80
CPU_LE(	rev64		v0.16b, v0.16b			)
CPU_LE(	rev64		v1.16b, v1.16b			)
CPU_LE(	rev64		v2.16b, v2.16b			)
CPU_LE(	rev64		v3.16b, v3.16b			)
CPU_LE(	rev64		v4.16b, v4.16b			)
CPU_LE(	rev64		v5.16b, v5.16b			)
CPU_LE(	rev64		v6.16b, v6.16b			)
CPU_LE(	rev64		v7.16b, v7.16b			)
CPU_LE(	ext		v0.16b, v0.16b, v0.16b, #8	)
CPU_LE(	ext		v1.16b, v1.16b, v1.16b, #8	)
CPU_LE(	ext		v2.16b, v2.16b, v2.16b, #8	)
CPU_LE(	ext		v3.16b, v3.16b, v3.16b, #8	)
CPU_LE(	ext		v4.16b, v4.16b, v4.16b, #8	)
CPU_LE(	ext		v5.16b, v5.16b, v5.16b, #8	)
CPU_LE(	ext		v6.16b, v6.16b, v6.16b, #8	)
CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)

	// XOR the first 16 data *bits* with the initial CRC value.
	movi		v8.16b, #0
	mov		v8.h[7], init_crc
	eor		v0.16b, v0.16b, v8.16b
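
	// (Why this works, informally: with a starting CRC the value to compute
	//  is (init_crc * x^(8*len) + M(x) * x^16) mod G(x), so XORing init_crc
	//  into the 16 highest-order coefficients of the message supplies the
	//  init_crc * x^(8*len) term with no extra multiplications.)
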
	// Load the constants for folding across 128 bytes.
	ld1		{fold_consts.2d}, [fold_consts_ptr]
	__pmull_pre_\p	fold_consts

	// Subtract 128 for the 128 data bytes just consumed.  Subtract another
	// 128 to simplify the termination condition of the following loop.
	sub		len, len, #256

	// While >= 128 data bytes remain (not counting v0-v7), fold the 128
	// bytes v0-v7 into them, storing the result back into v0-v7.
.Lfold_128_bytes_loop_\@:
	fold_32_bytes	\p, v0, v1
	fold_32_bytes	\p, v2, v3
	fold_32_bytes	\p, v4, v5
	fold_32_bytes	\p, v6, v7

	subs		len, len, #128
	b.ge		.Lfold_128_bytes_loop_\@

	// Now fold the 112 bytes in v0-v6 into the 16 bytes in v7.

	// Fold across 64 bytes.
	add		fold_consts_ptr, fold_consts_ptr, #16
	ld1		{fold_consts.2d}, [fold_consts_ptr], #16
	__pmull_pre_\p	fold_consts
	fold_16_bytes	\p, v0, v4
	fold_16_bytes	\p, v1, v5
	fold_16_bytes	\p, v2, v6
	fold_16_bytes	\p, v3, v7, 1
	// Fold across 32 bytes.
	fold_16_bytes	\p, v4, v6
	fold_16_bytes	\p, v5, v7, 1
	// Fold across 16 bytes.
	fold_16_bytes	\p, v6, v7

	// Add 128 to get the correct number of data bytes remaining in 0...127
	// (not counting v7), following the previous extra subtraction by 128.
	// Then subtract 16 to simplify the termination condition of the
	// following loop.
	adds		len, len, #(128-16)

	// While >= 16 data bytes remain (not counting v7), fold the 16 bytes v7
	// into them, storing the result back into v7.
	b.lt		.Lfold_16_bytes_loop_done_\@
.Lfold_16_bytes_loop_\@:
	__pmull_\p	v8, v7, fold_consts
	__pmull_\p	v7, v7, fold_consts, 2
	eor		v7.16b, v7.16b, v8.16b
	ldr		q0, [buf], #16
CPU_LE(	rev64		v0.16b, v0.16b			)
CPU_LE(	ext		v0.16b, v0.16b, v0.16b, #8	)
	eor		v7.16b, v7.16b, v0.16b
	subs		len, len, #16
	b.ge		.Lfold_16_bytes_loop_\@

.Lfold_16_bytes_loop_done_\@:
	// Add 16 to get the correct number of data bytes remaining in 0...15
	// (not counting v7), following the previous extra subtraction by 16.
	adds		len, len, #16
	b.eq		.Lreduce_final_16_bytes_\@

.Lhandle_partial_segment_\@:
	// Reduce the last '16 + len' bytes where 1 <= len <= 15 and the first
	// 16 bytes are in v7 and the rest are the remaining data in 'buf'.  To
	// do this without needing a fold constant for each possible 'len',
	// redivide the bytes into a first chunk of 'len' bytes and a second
	// chunk of 16 bytes, then fold the first chunk into the second.

	// v0 = last 16 original data bytes
	add		buf, buf, len
	ldr		q0, [buf, #-16]
CPU_LE(	rev64		v0.16b, v0.16b			)
CPU_LE(	ext		v0.16b, v0.16b, v0.16b, #8	)

	// v1 = high order part of second chunk: v7 left-shifted by 'len' bytes.
	adr_l		x4, .Lbyteshift_table + 16
	sub		x4, x4, len
	ld1		{v2.16b}, [x4]
	tbl		v1.16b, {v7.16b}, v2.16b

	// v3 = first chunk: v7 right-shifted by '16-len' bytes.
	movi		v3.16b, #0x80
	eor		v2.16b, v2.16b, v3.16b
	tbl		v3.16b, {v7.16b}, v2.16b

	// Convert to 8-bit masks: 'len' 0x00 bytes, then '16-len' 0xff bytes.
	sshr		v2.16b, v2.16b, #7

	// v2 = second chunk: 'len' bytes from v0 (low-order bytes),
	// then '16-len' bytes from v1 (high-order bytes).
	bsl		v2.16b, v1.16b, v0.16b

	// Fold the first chunk into the second chunk, storing the result in v7.
	__pmull_\p	v0, v3, fold_consts
	__pmull_\p	v7, v3, fold_consts, 2
	eor		v7.16b, v7.16b, v0.16b
	eor		v7.16b, v7.16b, v2.16b

.Lreduce_final_16_bytes_\@:
	// Reduce the 128-bit value M(x), stored in v7, to the final 16-bit CRC.

	movi		v2.16b, #0		// init zero register

	// Load 'x^48 * (x^48 mod G(x))' and 'x^48 * (x^80 mod G(x))'.
	ld1		{fold_consts.2d}, [fold_consts_ptr], #16
	__pmull_pre_\p	fold_consts

	// Fold the high 64 bits into the low 64 bits, while also multiplying by
	// x^64.  This produces a 128-bit value congruent to x^64 * M(x) and
	// whose low 48 bits are 0.
	ext		v0.16b, v2.16b, v7.16b, #8
	__pmull_\p	v7, v7, fold_consts, 2	// high bits * x^48 * (x^80 mod G(x))
	eor		v0.16b, v0.16b, v7.16b	// + low bits * x^64

	// Fold the high 32 bits into the low 96 bits.  This produces a 96-bit
	// value congruent to x^64 * M(x) and whose low 48 bits are 0.
	ext		v1.16b, v0.16b, v2.16b, #12	// extract high 32 bits
	mov		v0.s[3], v2.s[0]	// zero high 32 bits
	__pmull_\p	v1, v1, fold_consts	// high 32 bits * x^48 * (x^48 mod G(x))
	eor		v0.16b, v0.16b, v1.16b	// + low bits

	// Load G(x) and floor(x^48 / G(x)).
	ld1		{fold_consts.2d}, [fold_consts_ptr]
	__pmull_pre_\p	fold_consts

	// Use Barrett reduction to compute the final CRC value.
	__pmull_\p	v1, v0, fold_consts, 2	// high 32 bits * floor(x^48 / G(x))
	ushr		v1.2d, v1.2d, #32	// /= x^32
	__pmull_\p	v1, v1, fold_consts	// *= G(x)
	ushr		v0.2d, v0.2d, #48
	eor		v0.16b, v0.16b, v1.16b	// + low 16 nonzero bits
	// Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of v0.

	umov		w0, v0.h[0]
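
	// (Barrett reduction, informally: for polynomials over GF(2),
	//  V mod G(x) equals V + Q*G(x) with Q = floor(V / G(x)).  The division
	//  is avoided by multiplying the high part of V by the precomputed
	//  floor(x^48 / G(x)) and discarding low-order bits to recover Q; one
	//  more multiply by G(x) and an XOR then cancel everything above the
	//  16 CRC bits.)
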
	.ifc		\p, p8
	ldp		x29, x30, [sp], #16
	.endif
	ret

.Lless_than_256_bytes_\@:
	// Checksumming a buffer of length 16...255 bytes

	adr_l		fold_consts_ptr, .Lfold_across_16_bytes_consts

	// Load the first 16 data bytes.
	ldr		q7, [buf], #0x10
CPU_LE(	rev64		v7.16b, v7.16b			)
CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)

	// XOR the first 16 data *bits* with the initial CRC value.
	movi		v0.16b, #0
	mov		v0.h[7], init_crc
	eor		v7.16b, v7.16b, v0.16b

	// Load the fold-across-16-bytes constants.
	ld1		{fold_consts.2d}, [fold_consts_ptr], #16
	__pmull_pre_\p	fold_consts

	cmp		len, #16
	b.eq		.Lreduce_final_16_bytes_\@	// len == 16
	subs		len, len, #32
	b.ge		.Lfold_16_bytes_loop_\@		// 32 <= len <= 255
	add		len, len, #16
	b		.Lhandle_partial_segment_\@	// 17 <= len <= 31
	.endm

//
// u16 crc_t10dif_pmull_p8(u16 init_crc, const u8 *buf, size_t len);
//
// Assumes len >= 16.
//
SYM_FUNC_START(crc_t10dif_pmull_p8)
	stp		x29, x30, [sp, #-16]!
	mov		x29, sp
	crc_t10dif_pmull p8
SYM_FUNC_END(crc_t10dif_pmull_p8)

	.align		5
//
// u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len);
//
// Assumes len >= 16.
//
SYM_FUNC_START(crc_t10dif_pmull_p64)
	crc_t10dif_pmull	p64
SYM_FUNC_END(crc_t10dif_pmull_p64)
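
//
// A rough sketch of how the C side might drive these entry points (purely
// illustrative; the wrapper name and the 'have_pmull64' flag are hypothetical,
// and the real glue code elsewhere in the kernel handles CPU feature checks,
// kernel-mode NEON management and short buffers in its own way):
//
//	u16 crc_t10dif_update_pmull(u16 crc, const u8 *data, size_t len)
//	{
//		if (len < 16)		/* both entry points assume len >= 16 */
//			return crc_t10dif_generic(crc, data, len);
//		kernel_neon_begin();
//		crc = have_pmull64 ? crc_t10dif_pmull_p64(crc, data, len)
//				   : crc_t10dif_pmull_p8(crc, data, len);
//		kernel_neon_end();
//		return crc;
//	}
//
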
	.section	".rodata", "a"
	.align		4

// Fold constants precomputed from the polynomial 0x18bb7
// G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
.Lfold_across_128_bytes_consts:
	.quad		0x0000000000006123	// x^(8*128)	mod G(x)
	.quad		0x0000000000002295	// x^(8*128+64)	mod G(x)
// .Lfold_across_64_bytes_consts:
	.quad		0x0000000000001069	// x^(4*128)	mod G(x)
	.quad		0x000000000000dd31	// x^(4*128+64)	mod G(x)
// .Lfold_across_32_bytes_consts:
	.quad		0x000000000000857d	// x^(2*128)	mod G(x)
	.quad		0x0000000000007acc	// x^(2*128+64)	mod G(x)
.Lfold_across_16_bytes_consts:
	.quad		0x000000000000a010	// x^(1*128)	mod G(x)
	.quad		0x0000000000001faa	// x^(1*128+64)	mod G(x)
// .Lfinal_fold_consts:
	.quad		0x1368000000000000	// x^48 * (x^48 mod G(x))
	.quad		0x2d56000000000000	// x^48 * (x^80 mod G(x))
// .Lbarrett_reduction_consts:
	.quad		0x0000000000018bb7	// G(x)
	.quad		0x00000001f65a57f8	// floor(x^48 / G(x))

// For 1 <= len <= 15, the 16-byte vector beginning at &byteshift_table[16 -
// len] is the index vector to shift left by 'len' bytes, and is also {0x80,
// ..., 0x80} XOR the index vector to shift right by '16 - len' bytes.
.Lbyteshift_table:
	.byte		 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
	.byte		0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe , 0x0
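
// Worked example: for len = 3 the vector loaded from .Lbyteshift_table + 13
// is { 0x8d, 0x8e, 0x8f, 0x0, 0x1, ..., 0xc }.  Used as tbl indices it moves
// byte i of v7 to byte i+3 and zeroes the three lowest bytes (a left shift
// by 3 bytes), while the same vector XORed with { 0x80, ..., 0x80 } becomes
// { 0x0d, 0x0e, 0x0f, 0x80, ... }, which selects the top three bytes of v7
// into the lowest lanes (a right shift by 16 - 3 = 13 bytes).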