/*
 * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
 * instructions. This file contains the accelerated part of the ghash
 * implementation. More information about PCLMULQDQ can be found at:
 *
 * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/
 *
 * Copyright (c) 2009 Intel Corp.
 *   Author: Huang Ying <ying.huang@intel.com>
 *	     Vinodh Gopal
 *	     Erdinc Ozturk
 *	     Deniz Karakoyunlu
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation.
 */
#include <linux/linkage.h>
#include <asm/inst.h>

/*
 * Constant operands. They are only ever read, so they are placed in a
 * read-only section. Each .octa is 16 bytes, so with the leading
 * .align 16 every label below is 16-byte aligned, as required by the
 * movaps/pcmpeqd/pand memory operands that reference them.
 */
.section .rodata
.align 16
/* pshufb control mask that reverses the order of the 16 bytes */
.Lbswap_mask:
	.octa 0x000102030405060708090a0b0c0d0e0f
/* value XORed into the key by clmul_ghash_setkey when a bit is shifted out */
.Lpoly:
	.octa 0xc2000000000000000000000000000001
/* comparison pattern used by clmul_ghash_setkey to build the .Lpoly mask */
.Ltwo_one:
	.octa 0x00000001000000000000000000000001

/* Register roles shared by every routine in this file */
#define DATA	%xmm0
#define SHASH	%xmm1
#define T1	%xmm2
#define T2	%xmm3
#define T3	%xmm4
#define BSWAP	%xmm5
#define IN1	%xmm6

.text
/*
 * __clmul_gf128mul_ble:	internal ABI
 *
 * Carry-less multiplication of two 128-bit operands followed by a
 * two-phase reduction of the 256-bit product back to 128 bits.
 *
 * input:
 *	DATA:	operand1
 *	SHASH:	operand2, hash_key << 1 mod poly
 * output:
 *	DATA:	operand1 * operand2 mod poly
 * changed:
 *	T1
 *	T2
 *	T3
 *
 * Only XMM registers are used: no general-purpose register and no
 * memory is read or written, so callers may keep pointers and counters
 * live across the call.
 */
__clmul_gf128mul_ble:
	/*
	 * Karatsuba multiplication: with operand1 = a1:a0 and
	 * operand2 = b1:b0 (64-bit halves), three 64x64 carry-less
	 * multiplies are enough to form the 256-bit product.
	 */
	movaps DATA, T1
	pshufd $0b01001110, DATA, T2	/* T2 = DATA with 64-bit halves swapped */
	pshufd $0b01001110, SHASH, T3	/* T3 = SHASH with 64-bit halves swapped */
	pxor DATA, T2			/* each half of T2 = a1 + a0 */
	pxor SHASH, T3			/* each half of T3 = b1 + b0 */

	PCLMULQDQ 0x00 SHASH DATA	/* DATA = a0 * b0 */
	PCLMULQDQ 0x11 SHASH T1		/* T1 = a1 * b1 */
	PCLMULQDQ 0x00 T3 T2		/* T2 = (a1 + a0) * (b1 + b0) */

	pxor DATA, T2
	pxor T1, T2			/* T2 = a0 * b1 + a1 * b0 */

	/* fold the middle term into the low and high 128-bit halves */
	movaps T2, T3
	pslldq $8, T3
	psrldq $8, T2
	pxor T3, DATA
	pxor T2, T1			/* <T1:DATA> is result of */
					/* carry-less multiplication */

	/*
	 * first phase of the reduction
	 * per 64-bit lane: T3 = DATA << 63 ^ DATA << 62 ^ DATA << 57
	 */
	movaps DATA, T3
	psllq $1, T3
	pxor DATA, T3
	psllq $5, T3
	pxor DATA, T3
	psllq $57, T3
	/* low qword of T3 goes into DATA's high qword, high qword into T1 */
	movaps T3, T2
	pslldq $8, T2
	psrldq $8, T3
	pxor T2, DATA
	pxor T3, T1

	/*
	 * second phase of the reduction
	 * per 64-bit lane: T2 = DATA >> 7 ^ DATA >> 2 ^ DATA >> 1
	 */
	movaps DATA, T2
	psrlq $5, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor DATA, T2
	psrlq $1, T2
	pxor T2, T1
	pxor T1, DATA			/* DATA = reduced 128-bit result */
	ret
ENDPROC(__clmul_gf128mul_ble)
/*
 * void clmul_ghash_mul(char *dst, const be128 *shash)
 *
 * Multiply the 16-byte block at dst by the key at shash and store the
 * result back to dst.
 *
 * input:
 *	%rdi:	dst, 16 bytes, read and written (no alignment required)
 *	%rsi:	shash, 16 bytes, read only (no alignment required)
 * changed:
 *	DATA, SHASH, T1, T2, T3, BSWAP
 */
ENTRY(clmul_ghash_mul)
	movups (%rdi), DATA
	movups (%rsi), SHASH
	movaps .Lbswap_mask, BSWAP
	PSHUFB_XMM BSWAP DATA		/* reverse byte order of the block */
	call __clmul_gf128mul_ble	/* DATA = DATA * SHASH mod poly */
	PSHUFB_XMM BSWAP DATA		/* restore the original byte order */
	movups DATA, (%rdi)
	ret
ENDPROC(clmul_ghash_mul)
/*
 * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
 *			   const be128 *shash);
 *
 * For every complete 16-byte block of src: XOR the block into the
 * running value held at dst and multiply the result by the key at shash.
 * A trailing partial block (srclen % 16 bytes) is not consumed, and dst
 * is left untouched when srclen < 16.
 *
 * input:
 *	%rdi:	dst, 16 bytes, read and written (no alignment required)
 *	%rsi:	src, advanced by 16 per processed block
 *	%edx:	srclen in bytes
 *	%rcx:	shash, 16 bytes, read only
 * changed:
 *	%rsi, %rdx, flags, DATA, SHASH, T1, T2, T3, BSWAP, IN1
 *
 * srclen is a 32-bit unsigned value, so all length arithmetic is done on
 * %edx with unsigned branches; the upper half of %rdx is never trusted.
 */
ENTRY(clmul_ghash_update)
	cmp $16, %edx
	jb .Lupdate_just_ret		/* less than one block: nothing to do */
	movaps .Lbswap_mask, BSWAP
	movups (%rdi), DATA
	movups (%rcx), SHASH
	PSHUFB_XMM BSWAP DATA		/* reverse byte order of the running value */
.align 4
.Lupdate_loop:
	movups (%rsi), IN1
	PSHUFB_XMM BSWAP IN1		/* reverse byte order of the input block */
	pxor IN1, DATA
	call __clmul_gf128mul_ble	/* uses XMM registers only */
	sub $16, %edx			/* %edx = bytes remaining */
	add $16, %rsi
	cmp $16, %edx
	jae .Lupdate_loop		/* unsigned: another full block left */
	PSHUFB_XMM BSWAP DATA		/* restore the original byte order */
	movups DATA, (%rdi)
.Lupdate_just_ret:
	ret
ENDPROC(clmul_ghash_update)
/*
 * void clmul_ghash_setkey(be128 *shash, const u8 *key);
 *
 * Calculate hash_key << 1 mod poly
 *
 * input:
 *	%rdi:	shash, 16 bytes, written (no alignment required)
 *	%rsi:	key, 16 bytes, read only (no alignment required)
 * changed:
 *	%xmm0, %xmm1, %xmm2, BSWAP
 */
ENTRY(clmul_ghash_setkey)
	movaps .Lbswap_mask, BSWAP
	movups (%rsi), %xmm0
	PSHUFB_XMM BSWAP %xmm0		/* reverse byte order of the key */

	/* 128-bit shift left by one, built from two 64-bit lane shifts */
	movaps %xmm0, %xmm1
	psllq $1, %xmm0			/* each lane << 1 */
	psrlq $63, %xmm1		/* bit shifted out of each lane */
	movaps %xmm1, %xmm2
	pslldq $8, %xmm1		/* carry of the low lane -> high lane */
	psrldq $8, %xmm2		/* carry of the high lane -> bit 0 of xmm2 */
	por %xmm1, %xmm0

	/*
	 * reduction
	 * Replicate the shifted-out bit into dwords 0 and 3, compare with
	 * .Ltwo_one to get an all-ones mask in every dword when the bit is
	 * set (dwords 1 and 2 compare 0 == 0), then XOR in .Lpoly under
	 * that mask.
	 */
	pshufd $0b00100100, %xmm2, %xmm1
	pcmpeqd .Ltwo_one, %xmm1
	pand .Lpoly, %xmm1
	pxor %xmm1, %xmm0
	movups %xmm0, (%rdi)
	ret
ENDPROC(clmul_ghash_setkey)