/* SPDX-License-Identifier: GPL-2.0 */
/*
 * NH - ε-almost-universal hash function, x86_64 SSE2 accelerated
 *
 * Copyright 2018 Google LLC
 *
 * Author: Eric Biggers <ebiggers@google.com>
 */
#include <linux/linkage.h>

/*
 * Register aliases (AT&T syntax, expanded by the C preprocessor).
 */

/* Running 64-bit accumulators, one register per pass. */
#define		PASS0_SUMS	%xmm0
#define		PASS1_SUMS	%xmm1
#define		PASS2_SUMS	%xmm2
#define		PASS3_SUMS	%xmm3

/* The four 16-byte key strides currently held in registers. */
#define		K0		%xmm4
#define		K1		%xmm5
#define		K2		%xmm6
#define		K3		%xmm7

/* Scratch registers. */
#define		T0		%xmm8
#define		T1		%xmm9
#define		T2		%xmm10
#define		T3		%xmm11
#define		T4		%xmm12
#define		T5		%xmm13
#define		T6		%xmm14
#define		T7		%xmm15

/* Function arguments, in the order of the C prototype of nh_sse2(). */
#define		KEY		%rdi
#define		MESSAGE		%rsi
#define		MESSAGE_LEN	%rdx
#define		HASH		%rcx

/*
 * _nh_stride k0, k1, k2, k3, offset
 *
 * Process one 16-byte message stride for all four passes.
 *
 * In:	\k0, \k1, \k2	key strides already loaded in registers
 *	\offset		byte offset of this stride, applied to both MESSAGE
 *			and KEY
 * Out:	\k3		freshly loaded with the 16 key bytes at \offset(KEY)
 *	PASSn_SUMS	each has two 64-bit products added to it
 *
 * Clobbers \k0 (it is used as a scratch register and no longer holds key
 * material afterwards) and T1-T7.
 */
.macro _nh_stride	k0, k1, k2, k3, offset

	// Load next message stride
	movdqu		\offset(MESSAGE), T1

	// Load next key stride
	movdqu		\offset(KEY), \k3

	// Add message words to key words (32-bit lanes): one sum per pass,
	// left in \k0, T1, T2 and T3 respectively.
	movdqa		T1, T2
	movdqa		T1, T3
	paddd		T1, \k0    // reuse k0 to avoid a move
	paddd		\k1, T1
	paddd		\k2, T2
	paddd		\k3, T3

	// Multiply 32x32 => 64 and accumulate.
	// pshufd $0x10 puts words 0 and 1 into the even 32-bit lanes and
	// pshufd $0x32 puts words 2 and 3 there; pmuludq multiplies the even
	// lanes, giving (word0 * word2, word1 * word3) as two 64-bit values.
	pshufd		$0x10, \k0, T4
	pshufd		$0x32, \k0, \k0
	pshufd		$0x10, T1, T5
	pshufd		$0x32, T1, T1
	pshufd		$0x10, T2, T6
	pshufd		$0x32, T2, T2
	pshufd		$0x10, T3, T7
	pshufd		$0x32, T3, T3
	pmuludq		T4, \k0
	pmuludq		T5, T1
	pmuludq		T6, T2
	pmuludq		T7, T3
	paddq		\k0, PASS0_SUMS
	paddq		T1, PASS1_SUMS
	paddq		T2, PASS2_SUMS
	paddq		T3, PASS3_SUMS
.endm

/*
 * void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
 *		u8 hash[NH_HASH_BYTES])
 *
 * It's guaranteed that message_len % 16 == 0.
 *
 * Arguments arrive in KEY, MESSAGE, MESSAGE_LEN and HASH.  KEY, MESSAGE and
 * MESSAGE_LEN are modified, and all sixteen xmm registers are used.
 * 32 bytes are written to 'hash': four 64-bit sums, pass 0 first.
 */
SYM_FUNC_START(nh_sse2)

	// Preload the first three key strides; each _nh_stride loads the
	// fourth one it needs.
	movdqu		0x00(KEY), K0
	movdqu		0x10(KEY), K1
	movdqu		0x20(KEY), K2
	add		$0x30, KEY

	// Zero the per-pass accumulators.
	pxor		PASS0_SUMS, PASS0_SUMS
	pxor		PASS1_SUMS, PASS1_SUMS
	pxor		PASS2_SUMS, PASS2_SUMS
	pxor		PASS3_SUMS, PASS3_SUMS

	// MESSAGE_LEN is kept biased by -0x40 while looping; a negative
	// result means fewer than 64 bytes are left.
	sub		$0x40, MESSAGE_LEN
	jl		.Lloop4_done

.Lloop4:
	// Four strides (64 bytes) per iteration.  The key registers rotate
	// one position per stride, so after four strides K0..K3 are back in
	// their original roles.
	_nh_stride	K0, K1, K2, K3, 0x00
	_nh_stride	K1, K2, K3, K0, 0x10
	_nh_stride	K2, K3, K0, K1, 0x20
	_nh_stride	K3, K0, K1, K2, 0x30
	add		$0x40, KEY
	add		$0x40, MESSAGE
	sub		$0x40, MESSAGE_LEN
	jge		.Lloop4

.Lloop4_done:
	// The low six bits are unaffected by the -0x40 bias, so this leaves
	// the number of bytes still to process: 0, 16, 32 or 48.
	and		$0x3f, MESSAGE_LEN
	jz		.Ldone
	_nh_stride	K0, K1, K2, K3, 0x00

	sub		$0x10, MESSAGE_LEN
	jz		.Ldone
	_nh_stride	K1, K2, K3, K0, 0x10

	sub		$0x10, MESSAGE_LEN
	jz		.Ldone
	_nh_stride	K2, K3, K0, K1, 0x20

.Ldone:
	// Sum the accumulators for each pass, then store the sums to 'hash'.
	// Each PASSn_SUMS holds two partial sums (A = low half, B = high half).
	movdqa		PASS0_SUMS, T0
	movdqa		PASS2_SUMS, T1
	punpcklqdq	PASS1_SUMS, T0		// => (PASS0_SUM_A PASS1_SUM_A)
	punpcklqdq	PASS3_SUMS, T1		// => (PASS2_SUM_A PASS3_SUM_A)
	punpckhqdq	PASS1_SUMS, PASS0_SUMS	// => (PASS0_SUM_B PASS1_SUM_B)
	punpckhqdq	PASS3_SUMS, PASS2_SUMS	// => (PASS2_SUM_B PASS3_SUM_B)
	paddq		PASS0_SUMS, T0
	paddq		PASS2_SUMS, T1
	movdqu		T0, 0x00(HASH)
	movdqu		T1, 0x10(HASH)
	// NOTE(review): newer kernel trees spell the return as the RET macro;
	// confirm against the target tree before changing it here.
	ret
SYM_FUNC_END(nh_sse2)