2018-12-04 22:20:01 -08:00
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * NH - ε-almost-universal hash function, x86_64 AVX2 accelerated
 *
 * Copyright 2018 Google LLC
 *
 * Author: Eric Biggers <ebiggers@google.com>
 */
# include < l i n u x / l i n k a g e . h >
2022-11-18 11:44:12 -08:00
# include < l i n u x / c f i _ t y p e s . h >
2018-12-04 22:20:01 -08:00
// Running 64-bit sums, one register per NH pass.
#define		PASS0_SUMS	%ymm0
#define		PASS1_SUMS	%ymm1
#define		PASS2_SUMS	%ymm2
#define		PASS3_SUMS	%ymm3

// Key words currently held in registers.  The _XMM names are the low
// 128-bit views of the same registers.
#define		K0		%ymm4
#define		K0_XMM		%xmm4
#define		K1		%ymm5
#define		K1_XMM		%xmm5
#define		K2		%ymm6
#define		K2_XMM		%xmm6
#define		K3		%ymm7
#define		K3_XMM		%xmm7

// Scratch registers.  T3 carries the message words into _nh_2xstride.
#define		T0		%ymm8
#define		T1		%ymm9
#define		T2		%ymm10
#define		T2_XMM		%xmm10
#define		T3		%ymm11
#define		T3_XMM		%xmm11
#define		T4		%ymm12
#define		T5		%ymm13
#define		T6		%ymm14
#define		T7		%ymm15

// Function arguments of nh_avx2(), in parameter order.
#define		KEY		%rdi
#define		MESSAGE		%rsi
#define		MESSAGE_LEN	%rdx
#define		HASH		%rcx
/*
 * _nh_2xstride k0, k1, k2, k3
 *
 * Process the 32 bytes of message held in T3 (two 16-byte strides) for all
 * four passes at once.
 *
 *   In:   T3       message words
 *         k0..k3   registers holding the key words for passes 0..3
 *   Out:  PASSn_SUMS += products for pass n (64-bit lanes)
 *   Clobbers: T0-T7 (T3 included)
 *
 * Within each 128-bit lane the (message + key) dwords are paired as
 * dword0 * dword2 and dword1 * dword3.
 */
.macro	_nh_2xstride	k0, k1, k2, k3

	// Message words + key words, one temporary per pass.  T3 is the
	// source of every addition, so it has to be overwritten last.
	vpaddd		\k0, T3, T0
	vpaddd		\k1, T3, T1
	vpaddd		\k2, T3, T2
	vpaddd		\k3, T3, T3

	// Put dwords 0,1 (shuffle 0x10) and dwords 2,3 (shuffle 0x32) into
	// the even dword slots, which are the ones vpmuludq reads.
	vpshufd		$0x10, T0, T4
	vpshufd		$0x32, T0, T0
	vpshufd		$0x10, T1, T5
	vpshufd		$0x32, T1, T1
	vpshufd		$0x10, T2, T6
	vpshufd		$0x32, T2, T2
	vpshufd		$0x10, T3, T7
	vpshufd		$0x32, T3, T3

	// 32x32 => 64 bit multiplies
	vpmuludq	T4, T0, T0
	vpmuludq	T5, T1, T1
	vpmuludq	T6, T2, T2
	vpmuludq	T7, T3, T3

	// Fold the products into the per-pass accumulators
	vpaddq		T0, PASS0_SUMS, PASS0_SUMS
	vpaddq		T1, PASS1_SUMS, PASS1_SUMS
	vpaddq		T2, PASS2_SUMS, PASS2_SUMS
	vpaddq		T3, PASS3_SUMS, PASS3_SUMS
.endm
/*
 * void nh_avx2(const u32 *key, const u8 *message, size_t message_len,
 *		__le64 hash[NH_NUM_PASSES])
 *
 * It's guaranteed that message_len % 16 == 0.
 *
 * The arguments arrive in KEY, MESSAGE, MESSAGE_LEN and HASH.  The message is
 * consumed 64 bytes (4 strides) per main-loop iteration; a remainder of 1-3
 * strides is handled by the tail code.  The four 64-bit pass sums are written
 * to 'hash' with a single 32-byte store.
 *
 * Clobbers ymm0-ymm15, KEY, MESSAGE, MESSAGE_LEN and the flags.  The upper
 * halves of the ymm registers are cleared with vzeroupper before returning.
 */
SYM_TYPED_FUNC_START(nh_avx2)

	// Preload the first two key registers and zero the accumulators.
	vmovdqu		0x00(KEY), K0
	vmovdqu		0x10(KEY), K1
	add		$0x20, KEY
	vpxor		PASS0_SUMS, PASS0_SUMS, PASS0_SUMS
	vpxor		PASS1_SUMS, PASS1_SUMS, PASS1_SUMS
	vpxor		PASS2_SUMS, PASS2_SUMS, PASS2_SUMS
	vpxor		PASS3_SUMS, PASS3_SUMS, PASS3_SUMS

	// MESSAGE_LEN is kept biased by -0x40 while the main loop runs.
	sub		$0x40, MESSAGE_LEN
	jl		.Lloop4_done
.Lloop4:
	// First 32 bytes: K0/K1 are already loaded, fetch K2/K3.
	vmovdqu		(MESSAGE), T3
	vmovdqu		0x00(KEY), K2
	vmovdqu		0x10(KEY), K3
	_nh_2xstride	K0, K1, K2, K3

	// Second 32 bytes: the key registers swap roles, so each block of
	// key words is loaded from memory only once.
	vmovdqu		0x20(MESSAGE), T3
	vmovdqu		0x20(KEY), K0
	vmovdqu		0x30(KEY), K1
	_nh_2xstride	K2, K3, K0, K1

	add		$0x40, MESSAGE
	add		$0x40, KEY
	sub		$0x40, MESSAGE_LEN
	jge		.Lloop4

.Lloop4_done:
	// Undo the bias: the low 6 bits are the number of bytes left (0-0x30).
	and		$0x3f, MESSAGE_LEN
	jz		.Ldone

	cmp		$0x20, MESSAGE_LEN
	jl		.Llast

	// 2 or 3 strides remain; do 2 more.
	vmovdqu		(MESSAGE), T3
	vmovdqu		0x00(KEY), K2
	vmovdqu		0x10(KEY), K3
	_nh_2xstride	K0, K1, K2, K3
	add		$0x20, MESSAGE
	add		$0x20, KEY
	sub		$0x20, MESSAGE_LEN
	jz		.Ldone
	// One stride is left; the keys just loaded become its leading keys.
	vmovdqa		K2, K0
	vmovdqa		K3, K1
.Llast:
	// Last stride.  Zero the high 128 bits of the message and keys so they
	// don't affect the result when processing them like 2 strides.
	vmovdqu		(MESSAGE), T3_XMM
	vmovdqa		K0_XMM, K0_XMM
	vmovdqa		K1_XMM, K1_XMM
	vmovdqu		0x00(KEY), K2_XMM
	vmovdqu		0x10(KEY), K3_XMM
	_nh_2xstride	K0, K1, K2, K3

.Ldone:
	// Sum the accumulators for each pass, then store the sums to 'hash'

	// PASS0_SUMS is (0A 0B 0C 0D)
	// PASS1_SUMS is (1A 1B 1C 1D)
	// PASS2_SUMS is (2A 2B 2C 2D)
	// PASS3_SUMS is (3A 3B 3C 3D)
	// We need the horizontal sums:
	//     (0A + 0B + 0C + 0D,
	//      1A + 1B + 1C + 1D,
	//      2A + 2B + 2C + 2D,
	//      3A + 3B + 3C + 3D)
	//

	vpunpcklqdq	PASS1_SUMS, PASS0_SUMS, T0	// T0 = (0A 1A 0C 1C)
	vpunpckhqdq	PASS1_SUMS, PASS0_SUMS, T1	// T1 = (0B 1B 0D 1D)
	vpunpcklqdq	PASS3_SUMS, PASS2_SUMS, T2	// T2 = (2A 3A 2C 3C)
	vpunpckhqdq	PASS3_SUMS, PASS2_SUMS, T3	// T3 = (2B 3B 2D 3D)

	vinserti128	$0x1, T2_XMM, T0, T4		// T4 = (0A 1A 2A 3A)
	vinserti128	$0x1, T3_XMM, T1, T5		// T5 = (0B 1B 2B 3B)
	vperm2i128	$0x31, T2, T0, T0		// T0 = (0C 1C 2C 3C)
	vperm2i128	$0x31, T3, T1, T1		// T1 = (0D 1D 2D 3D)

	vpaddq		T5, T4, T4
	vpaddq		T1, T0, T0
	vpaddq		T4, T0, T0
	vmovdqu		T0, (HASH)

	// The ymm registers were written above; clear their upper halves so
	// that SSE code executed after we return is not slowed down by
	// AVX/SSE transition penalties.
	vzeroupper
	RET
SYM_FUNC_END(nh_avx2)