2010-09-24 10:04:06 +04:00
/*
Unix SMB / CIFS implementation .
trivial database library
Copyright ( C ) Rusty Russell 2010
* * NOTE ! The following LGPL license applies to the tdb
* * library . This does NOT imply that all of Samba is released
* * under the LGPL
This library is free software ; you can redistribute it and / or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation ; either
version 3 of the License , or ( at your option ) any later version .
This library is distributed in the hope that it will be useful ,
but WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
Lesser General Public License for more details .
You should have received a copy of the GNU Lesser General Public
License along with this library ; if not , see < http : //www.gnu.org/licenses/>.
*/
# include "tdb_private.h"
/* This is based on the hash algorithm from gdbm */
unsigned int tdb_old_hash ( TDB_DATA * key )
{
uint32_t value ; /* Used to compute the hash value. */
uint32_t i ; /* Used to cycle through random values. */
/* Set the initial value from the key size. */
for ( value = 0x238F13AF * key - > dsize , i = 0 ; i < key - > dsize ; i + + )
value = ( value + ( key - > dptr [ i ] < < ( i * 5 % 24 ) ) ) ;
return ( 1103515243 * value + 12345 ) ;
}
# ifndef WORDS_BIGENDIAN
# define HASH_LITTLE_ENDIAN 1
# define HASH_BIG_ENDIAN 0
# else
# define HASH_LITTLE_ENDIAN 0
# define HASH_BIG_ENDIAN 1
# endif
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
lookup3 . c , by Bob Jenkins , May 2006 , Public Domain .
These are functions for producing 32 - bit hashes for hash table lookup .
hash_word ( ) , hashlittle ( ) , hashlittle2 ( ) , hashbig ( ) , mix ( ) , and final ( )
are externally useful functions . Routines to test the hash are included
if SELF_TEST is defined . You can use this free for any purpose . It ' s in
the public domain . It has no warranty .
You probably want to use hashlittle ( ) . hashlittle ( ) and hashbig ( )
hash byte arrays . hashlittle ( ) is is faster than hashbig ( ) on
little - endian machines . Intel and AMD are little - endian machines .
On second thought , you probably want hashlittle2 ( ) , which is identical to
hashlittle ( ) except it returns two 32 - bit hashes for the price of one .
You could implement hashbig2 ( ) if you wanted but I haven ' t bothered here .
If you want to find a hash of , say , exactly 7 integers , do
a = i1 ; b = i2 ; c = i3 ;
mix ( a , b , c ) ;
a + = i4 ; b + = i5 ; c + = i6 ;
mix ( a , b , c ) ;
a + = i7 ;
final ( a , b , c ) ;
then use c as the hash value . If you have a variable length array of
4 - byte integers to hash , use hash_word ( ) . If you have a byte array ( like
a character string ) , use hashlittle ( ) . If you have several byte arrays , or
a mix of things , see the comments above hashlittle ( ) .
Why is this so big ? I read 12 bytes at a time into 3 4 - byte integers ,
then mix those integers . This is fast ( you can do a lot more thorough
mixing with 12 * 3 instructions on 3 integers than you can with 3 instructions
on 1 byte ) , but shoehorning those bytes into integers efficiently is messy .
*/
# define hashsize(n) ((uint32_t)1<<(n))
# define hashmask(n) (hashsize(n)-1)
# define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
mix - - mix 3 32 - bit values reversibly .
This is reversible , so any information in ( a , b , c ) before mix ( ) is
still in ( a , b , c ) after mix ( ) .
If four pairs of ( a , b , c ) inputs are run through mix ( ) , or through
mix ( ) in reverse , there are at least 32 bits of the output that
are sometimes the same for one pair and different for another pair .
This was tested for :
* pairs that differed by one bit , by two bits , in any combination
of top bits of ( a , b , c ) , or in any combination of bottom bits of
( a , b , c ) .
* " differ " is defined as + , - , ^ , or ~ ^ . For + and - , I transformed
the output delta to a Gray code ( a ^ ( a > > 1 ) ) so a string of 1 ' s ( as
is commonly produced by subtraction ) look like a single 1 - bit
difference .
* the base values were pseudorandom , all zero but one bit set , or
all zero plus a counter that starts at zero .
Some k values for my " a-=c; a^=rot(c,k); c+=b; " arrangement that
satisfy this are
4 6 8 16 19 4
9 15 3 18 27 15
14 9 3 7 17 3
Well , " 9 15 3 18 27 15 " didn ' t quite get 32 bits diffing
for " differ " defined as + with a one - bit base and a two - bit delta . I
used http : //burtleburtle.net/bob/hash/avalanche.html to choose
the operations , constants , and arrangements of the variables .
This does not achieve avalanche . There are input bits of ( a , b , c )
that fail to affect some output bits of ( a , b , c ) , especially of a . The
most thoroughly mixed value is c , but it doesn ' t really even achieve
avalanche in c .
This allows some parallelism . Read - after - writes are good at doubling
the number of bits affected , so the goal of mixing pulls in the opposite
direction as the goal of parallelism . I did what I could . Rotates
seem to cost as much as shifts on every machine I could lay my hands
on , and rotates are much kinder to the top and bottom bits , so I used
rotates .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
# define mix(a,b,c) \
{ \
a - = c ; a ^ = rot ( c , 4 ) ; c + = b ; \
b - = a ; b ^ = rot ( a , 6 ) ; a + = c ; \
c - = b ; c ^ = rot ( b , 8 ) ; b + = a ; \
a - = c ; a ^ = rot ( c , 16 ) ; c + = b ; \
b - = a ; b ^ = rot ( a , 19 ) ; a + = c ; \
c - = b ; c ^ = rot ( b , 4 ) ; b + = a ; \
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
final - - final mixing of 3 32 - bit values ( a , b , c ) into c
Pairs of ( a , b , c ) values differing in only a few bits will usually
produce values of c that look totally different . This was tested for
* pairs that differed by one bit , by two bits , in any combination
of top bits of ( a , b , c ) , or in any combination of bottom bits of
( a , b , c ) .
* " differ " is defined as + , - , ^ , or ~ ^ . For + and - , I transformed
the output delta to a Gray code ( a ^ ( a > > 1 ) ) so a string of 1 ' s ( as
is commonly produced by subtraction ) look like a single 1 - bit
difference .
* the base values were pseudorandom , all zero but one bit set , or
all zero plus a counter that starts at zero .
These constants passed :
14 11 25 16 4 14 24
12 14 25 16 4 14 24
and these came close :
4 8 15 26 3 22 24
10 8 15 26 3 22 24
11 8 15 26 3 22 24
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
# define final(a,b,c) \
{ \
c ^ = b ; c - = rot ( b , 14 ) ; \
a ^ = c ; a - = rot ( c , 11 ) ; \
b ^ = a ; b - = rot ( a , 25 ) ; \
c ^ = b ; c - = rot ( b , 16 ) ; \
a ^ = c ; a - = rot ( c , 4 ) ; \
b ^ = a ; b - = rot ( a , 14 ) ; \
c ^ = b ; c - = rot ( b , 24 ) ; \
}
/*
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
hashlittle ( ) - - hash a variable - length key into a 32 - bit value
k : the key ( the unaligned variable - length array of bytes )
length : the length of the key , counting by bytes
val2 : IN : can be any 4 - byte value OUT : second 32 bit hash .
Returns a 32 - bit value . Every bit of the key affects every bit of
the return value . Two keys differing by one or two bits will have
totally different hash values . Note that the return value is better
mixed than val2 , so use that first .
The best hash table sizes are powers of 2. There is no need to do
mod a prime ( mod is sooo slow ! ) . If you need less than 32 bits ,
use a bitmask . For example , if you need only 10 bits , do
h = ( h & hashmask ( 10 ) ) ;
In which case , the hash table should have hashsize ( 10 ) elements .
If you are hashing n strings ( uint8_t * * ) k , do it like this :
for ( i = 0 , h = 0 ; i < n ; + + i ) h = hashlittle ( k [ i ] , len [ i ] , h ) ;
By Bob Jenkins , 2006. bob_jenkins @ burtleburtle . net . You may use this
code any way you wish , private , educational , or commercial . It ' s free .
Use for hash table lookup , or anything where one collision in 2 ^ ^ 32 is
acceptable . Do NOT use for cryptographic purposes .
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
static uint32_t hashlittle ( const void * key , size_t length )
{
uint32_t a , b , c ; /* internal state */
union { const void * ptr ; size_t i ; } u ; /* needed for Mac Powerbook G4 */
/* Set up the internal state */
a = b = c = 0xdeadbeef + ( ( uint32_t ) length ) ;
u . ptr = key ;
if ( HASH_LITTLE_ENDIAN & & ( ( u . i & 0x3 ) = = 0 ) ) {
const uint32_t * k = ( const uint32_t * ) key ; /* read 32-bit chunks */
const uint8_t * k8 ;
/*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
while ( length > 12 )
{
a + = k [ 0 ] ;
b + = k [ 1 ] ;
c + = k [ 2 ] ;
mix ( a , b , c ) ;
length - = 12 ;
k + = 3 ;
}
/*----------------------------- handle the last (probably partial) block */
k8 = ( const uint8_t * ) k ;
switch ( length )
{
case 12 : c + = k [ 2 ] ; b + = k [ 1 ] ; a + = k [ 0 ] ; break ;
2017-07-26 17:55:10 +03:00
case 11 : c + = ( ( uint32_t ) k8 [ 10 ] ) < < 16 ; FALL_THROUGH ;
case 10 : c + = ( ( uint32_t ) k8 [ 9 ] ) < < 8 ; FALL_THROUGH ;
case 9 : c + = k8 [ 8 ] ; FALL_THROUGH ;
2010-09-24 10:04:06 +04:00
case 8 : b + = k [ 1 ] ; a + = k [ 0 ] ; break ;
2017-07-26 17:55:10 +03:00
case 7 : b + = ( ( uint32_t ) k8 [ 6 ] ) < < 16 ; FALL_THROUGH ;
case 6 : b + = ( ( uint32_t ) k8 [ 5 ] ) < < 8 ; FALL_THROUGH ;
case 5 : b + = k8 [ 4 ] ; FALL_THROUGH ;
2010-09-24 10:04:06 +04:00
case 4 : a + = k [ 0 ] ; break ;
2017-07-26 17:55:10 +03:00
case 3 : a + = ( ( uint32_t ) k8 [ 2 ] ) < < 16 ; FALL_THROUGH ;
case 2 : a + = ( ( uint32_t ) k8 [ 1 ] ) < < 8 ; FALL_THROUGH ;
2010-09-24 10:04:06 +04:00
case 1 : a + = k8 [ 0 ] ; break ;
case 0 : return c ;
}
} else if ( HASH_LITTLE_ENDIAN & & ( ( u . i & 0x1 ) = = 0 ) ) {
const uint16_t * k = ( const uint16_t * ) key ; /* read 16-bit chunks */
const uint8_t * k8 ;
/*--------------- all but last block: aligned reads and different mixing */
while ( length > 12 )
{
a + = k [ 0 ] + ( ( ( uint32_t ) k [ 1 ] ) < < 16 ) ;
b + = k [ 2 ] + ( ( ( uint32_t ) k [ 3 ] ) < < 16 ) ;
c + = k [ 4 ] + ( ( ( uint32_t ) k [ 5 ] ) < < 16 ) ;
mix ( a , b , c ) ;
length - = 12 ;
k + = 6 ;
}
/*----------------------------- handle the last (probably partial) block */
k8 = ( const uint8_t * ) k ;
switch ( length )
{
case 12 : c + = k [ 4 ] + ( ( ( uint32_t ) k [ 5 ] ) < < 16 ) ;
b + = k [ 2 ] + ( ( ( uint32_t ) k [ 3 ] ) < < 16 ) ;
a + = k [ 0 ] + ( ( ( uint32_t ) k [ 1 ] ) < < 16 ) ;
break ;
2017-07-26 17:55:10 +03:00
case 11 : c + = ( ( uint32_t ) k8 [ 10 ] ) < < 16 ; FALL_THROUGH ;
2010-09-24 10:04:06 +04:00
case 10 : c + = k [ 4 ] ;
b + = k [ 2 ] + ( ( ( uint32_t ) k [ 3 ] ) < < 16 ) ;
a + = k [ 0 ] + ( ( ( uint32_t ) k [ 1 ] ) < < 16 ) ;
break ;
2017-07-26 17:55:10 +03:00
case 9 : c + = k8 [ 8 ] ; FALL_THROUGH ;
2010-09-24 10:04:06 +04:00
case 8 : b + = k [ 2 ] + ( ( ( uint32_t ) k [ 3 ] ) < < 16 ) ;
a + = k [ 0 ] + ( ( ( uint32_t ) k [ 1 ] ) < < 16 ) ;
break ;
2017-07-26 17:55:10 +03:00
case 7 : b + = ( ( uint32_t ) k8 [ 6 ] ) < < 16 ; FALL_THROUGH ;
2010-09-24 10:04:06 +04:00
case 6 : b + = k [ 2 ] ;
a + = k [ 0 ] + ( ( ( uint32_t ) k [ 1 ] ) < < 16 ) ;
break ;
2017-07-26 17:55:10 +03:00
case 5 : b + = k8 [ 4 ] ; FALL_THROUGH ;
2010-09-24 10:04:06 +04:00
case 4 : a + = k [ 0 ] + ( ( ( uint32_t ) k [ 1 ] ) < < 16 ) ;
break ;
2017-07-26 17:55:10 +03:00
case 3 : a + = ( ( uint32_t ) k8 [ 2 ] ) < < 16 ; FALL_THROUGH ;
2010-09-24 10:04:06 +04:00
case 2 : a + = k [ 0 ] ;
break ;
case 1 : a + = k8 [ 0 ] ;
break ;
case 0 : return c ; /* zero length requires no mixing */
}
} else { /* need to read the key one byte at a time */
const uint8_t * k = ( const uint8_t * ) key ;
/*--------------- all but the last block: affect some 32 bits of (a,b,c) */
while ( length > 12 )
{
a + = k [ 0 ] ;
a + = ( ( uint32_t ) k [ 1 ] ) < < 8 ;
a + = ( ( uint32_t ) k [ 2 ] ) < < 16 ;
a + = ( ( uint32_t ) k [ 3 ] ) < < 24 ;
b + = k [ 4 ] ;
b + = ( ( uint32_t ) k [ 5 ] ) < < 8 ;
b + = ( ( uint32_t ) k [ 6 ] ) < < 16 ;
b + = ( ( uint32_t ) k [ 7 ] ) < < 24 ;
c + = k [ 8 ] ;
c + = ( ( uint32_t ) k [ 9 ] ) < < 8 ;
c + = ( ( uint32_t ) k [ 10 ] ) < < 16 ;
c + = ( ( uint32_t ) k [ 11 ] ) < < 24 ;
mix ( a , b , c ) ;
length - = 12 ;
k + = 12 ;
}
/*-------------------------------- last block: affect all 32 bits of (c) */
2017-07-26 17:55:10 +03:00
switch ( length )
2010-09-24 10:04:06 +04:00
{
2017-07-26 17:55:10 +03:00
case 12 : c + = ( ( uint32_t ) k [ 11 ] ) < < 24 ; FALL_THROUGH ;
case 11 : c + = ( ( uint32_t ) k [ 10 ] ) < < 16 ; FALL_THROUGH ;
case 10 : c + = ( ( uint32_t ) k [ 9 ] ) < < 8 ; FALL_THROUGH ;
case 9 : c + = k [ 8 ] ; FALL_THROUGH ;
case 8 : b + = ( ( uint32_t ) k [ 7 ] ) < < 24 ; FALL_THROUGH ;
case 7 : b + = ( ( uint32_t ) k [ 6 ] ) < < 16 ; FALL_THROUGH ;
case 6 : b + = ( ( uint32_t ) k [ 5 ] ) < < 8 ; FALL_THROUGH ;
case 5 : b + = k [ 4 ] ; FALL_THROUGH ;
case 4 : a + = ( ( uint32_t ) k [ 3 ] ) < < 24 ; FALL_THROUGH ;
case 3 : a + = ( ( uint32_t ) k [ 2 ] ) < < 16 ; FALL_THROUGH ;
case 2 : a + = ( ( uint32_t ) k [ 1 ] ) < < 8 ; FALL_THROUGH ;
2010-09-24 10:04:06 +04:00
case 1 : a + = k [ 0 ] ;
break ;
case 0 : return c ;
}
}
final ( a , b , c ) ;
return c ;
}
2010-10-21 13:51:37 +04:00
_PUBLIC_ unsigned int tdb_jenkins_hash ( TDB_DATA * key )
2010-09-24 10:04:06 +04:00
{
return hashlittle ( key - > dptr , key - > dsize ) ;
}