[TESTS] updates to hash experimentations

Aleksandar Lazic has collected many hashing algorithms and put them
in one file to ease benchmarking. Some algos look promising, some
of them have been checked further with uri_hash. Some results on
various systems/hardware are stored in hash_results.txt.
This commit is contained in:
Willy Tarreau 2007-05-13 11:40:04 +02:00
parent 6653d17b8d
commit ae5f7dadef
3 changed files with 920 additions and 8 deletions

218
tests/hash_results.txt Normal file
View File

@ -0,0 +1,218 @@
Test: ./test_hashes | sort -k 3 -r
Note: haproxy_server_hash should be avoided as it's just a 32 bit XOR.
Athlon @ 1533 MHz, gcc-3.4 -march=i686 :
haproxy_server_hash : 18477000 run/sec
SuperFastHash : 6983511 run/sec
hash_djbx33 : 4164334 run/sec
bernstein : 3371838 run/sec
kr_hash : 3257684 run/sec
sax_hash : 3027567 run/sec
fnv_hash : 2818374 run/sec
haproxy_uri_hash : 2108346 run/sec
oat_hash : 2106181 run/sec
hashword : 1936973 run/sec
hashpjw : 1803475 run/sec
fnv_32a_str : 1499198 run/sec
Pentium-M @1700 MHz, gcc-3.4 -march=i686 :
haproxy_server_hash : 15471737 run/sec
SuperFastHash : 8155706 run/sec
hash_djbx33 : 4520191 run/sec
bernstein : 3956142 run/sec
kr_hash : 3725125 run/sec
fnv_hash : 3155413 run/sec
sax_hash : 2688323 run/sec
oat_hash : 2452789 run/sec
haproxy_uri_hash : 2010853 run/sec
hashword : 1831441 run/sec
hashpjw : 1737000 run/sec
fnv_32a_str : 1643737 run/sec
Athlon @ 1533 MHz, gcc-4.1 -march=i686 :
haproxy_server_hash : 13592089 run/sec
SuperFastHash2 : 8687957 run/sec
SuperFastHash : 7361242 run/sec
hash_djbx33 : 5741546 run/sec
bernstein : 3368909 run/sec
sax_hash : 3339880 run/sec
kr_hash : 3277230 run/sec
fnv_hash : 2832402 run/sec
hashword : 2500317 run/sec
haproxy_uri_hash : 2433241 run/sec
oat_hash : 2403118 run/sec
hashpjw : 1881229 run/sec
fnv_32a_str : 1815709 run/sec
Pentium-M @1700 MHz, gcc-4.1 -march=i686 :
haproxy_server_hash : 14128788 run/sec
SuperFastHash2 : 8157119 run/sec
SuperFastHash : 7481027 run/sec
hash_djbx33 : 5660711 run/sec
bernstein : 3961493 run/sec
fnv_hash : 3590727 run/sec
kr_hash : 3389393 run/sec
sax_hash : 2667227 run/sec
oat_hash : 2348211 run/sec
hashword : 2278856 run/sec
haproxy_uri_hash : 2098022 run/sec
hashpjw : 1846583 run/sec
fnv_32a_str : 1661219 run/sec
Pentium-M @600 MHz, gcc-4.1 -march=i686 :
haproxy_server_hash : 5318468 run/sec
SuperFastHash2 : 3126165 run/sec
SuperFastHash : 2729981 run/sec
hash_djbx33 : 2042181 run/sec
bernstein : 1422927 run/sec
fnv_hash : 1287736 run/sec
kr_hash : 1217924 run/sec
sax_hash : 949694 run/sec
oat_hash : 837279 run/sec
hashword : 812868 run/sec
haproxy_uri_hash : 747611 run/sec
hashpjw : 659890 run/sec
fnv_32a_str : 590895 run/sec
athlon @ 1.5 GHz, gcc-2.95 -march=i686 :
haproxy_server_hash : 13592864 run/sec
SuperFastHash : 6931251 run/sec
bernstein : 4105179 run/sec
hash_djbx33 : 3920059 run/sec
kr_hash : 2985794 run/sec
fnv_hash : 2815457 run/sec
sax_hash : 2791358 run/sec
haproxy_uri_hash : 2786663 run/sec
oat_hash : 2237859 run/sec
hashword : 1985740 run/sec
hashpjw : 1757733 run/sec
fnv_32a_str : 1697299 run/sec
Pentium-M @ 600 MHz, gcc-2.95 -march=i686 :
SuperFastHash : 2934387 run/sec
haproxy_server_hash : 2864668 run/sec
hash_djbx33 : 1498043 run/sec
bernstein : 1414993 run/sec
kr_hash : 1297907 run/sec
fnv_hash : 1260343 run/sec
sax_hash : 924764 run/sec
oat_hash : 854545 run/sec
haproxy_uri_hash : 790040 run/sec
hashword : 693501 run/sec
hashpjw : 647346 run/sec
fnv_32a_str : 579691 run/sec
Pentium-M @ 1700 MHz, gcc-2.95 -march=i686 :
SuperFastHash : 8006127 run/sec
haproxy_server_hash : 7834162 run/sec
hash_djbx33 : 4186025 run/sec
bernstein : 3941492 run/sec
kr_hash : 3630713 run/sec
fnv_hash : 3507488 run/sec
sax_hash : 2528128 run/sec
oat_hash : 2395188 run/sec
haproxy_uri_hash : 2158924 run/sec
hashword : 1910992 run/sec
hashpjw : 1819894 run/sec
fnv_32a_str : 1629844 run/sec
UltraSparc @ 400 MHz, gcc-3.4.3 :
haproxy_server_hash : 5573220 run/sec
SuperFastHash : 1372714 run/sec
bernstein : 1361733 run/sec
hash_djbx33 : 1090373 run/sec
sax_hash : 872499 run/sec
oat_hash : 730354 run/sec
kr_hash : 645431 run/sec
haproxy_uri_hash : 541157 run/sec
fnv_32a_str : 442608 run/sec
hashpjw : 434858 run/sec
fnv_hash : 401945 run/sec
hashword : 340594 run/sec
UltraSparc @ 400 MHz, gcc-3.4.3 -mcpu=v9 :
haproxy_server_hash : 5671183 run/sec
bernstein : 1437122 run/sec
hash_djbx33 : 1376294 run/sec
SuperFastHash : 1306634 run/sec
sax_hash : 873650 run/sec
kr_hash : 801439 run/sec
oat_hash : 729920 run/sec
haproxy_uri_hash : 545341 run/sec
hashpjw : 472190 run/sec
fnv_32a_str : 443668 run/sec
hashword : 357295 run/sec
fnv_hash : 208823 run/sec
Alpha EV6 @ 466 MHz, gcc-3.3 :
haproxy_server_hash : 2495928 run/sec
SuperFastHash : 2037208 run/sec
hash_djbx33 : 1625092 run/sec
kr_hash : 1532206 run/sec
bernstein : 1256746 run/sec
haproxy_uri_hash : 999106 run/sec
oat_hash : 841943 run/sec
sax_hash : 737447 run/sec
hashpjw : 676170 run/sec
fnv_hash : 644054 run/sec
fnv_32a_str : 638526 run/sec
hashword : 421777 run/sec
VIA EPIA @ 533 MHz, gcc-2.95 -march=i586 :
haproxy_server_hash : 1391374 run/sec
SuperFastHash : 912397 run/sec
hash_djbx33 : 589868 run/sec
kr_hash : 453706 run/sec
bernstein : 437318 run/sec
sax_hash : 334456 run/sec
hashpjw : 316670 run/sec
hashword : 315476 run/sec
haproxy_uri_hash : 311112 run/sec
oat_hash : 259127 run/sec
fnv_32a_str : 229485 run/sec
fnv_hash : 151620 run/sec
VIA EPIA @ 533 MHz, gcc-3.4 -march=i586 :
haproxy_server_hash : 1660407 run/sec
SuperFastHash : 791981 run/sec
hash_djbx33 : 680498 run/sec
kr_hash : 384076 run/sec
bernstein : 377247 run/sec
sax_hash : 355183 run/sec
hashpjw : 298879 run/sec
haproxy_uri_hash : 296748 run/sec
oat_hash : 283932 run/sec
hashword : 269429 run/sec
fnv_32a_str : 204776 run/sec
fnv_hash : 155301 run/sec
Pentium @ 133 MHz, gcc-3.4 -march=i586 :
haproxy_server_hash : 930788 run/sec
SuperFastHash : 344988 run/sec
hash_djbx33 : 278996 run/sec
bernstein : 211545 run/sec
sax_hash : 185225 run/sec
kr_hash : 156603 run/sec
oat_hash : 135163 run/sec
hashword : 128518 run/sec
fnv_hash : 107024 run/sec
haproxy_uri_hash : 105523 run/sec
fnv_32a_str : 99913 run/sec
hashpjw : 97860 run/sec
VAX VLC4000 @30 MHz, gcc-2.95 :
haproxy_server_hash : 13208 run/sec
hash_djbx33 : 12963 run/sec
fnv_hash : 12150 run/sec
SuperFastHash : 12037 run/sec
bernstein : 11765 run/sec
kr_hash : 11111 run/sec
sax_hash : 7273 run/sec
hashword : 7143 run/sec
oat_hash : 6931 run/sec
hashpjw : 6667 run/sec
haproxy_uri_hash : 5714 run/sec
fnv_32a_str : 4800 run/sec

545
tests/test_hashes.c Normal file
View File

@ -0,0 +1,545 @@
/*
This file only show how many operations a hash is able to handle.
It don't show the distribution nor collisions.
gcc -Wall -O3 -o test_hashes test_hashes.c
./test_hashes |sort -k 3 -r
*/
#include <sys/time.h>
#include <time.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <stdio.h>
//#include <stdint.h>
static struct timeval timeval_current(void)
{
struct timeval tv;
gettimeofday(&tv, NULL);
return tv;
}
static double timeval_elapsed(struct timeval *tv)
{
struct timeval tv2 = timeval_current();
return (tv2.tv_sec - tv->tv_sec) +
(tv2.tv_usec - tv->tv_usec)*1.0e-6;
}
#define HAPROXY_BACKENDS 4
unsigned long haproxy_uri_hash(char *uri, int uri_len){
unsigned long hash = 0;
int c;
while (uri_len--) {
c = *uri++;
if (c == '?')
break;
hash = c + (hash << 6) + (hash << 16) - hash;
}
return hash%HAPROXY_BACKENDS; /* I assume 4 active backends */
} /* end haproxy_hash() */
/*
* http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx
*/
unsigned sax_hash ( void *key, int len )
{
unsigned char *p = key;
unsigned h = 0;
int i;
for ( i = 0; i < len; i++ )
h ^= ( h << 5 ) + ( h >> 2 ) + p[i];
return h;
}
#include <arpa/inet.h>
/* len 4 for ipv4 and 16 for ipv6 */
unsigned int haproxy_server_hash(const char *addr, int len){
unsigned int h, l;
l = h = 0;
while ((l + sizeof (int)) <= len) {
h ^= ntohl(*(unsigned int *)(&addr[l]));
l += sizeof (int);
}
return h %= HAPROXY_BACKENDS;
}/* end haproxy_server_hash() */
int hashpjw(const void *key) {
const char *ptr;
unsigned int val;
/*********************************************************************
* *
* Hash the key by performing a number of bit operations on it. *
* *
*********************************************************************/
val = 0;
ptr = key;
while (*ptr != '\0') {
int tmp;
val = (val << 4) + (*ptr);
if((tmp = (val & 0xf0000000))) {
val = val ^ (tmp >> 24);
val = val ^ tmp;
}
ptr++;
}/* end while */
return val;
}/* end hashpjw */
static unsigned long
hash_djbx33(
register unsigned char *key,
register size_t len)
{
register unsigned long hash = 5381;
/* the hash unrolled eight times */
for (; len >= 8; len -= 8) {
hash = ((hash << 5) + hash) + *key++;
hash = ((hash << 5) + hash) + *key++;
hash = ((hash << 5) + hash) + *key++;
hash = ((hash << 5) + hash) + *key++;
hash = ((hash << 5) + hash) + *key++;
hash = ((hash << 5) + hash) + *key++;
hash = ((hash << 5) + hash) + *key++;
hash = ((hash << 5) + hash) + *key++;
}
switch (len) {
case 7: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
case 6: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
case 5: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
case 4: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
case 3: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
case 2: hash = ((hash << 5) + hash) + *key++; /* fallthrough... */
case 1: hash = ((hash << 5) + hash) + *key++; break;
default: /* case 0: */ break;
}
return hash;
}
typedef unsigned long int ub4; /* unsigned 4-byte quantities */
typedef unsigned char ub1; /* unsigned 1-byte quantities */
ub4 bernstein(ub1 *key, ub4 len, ub4 level){
ub4 hash = level;
ub4 i;
for (i=0; i<len; ++i) hash = 33*hash + key[i];
return hash;
}
/*
* http://www.azillionmonkeys.com/qed/hash.html
*/
#undef get16bits
#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
|| defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
#define get16bits(d) (*((const uint16_t *) (d)))
#endif
#if !defined (get16bits)
#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
+(uint32_t)(((const uint8_t *)(d))[0]) )
#endif
/*
* This function has a hole of 11 unused bits in bytes 2 and 3 of each block of
* 32 bits.
*/
uint32_t SuperFastHash (const char * data, int len) {
uint32_t hash = len, tmp;
int rem;
if (len <= 0 || data == NULL) return 0;
rem = len & 3;
len >>= 2;
/* Main loop */
for (;len > 0; len--) {
hash += get16bits (data);
tmp = (get16bits (data+2) << 11) ^ hash;
hash = (hash << 16) ^ tmp;
data += 2*sizeof (uint16_t);
hash += hash >> 11;
}
/* Handle end cases */
switch (rem) {
case 3: hash += get16bits (data);
hash ^= hash << 16;
hash ^= data[sizeof (uint16_t)] << 18;
hash += hash >> 11;
break;
case 2: hash += get16bits (data);
hash ^= hash << 11;
hash += hash >> 17;
break;
case 1: hash += *data;
hash ^= hash << 10;
hash += hash >> 1;
}
/* Force "avalanching" of final 127 bits */
hash ^= hash << 3;
hash += hash >> 5;
hash ^= hash << 4;
hash += hash >> 17;
hash ^= hash << 25;
hash += hash >> 6;
return hash;
}
/*
* This variant uses all bits from the input block, and is about 15% faster.
*/
uint32_t SuperFastHash2 (const char * data, int len) {
uint32_t hash = len, tmp;
int rem;
if (len <= 0 || data == NULL) return 0;
rem = len & 3;
len >>= 2;
/* Main loop */
for (;len > 0; len--) {
register uint32_t next;
next = get16bits(data+2);
hash += get16bits(data);
tmp = ((next << 11) | (next >> 21)) ^ hash;
hash = (hash << 16) ^ tmp;
data += 2*sizeof (uint16_t);
hash += hash >> 11;
}
/* Handle end cases */
switch (rem) {
case 3: hash += get16bits (data);
hash ^= hash << 16;
hash ^= data[sizeof (uint16_t)] << 18;
hash += hash >> 11;
break;
case 2: hash += get16bits (data);
hash ^= hash << 11;
hash += hash >> 17;
break;
case 1: hash += *data;
hash ^= hash << 10;
hash += hash >> 1;
}
/* Force "avalanching" of final 127 bits */
hash ^= hash << 3;
hash += hash >> 5;
hash ^= hash << 4;
hash += hash >> 17;
hash ^= hash << 25;
hash += hash >> 6;
return hash;
}
/*
* 32 bit FNV-0 hash type
*/
typedef unsigned long Fnv32_t;
/*
* fnv_32a_str - perform a 32 bit Fowler/Noll/Vo FNV-1a hash on a string
*
* input:
* str - string to hash
* hval - previous hash value or 0 if first call
*
* returns:
* 32 bit hash as a static hash type
*
* NOTE: To use the recommended 32 bit FNV-1a hash, use FNV1_32A_INIT as the
* hval arg on the first call to either fnv_32a_buf() or fnv_32a_str().
*/
Fnv32_t
fnv_32a_str(char *str, Fnv32_t hval)
{
unsigned char *s = (unsigned char *)str; /* unsigned string */
/*
* FNV-1a hash each octet in the buffer
*/
while (*s) {
/* xor the bottom with the current octet */
hval ^= (Fnv32_t)*s++;
/* #define NO_FNV_GCC_OPTIMIZATION */
/* multiply by the 32 bit FNV magic prime mod 2^32 */
#if defined(NO_FNV_GCC_OPTIMIZATION)
/*
* 32 bit magic FNV-1a prime
*/
#define FNV_32_PRIME ((Fnv32_t)0x01000193)
hval *= FNV_32_PRIME;
#else
hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24);
#endif
}
/* return our new hash value */
return hval;
}
/*
* from lookup3.c, by Bob Jenkins, May 2006, Public Domain.
*/
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
/*
-------------------------------------------------------------------------------
mix -- mix 3 32-bit values reversibly.
This is reversible, so any information in (a,b,c) before mix() is
still in (a,b,c) after mix().
If four pairs of (a,b,c) inputs are run through mix(), or through
mix() in reverse, there are at least 32 bits of the output that
are sometimes the same for one pair and different for another pair.
This was tested for:
* pairs that differed by one bit, by two bits, in any combination
of top bits of (a,b,c), or in any combination of bottom bits of
(a,b,c).
* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
is commonly produced by subtraction) look like a single 1-bit
difference.
* the base values were pseudorandom, all zero but one bit set, or
all zero plus a counter that starts at zero.
Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
satisfy this are
4 6 8 16 19 4
9 15 3 18 27 15
14 9 3 7 17 3
Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
for "differ" defined as + with a one-bit base and a two-bit delta. I
used http://burtleburtle.net/bob/hash/avalanche.html to choose
the operations, constants, and arrangements of the variables.
This does not achieve avalanche. There are input bits of (a,b,c)
that fail to affect some output bits of (a,b,c), especially of a. The
most thoroughly mixed value is c, but it doesn't really even achieve
avalanche in c.
This allows some parallelism. Read-after-writes are good at doubling
the number of bits affected, so the goal of mixing pulls in the opposite
direction as the goal of parallelism. I did what I could. Rotates
seem to cost as much as shifts on every machine I could lay my hands
on, and rotates are much kinder to the top and bottom bits, so I used
rotates.
-------------------------------------------------------------------------------
*/
#define mix(a,b,c) \
{ \
a -= c; a ^= rot(c, 4); c += b; \
b -= a; b ^= rot(a, 6); a += c; \
c -= b; c ^= rot(b, 8); b += a; \
a -= c; a ^= rot(c,16); c += b; \
b -= a; b ^= rot(a,19); a += c; \
c -= b; c ^= rot(b, 4); b += a; \
}
/*
-------------------------------------------------------------------------------
final -- final mixing of 3 32-bit values (a,b,c) into c
Pairs of (a,b,c) values differing in only a few bits will usually
produce values of c that look totally different. This was tested for
* pairs that differed by one bit, by two bits, in any combination
of top bits of (a,b,c), or in any combination of bottom bits of
(a,b,c).
* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
is commonly produced by subtraction) look like a single 1-bit
difference.
* the base values were pseudorandom, all zero but one bit set, or
all zero plus a counter that starts at zero.
These constants passed:
14 11 25 16 4 14 24
12 14 25 16 4 14 24
and these came close:
4 8 15 26 3 22 24
10 8 15 26 3 22 24
11 8 15 26 3 22 24
-------------------------------------------------------------------------------
*/
#define final(a,b,c) \
{ \
c ^= b; c -= rot(b,14); \
a ^= c; a -= rot(c,11); \
b ^= a; b -= rot(a,25); \
c ^= b; c -= rot(b,16); \
a ^= c; a -= rot(c,4); \
b ^= a; b -= rot(a,14); \
c ^= b; c -= rot(b,24); \
}
/*
--------------------------------------------------------------------
This works on all machines. To be useful, it requires
-- that the key be an array of uint32_t's, and
-- that the length be the number of uint32_t's in the key
The function hashword() is identical to hashlittle() on little-endian
machines, and identical to hashbig() on big-endian machines,
except that the length has to be measured in uint32_ts rather than in
bytes. hashlittle() is more complicated than hashword() only because
hashlittle() has to dance around fitting the key bytes into registers.
--------------------------------------------------------------------
*/
uint32_t hashword(
const uint32_t *k, /* the key, an array of uint32_t values */
size_t length, /* the length of the key, in uint32_ts */
uint32_t initval) /* the previous hash, or an arbitrary value */
{
uint32_t a,b,c;
/* Set up the internal state */
a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
/*------------------------------------------------- handle most of the key */
while (length > 3)
{
a += k[0];
b += k[1];
c += k[2];
mix(a,b,c);
length -= 3;
k += 3;
}
/*------------------------------------------- handle the last 3 uint32_t's */
switch(length) /* all the case statements fall through */
{
case 3 : c+=k[2];
case 2 : b+=k[1];
case 1 : a+=k[0];
final(a,b,c);
case 0: /* case 0: nothing left to add */
break;
}
/*------------------------------------------------------ report the result */
return c;
}
/* from K&R book site 139 */
#define HASHSIZE 101
unsigned kr_hash(char *s){
unsigned hashval;
for(hashval = 0; *s != '\0';s++)
hashval = *s + 31 * hashval;
return hashval % HASHSIZE;
} /* end kr_hash() */
unsigned fnv_hash ( void *key, int len )
{
unsigned char *p = key;
unsigned h = 2166136261;
int i;
for ( i = 0; i < len; i++ )
h = ( h * 16777619 ) ^ p[i];
return h;
}
unsigned oat_hash ( void *key, int len )
{
unsigned char *p = key;
unsigned h = 0;
int i;
for ( i = 0; i < len; i++ ) {
h += p[i];
h += ( h << 10 );
h ^= ( h >> 6 );
}
h += ( h << 3 );
h ^= ( h >> 11 );
h += ( h << 15 );
return h;
}
#define run_test(fct, args) { \
unsigned long loop, count; \
volatile unsigned long result; \
double delta; \
struct timeval tv; \
fprintf(stderr, "Starting %s\n", #fct); \
tv = timeval_current(); \
count = 0; \
do { \
delta = timeval_elapsed(&tv); \
for (loop = 0; loop < 1000; loop++) { \
result = fct args; \
count++; \
} \
} while (delta < 1.0); \
fprintf(stdout, "%-20s : %10.0f run/sec\n", #fct, count/delta); \
fflush(stdout); \
}
int main(){
char **start;
int len;
char *urls[] = {
"http://www.microsoft.com/shared/core/1/webservice/navigation.asmx/DisplayDownlevelNavHtml",
NULL
};
start = urls;
len = strlen(*urls);
run_test(SuperFastHash2, (*urls, len));
run_test(SuperFastHash, (*urls, len));
run_test(haproxy_uri_hash, (*urls, len));
run_test(haproxy_server_hash, (*urls, len));
run_test(hashpjw, (*urls));
run_test(hash_djbx33, ((unsigned char *)*urls, len));
run_test(bernstein, ((unsigned char *)*urls, len, 4));
run_test(fnv_32a_str, (*urls, 0));
run_test(hashword, ((const uint32_t *)*urls,strlen(*urls),0));
run_test(kr_hash, (*urls));
run_test(sax_hash, (*urls, len));
run_test(fnv_hash, (*urls, len));
run_test(oat_hash, (*urls, len));
return 0;
}/* end main() */

View File

@ -1,4 +1,6 @@
#include <stdio.h> #include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#define NSERV 10 #define NSERV 10
#define MAXLINE 1000 #define MAXLINE 1000
@ -11,7 +13,7 @@ static unsigned long hash_gd1(char *uri)
unsigned long hash = 0; unsigned long hash = 0;
int c; int c;
while (c = *uri++) while ((c = *uri++))
hash = c + (hash << 6) + (hash << 16) - hash; hash = c + (hash << 6) + (hash << 16) - hash;
return hash; return hash;
@ -23,7 +25,7 @@ static unsigned long hash_gd2(char *uri)
unsigned long hash = 0; unsigned long hash = 0;
int c; int c;
while (c = *uri++) { while ((c = *uri++)) {
if (c == '?' || c == '\n') if (c == '?' || c == '\n')
break; break;
hash = c + (hash << 6) + (hash << 16) - hash; hash = c + (hash << 6) + (hash << 16) - hash;
@ -39,7 +41,7 @@ static unsigned long hash_gd3(char *uri)
unsigned long hash = 0; unsigned long hash = 0;
int c; int c;
while (c = *uri++) { while ((c = *uri++)) {
if (c == '?' || c == '\n') if (c == '?' || c == '\n')
break; break;
hash = c - (hash << 3) + (hash << 15) - hash; hash = c - (hash << 3) + (hash << 15) - hash;
@ -55,7 +57,7 @@ static unsigned long hash_gd4(char *uri)
unsigned long hash = 0; unsigned long hash = 0;
int c; int c;
while (c = *uri++) { while ((c = *uri++)) {
if (c == '?' || c == '\n') if (c == '?' || c == '\n')
break; break;
hash = hash + (hash << 6) - (hash << 15) - c; hash = hash + (hash << 6) - (hash << 15) - c;
@ -71,7 +73,7 @@ static unsigned long hash_gd5(char *uri)
unsigned long hash = 0; unsigned long hash = 0;
int c; int c;
while (c = *uri++) { while ((c = *uri++)) {
if (c == '?' || c == '\n') if (c == '?' || c == '\n')
break; break;
hash = hash + (hash << 2) - (hash << 19) - c; hash = hash + (hash << 2) - (hash << 19) - c;
@ -87,7 +89,7 @@ static unsigned long hash_gd6(char *uri)
unsigned long hash = 0; unsigned long hash = 0;
int c; int c;
while (c = *uri++) { while ((c = *uri++)) {
if (c == '?' || c == '\n') if (c == '?' || c == '\n')
break; break;
hash = hash + (hash << 2) - (hash << 22) - c; hash = hash + (hash << 2) - (hash << 22) - c;
@ -152,7 +154,7 @@ static unsigned int rev32(unsigned int c) {
int hash_wt2(const char *src, int len) { int hash_wt2(const char *src, int len) {
unsigned int i = 0x3C964BA5; /* as many ones as zeroes */ unsigned int i = 0x3C964BA5; /* as many ones as zeroes */
unsigned int j, k; unsigned int j;
unsigned int ih, il; unsigned int ih, il;
int bit; int bit;
@ -171,6 +173,142 @@ int hash_wt2(const char *src, int len) {
return i; return i;
} }
//typedef unsigned int uint32_t;
//typedef unsigned short uint8_t;
//typedef unsigned char uint16_t;
/*
* http://www.azillionmonkeys.com/qed/hash.html
*/
#undef get16bits
#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
|| defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
#define get16bits(d) (*((const uint16_t *) (d)))
#endif
#if !defined (get16bits)
#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
+(uint32_t)(((const uint8_t *)(d))[0]) )
#endif
/*
* This function has a hole of 11 unused bits in bytes 2 and 3 of each block of
* 32 bits.
*/
int counts_SuperFastHash[NSERV][NSERV];
uint32_t SuperFastHash (const char * data, int len) {
uint32_t hash = len, tmp;
int rem;
if (len <= 0 || data == NULL) return 0;
rem = len & 3;
len >>= 2;
/* Main loop */
for (;len > 0; len--) {
hash += get16bits (data);
tmp = (get16bits (data+2) << 11) ^ hash;
hash = (hash << 16) ^ tmp;
data += 2*sizeof (uint16_t);
hash += hash >> 11;
}
/* Handle end cases */
switch (rem) {
case 3: hash += get16bits (data);
hash ^= hash << 16;
hash ^= data[sizeof (uint16_t)] << 18;
hash += hash >> 11;
break;
case 2: hash += get16bits (data);
hash ^= hash << 11;
hash += hash >> 17;
break;
case 1: hash += *data;
hash ^= hash << 10;
hash += hash >> 1;
}
/* Force "avalanching" of final 127 bits */
hash ^= hash << 3;
hash += hash >> 5;
hash ^= hash << 4;
hash += hash >> 17;
hash ^= hash << 25;
hash += hash >> 6;
return hash;
}
/*
* This variant uses all bits from the input block, and is about 15% faster.
*/
int counts_SuperFastHash2[NSERV][NSERV];
uint32_t SuperFastHash2 (const char * data, int len) {
uint32_t hash = len, tmp;
int rem;
if (len <= 0 || data == NULL) return 0;
rem = len & 3;
len >>= 2;
/* Main loop */
for (;len > 0; len--) {
register uint32_t next;
next = get16bits(data+2);
hash += get16bits(data);
tmp = ((next << 11) | (next >> 21)) ^ hash;
hash = (hash << 16) ^ tmp;
data += 2*sizeof (uint16_t);
hash += hash >> 11;
}
/* Handle end cases */
switch (rem) {
case 3: hash += get16bits (data);
hash ^= hash << 16;
hash ^= data[sizeof (uint16_t)] << 18;
hash += hash >> 11;
break;
case 2: hash += get16bits (data);
hash ^= hash << 11;
hash += hash >> 17;
break;
case 1: hash += *data;
hash ^= hash << 10;
hash += hash >> 1;
}
/* Force "avalanching" of final 127 bits */
hash ^= hash << 3;
hash += hash >> 5;
hash ^= hash << 4;
hash += hash >> 17;
hash ^= hash << 25;
hash += hash >> 6;
return hash;
}
/* len 4 for ipv4 and 16 for ipv6 */
int counts_srv[NSERV][NSERV];
unsigned int haproxy_server_hash(const char *addr, int len){
unsigned int h, l;
l = h = 0;
while ((l + sizeof (int)) <= len) {
h ^= ntohl(*(unsigned int *)(&addr[l]));
l += sizeof (int);
}
return h;
}/* end haproxy_server_hash() */
void count_hash_results(unsigned long hash, int counts[NSERV][NSERV]) { void count_hash_results(unsigned long hash, int counts[NSERV][NSERV]) {
int srv, nsrv; int srv, nsrv;
@ -196,7 +334,7 @@ void dump_hash_results(char *name, int counts[NSERV][NSERV]) {
printf("\n"); printf("\n");
} }
main() { int main() {
memset(counts_gd1, 0, sizeof(counts_gd1)); memset(counts_gd1, 0, sizeof(counts_gd1));
memset(counts_gd2, 0, sizeof(counts_gd2)); memset(counts_gd2, 0, sizeof(counts_gd2));
memset(counts_gd3, 0, sizeof(counts_gd3)); memset(counts_gd3, 0, sizeof(counts_gd3));
@ -205,6 +343,9 @@ main() {
memset(counts_gd6, 0, sizeof(counts_gd6)); memset(counts_gd6, 0, sizeof(counts_gd6));
memset(counts_wt1, 0, sizeof(counts_wt1)); memset(counts_wt1, 0, sizeof(counts_wt1));
memset(counts_wt2, 0, sizeof(counts_wt2)); memset(counts_wt2, 0, sizeof(counts_wt2));
memset(counts_srv, 0, sizeof(counts_srv));
memset(counts_SuperFastHash, 0, sizeof(counts_SuperFastHash));
memset(counts_SuperFastHash2, 0, sizeof(counts_SuperFastHash2));
while (fgets(line, MAXLINE, stdin) != NULL) { while (fgets(line, MAXLINE, stdin) != NULL) {
count_hash_results(hash_gd1(line), counts_gd1); count_hash_results(hash_gd1(line), counts_gd1);
@ -215,6 +356,9 @@ main() {
count_hash_results(hash_gd6(line), counts_gd6); count_hash_results(hash_gd6(line), counts_gd6);
count_hash_results(hash_wt1(31, line), counts_wt1); count_hash_results(hash_wt1(31, line), counts_wt1);
count_hash_results(hash_wt2(line, strlen(line)), counts_wt2); count_hash_results(hash_wt2(line, strlen(line)), counts_wt2);
count_hash_results(haproxy_server_hash(line, strlen(line)), counts_srv);
count_hash_results(SuperFastHash(line, strlen(line)), counts_SuperFastHash);
count_hash_results(SuperFastHash2(line, strlen(line)), counts_SuperFastHash2);
} }
dump_hash_results("hash_gd1", counts_gd1); dump_hash_results("hash_gd1", counts_gd1);
@ -225,4 +369,9 @@ main() {
dump_hash_results("hash_gd6", counts_gd6); dump_hash_results("hash_gd6", counts_gd6);
dump_hash_results("hash_wt1", counts_wt1); dump_hash_results("hash_wt1", counts_wt1);
dump_hash_results("hash_wt2", counts_wt2); dump_hash_results("hash_wt2", counts_wt2);
dump_hash_results("haproxy_server_hash", counts_srv);
dump_hash_results("SuperFastHash", counts_SuperFastHash);
dump_hash_results("SuperFastHash2", counts_SuperFastHash2);
return 0;
} }