mirror of
git://sourceware.org/git/lvm2.git
synced 2024-12-21 13:34:40 +03:00
hash: replace hash with better function
Add the Bob Jenkins hash function to get a better-working hash function, which generates far fewer collisions (especially with similar strings). For comparison, the hash function used in the DM kernel code is also included. While it is better than our existing one, it is still far worse than the Bob Jenkins hash.
This commit is contained in:
parent
d602837b91
commit
ff21723512
@ -37,8 +37,11 @@ struct dm_hash_table {
|
|||||||
struct dm_hash_node **slots;
|
struct dm_hash_node **slots;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Permutation of the Integers 0 through 255 */
|
#if 0 /* TO BE REMOVED */
|
||||||
static unsigned char _nums[] = {
|
static unsigned _hash(const void *key, unsigned len)
|
||||||
|
{
|
||||||
|
/* Permutation of the Integers 0 through 255 */
|
||||||
|
static unsigned char _nums[] = {
|
||||||
1, 14, 110, 25, 97, 174, 132, 119, 138, 170, 125, 118, 27, 233, 140, 51,
|
1, 14, 110, 25, 97, 174, 132, 119, 138, 170, 125, 118, 27, 233, 140, 51,
|
||||||
87, 197, 177, 107, 234, 169, 56, 68, 30, 7, 173, 73, 188, 40, 36, 65,
|
87, 197, 177, 107, 234, 169, 56, 68, 30, 7, 173, 73, 188, 40, 36, 65,
|
||||||
49, 213, 104, 190, 57, 211, 148, 223, 48, 115, 15, 2, 67, 186, 210, 28,
|
49, 213, 104, 190, 57, 211, 148, 223, 48, 115, 15, 2, 67, 186, 210, 28,
|
||||||
@ -63,23 +66,9 @@ static unsigned char _nums[] = {
|
|||||||
44, 38, 31, 149, 135, 0, 216, 52, 63, 23, 37, 69, 39, 117, 146, 184,
|
44, 38, 31, 149, 135, 0, 216, 52, 63, 23, 37, 69, 39, 117, 146, 184,
|
||||||
163, 200, 222, 235, 248, 243, 219, 10, 152, 131, 123, 229, 203, 76, 120,
|
163, 200, 222, 235, 248, 243, 219, 10, 152, 131, 123, 229, 203, 76, 120,
|
||||||
209
|
209
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct dm_hash_node *_create_node(const void *key, unsigned len)
|
const uint8_t *str = key;
|
||||||
{
|
|
||||||
struct dm_hash_node *n = malloc(sizeof(*n) + len);
|
|
||||||
|
|
||||||
if (n) {
|
|
||||||
memcpy(n->key, key, len);
|
|
||||||
n->keylen = len;
|
|
||||||
}
|
|
||||||
|
|
||||||
return n;
|
|
||||||
}
|
|
||||||
|
|
||||||
static unsigned _hash(const void *key, unsigned len)
|
|
||||||
{
|
|
||||||
const unsigned char *str = key;
|
|
||||||
unsigned h = 0, g;
|
unsigned h = 0, g;
|
||||||
unsigned i;
|
unsigned i;
|
||||||
|
|
||||||
@ -96,6 +85,73 @@ static unsigned _hash(const void *key, unsigned len)
|
|||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * In-kernel DM hashing, still lots of collisions.
 *
 * key: bytes to hash; each byte is read as an unsigned char
 * len: number of bytes to read from key
 *
 * Returns the accumulated multiplicative hash.  All arithmetic is done
 * in 'unsigned', so it wraps around instead of overflowing.
 */
static unsigned _hash_in_kernel(const char *key, unsigned len)
{
	/* The key is only read, so keep the const qualifier in the cast. */
	const unsigned char *str = (const unsigned char *)key;
	const unsigned hash_mult = 2654435387U;
	unsigned hash = 0, i;

	for (i = 0; i < len; ++i)
		hash = (hash + str[i]) * hash_mult;

	return hash;
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Read the two bytes at 'd' as a little-endian 16-bit value.
 *
 * The value is assembled byte by byte, so it works for a key at any
 * alignment and never accesses the key through a uint16_t lvalue.
 * On little-endian x86 this gives exactly the value a direct 16-bit
 * load would give, so resulting hash values stay the same.
 */
#undef get16bits
#define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
		      +(uint32_t)(((const uint8_t *)(d))[0]) )

/*
 * Adapted Bob Jenkins hash to read by 2 bytes if possible.
 * https://en.wikipedia.org/wiki/Jenkins_hash_function
 *
 * Reduces amount of hash collisions
 *
 * key: bytes to hash
 * len: number of bytes to read from key
 *
 * Returns the hash value; a zero-length key hashes to 0.
 */
static unsigned _hash(const void *key, unsigned len)
{
	const uint8_t *str = key;
	unsigned hash = 0, i;
	unsigned sz = len / 2;

	/* Mix in the key two bytes at a time. */
	for (i = 0; i < sz; ++i) {
		hash += get16bits(str + 2 * i);
		hash += (hash << 10);
		hash ^= (hash >> 6);
	}

	/* Odd length: mix in the one remaining byte. */
	if (len & 1) {
		hash += str[len - 1];
		hash += (hash << 10);
		hash ^= (hash >> 6);
	}

	/* Final mixing steps. */
	hash += (hash << 3);
	hash ^= (hash >> 11);
	hash += (hash << 15);

	return hash;
}
|
||||||
|
|
||||||
|
static struct dm_hash_node *_create_node(const void *key, unsigned len)
|
||||||
|
{
|
||||||
|
struct dm_hash_node *n = malloc(sizeof(*n) + len);
|
||||||
|
|
||||||
|
if (n) {
|
||||||
|
memcpy(n->key, key, len);
|
||||||
|
n->keylen = len;
|
||||||
|
}
|
||||||
|
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
struct dm_hash_table *dm_hash_create(unsigned size_hint)
|
struct dm_hash_table *dm_hash_create(unsigned size_hint)
|
||||||
{
|
{
|
||||||
size_t len;
|
size_t len;
|
||||||
|
Loading…
Reference in New Issue
Block a user