mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-03-25 10:50:08 +03:00
dict: Update hash function
Update hash function from classic Jenkins OAAT (dict.c) and a variant of DJB2 (hash.c) to "GoodOAAT" taken from the SMHasher repo. This hash function passes all SMHasher tests.
This commit is contained in:
parent
93e8bb2a40
commit
edc2dd48cb
83
dict.c
83
dict.c
@ -92,7 +92,7 @@ struct _xmlDictEntry {
|
||||
const xmlChar *name;
|
||||
unsigned int len;
|
||||
int valid;
|
||||
unsigned long okey;
|
||||
unsigned okey;
|
||||
};
|
||||
|
||||
typedef struct _xmlDictStrings xmlDictStrings;
|
||||
@ -374,34 +374,28 @@ found_pool:
|
||||
*
|
||||
* Calculate a hash key using a good hash function that works well for
|
||||
* larger hash table sizes.
|
||||
*
|
||||
* Hash function by "One-at-a-Time Hash" see
|
||||
* http://burtleburtle.net/bob/hash/doobs.html
|
||||
*/
|
||||
|
||||
#ifdef __clang__
|
||||
ATTRIBUTE_NO_SANITIZE("unsigned-integer-overflow")
|
||||
ATTRIBUTE_NO_SANITIZE("unsigned-shift-base")
|
||||
#endif
|
||||
static uint32_t
|
||||
static unsigned
|
||||
xmlDictComputeBigKey(const xmlChar* data, int namelen, unsigned seed) {
|
||||
uint32_t hash;
|
||||
unsigned h1, h2;
|
||||
int i;
|
||||
|
||||
if (namelen <= 0 || data == NULL) return(0);
|
||||
|
||||
hash = seed;
|
||||
HASH_INIT(h1, h2, seed);
|
||||
|
||||
for (i = 0;i < namelen; i++) {
|
||||
hash += data[i];
|
||||
hash += (hash << 10);
|
||||
hash ^= (hash >> 6);
|
||||
HASH_UPDATE(h1, h2, data[i]);
|
||||
}
|
||||
hash += (hash << 3);
|
||||
hash ^= (hash >> 11);
|
||||
hash += (hash << 15);
|
||||
|
||||
return hash;
|
||||
HASH_FINISH(h1, h2);
|
||||
|
||||
return h2;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -419,34 +413,27 @@ xmlDictComputeBigKey(const xmlChar* data, int namelen, unsigned seed) {
|
||||
ATTRIBUTE_NO_SANITIZE("unsigned-integer-overflow")
|
||||
ATTRIBUTE_NO_SANITIZE("unsigned-shift-base")
|
||||
#endif
|
||||
static unsigned long
|
||||
static unsigned
|
||||
xmlDictComputeBigQKey(const xmlChar *prefix, int plen,
|
||||
const xmlChar *name, int len, unsigned seed)
|
||||
{
|
||||
uint32_t hash;
|
||||
unsigned h1, h2;
|
||||
int i;
|
||||
|
||||
hash = seed;
|
||||
HASH_INIT(h1, h2, seed);
|
||||
|
||||
for (i = 0;i < plen; i++) {
|
||||
hash += prefix[i];
|
||||
hash += (hash << 10);
|
||||
hash ^= (hash >> 6);
|
||||
for (i = 0; i < plen; i++) {
|
||||
HASH_UPDATE(h1, h2, prefix[i]);
|
||||
}
|
||||
hash += ':';
|
||||
hash += (hash << 10);
|
||||
hash ^= (hash >> 6);
|
||||
HASH_UPDATE(h1, h2, ':');
|
||||
|
||||
for (i = 0;i < len; i++) {
|
||||
hash += name[i];
|
||||
hash += (hash << 10);
|
||||
hash ^= (hash >> 6);
|
||||
for (i = 0; i < len; i++) {
|
||||
HASH_UPDATE(h1, h2, name[i]);
|
||||
}
|
||||
hash += (hash << 3);
|
||||
hash ^= (hash >> 11);
|
||||
hash += (hash << 15);
|
||||
|
||||
return hash;
|
||||
HASH_FINISH(h1, h2);
|
||||
|
||||
return h2;
|
||||
}
|
||||
#endif /* WITH_BIG_KEY */
|
||||
|
||||
@ -456,9 +443,13 @@ xmlDictComputeBigQKey(const xmlChar *prefix, int plen,
|
||||
* Calculate a hash key using a fast hash function that works well
|
||||
* for low hash table fill.
|
||||
*/
|
||||
static unsigned long
|
||||
#ifdef __clang__
|
||||
ATTRIBUTE_NO_SANITIZE("unsigned-integer-overflow")
|
||||
ATTRIBUTE_NO_SANITIZE("unsigned-shift-base")
|
||||
#endif
|
||||
static unsigned
|
||||
xmlDictComputeFastKey(const xmlChar *name, int namelen, unsigned seed) {
|
||||
unsigned long value = seed;
|
||||
unsigned value = seed;
|
||||
|
||||
if ((name == NULL) || (namelen <= 0))
|
||||
return(value);
|
||||
@ -500,11 +491,15 @@ xmlDictComputeFastKey(const xmlChar *name, int namelen, unsigned seed) {
|
||||
*
|
||||
* Neither of the two strings must be NULL.
|
||||
*/
|
||||
static unsigned long
|
||||
#ifdef __clang__
|
||||
ATTRIBUTE_NO_SANITIZE("unsigned-integer-overflow")
|
||||
ATTRIBUTE_NO_SANITIZE("unsigned-shift-base")
|
||||
#endif
|
||||
static unsigned
|
||||
xmlDictComputeFastQKey(const xmlChar *prefix, int plen,
|
||||
const xmlChar *name, int len, unsigned seed)
|
||||
{
|
||||
unsigned long value = seed;
|
||||
unsigned value = seed;
|
||||
|
||||
if (plen == 0)
|
||||
value += 30 * ':';
|
||||
@ -669,12 +664,12 @@ xmlDictReference(xmlDictPtr dict) {
|
||||
*/
|
||||
static int
|
||||
xmlDictGrow(xmlDictPtr dict, size_t size) {
|
||||
unsigned long key, okey;
|
||||
unsigned key, okey;
|
||||
size_t oldsize, i;
|
||||
xmlDictEntryPtr iter, next;
|
||||
struct _xmlDictEntry *olddict;
|
||||
#ifdef DEBUG_GROW
|
||||
unsigned long nbElem = 0;
|
||||
unsigned nbElem = 0;
|
||||
#endif
|
||||
int ret = 0;
|
||||
int keep_keys = 1;
|
||||
@ -861,7 +856,7 @@ xmlDictFree(xmlDictPtr dict) {
|
||||
*/
|
||||
const xmlChar *
|
||||
xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len) {
|
||||
unsigned long key, okey, nbi = 0;
|
||||
unsigned key, okey, nbi = 0;
|
||||
xmlDictEntryPtr entry;
|
||||
xmlDictEntryPtr insert;
|
||||
const xmlChar *ret;
|
||||
@ -914,7 +909,7 @@ xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len) {
|
||||
}
|
||||
|
||||
if (dict->subdict) {
|
||||
unsigned long skey;
|
||||
unsigned skey;
|
||||
|
||||
/* we cannot always reuse the same okey for the subdict */
|
||||
if (((dict->size == MIN_DICT_SIZE) &&
|
||||
@ -1004,7 +999,7 @@ xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len) {
|
||||
*/
|
||||
const xmlChar *
|
||||
xmlDictExists(xmlDictPtr dict, const xmlChar *name, int len) {
|
||||
unsigned long key, okey;
|
||||
unsigned key, okey;
|
||||
xmlDictEntryPtr insert;
|
||||
unsigned int l;
|
||||
|
||||
@ -1053,7 +1048,7 @@ xmlDictExists(xmlDictPtr dict, const xmlChar *name, int len) {
|
||||
}
|
||||
|
||||
if (dict->subdict) {
|
||||
unsigned long skey;
|
||||
unsigned skey;
|
||||
|
||||
/* we cannot always reuse the same okey for the subdict */
|
||||
if (((dict->size == MIN_DICT_SIZE) &&
|
||||
@ -1110,7 +1105,7 @@ xmlDictExists(xmlDictPtr dict, const xmlChar *name, int len) {
|
||||
*/
|
||||
const xmlChar *
|
||||
xmlDictQLookup(xmlDictPtr dict, const xmlChar *prefix, const xmlChar *name) {
|
||||
unsigned long okey, key, nbi = 0;
|
||||
unsigned okey, key, nbi = 0;
|
||||
xmlDictEntryPtr entry;
|
||||
xmlDictEntryPtr insert;
|
||||
const xmlChar *ret;
|
||||
@ -1146,7 +1141,7 @@ xmlDictQLookup(xmlDictPtr dict, const xmlChar *prefix, const xmlChar *name) {
|
||||
}
|
||||
|
||||
if (dict->subdict) {
|
||||
unsigned long skey;
|
||||
unsigned skey;
|
||||
|
||||
/* we cannot always reuse the same okey for the subdict */
|
||||
if (((dict->size == MIN_DICT_SIZE) &&
|
||||
|
76
hash.c
76
hash.c
@ -81,88 +81,88 @@ struct _xmlHashTable {
|
||||
ATTRIBUTE_NO_SANITIZE("unsigned-integer-overflow")
|
||||
ATTRIBUTE_NO_SANITIZE("unsigned-shift-base")
|
||||
#endif
|
||||
static unsigned long
|
||||
static unsigned
|
||||
xmlHashComputeKey(xmlHashTablePtr table, const xmlChar *name,
|
||||
const xmlChar *name2, const xmlChar *name3) {
|
||||
unsigned long value;
|
||||
unsigned long ch;
|
||||
unsigned h1, h2, ch;
|
||||
|
||||
HASH_INIT(h1, h2, table->random_seed);
|
||||
|
||||
value = table->random_seed;
|
||||
if (name != NULL) {
|
||||
value += 30 * (*name);
|
||||
while ((ch = *name++) != 0) {
|
||||
value = value ^ ((value << 5) + (value >> 3) + ch);
|
||||
HASH_UPDATE(h1, h2, ch);
|
||||
}
|
||||
}
|
||||
value = value ^ ((value << 5) + (value >> 3));
|
||||
HASH_UPDATE(h1, h2, 0);
|
||||
if (name2 != NULL) {
|
||||
while ((ch = *name2++) != 0) {
|
||||
value = value ^ ((value << 5) + (value >> 3) + ch);
|
||||
HASH_UPDATE(h1, h2, ch);
|
||||
}
|
||||
}
|
||||
value = value ^ ((value << 5) + (value >> 3));
|
||||
HASH_UPDATE(h1, h2, 0);
|
||||
if (name3 != NULL) {
|
||||
while ((ch = *name3++) != 0) {
|
||||
value = value ^ ((value << 5) + (value >> 3) + ch);
|
||||
HASH_UPDATE(h1, h2, ch);
|
||||
}
|
||||
}
|
||||
return (value % table->size);
|
||||
|
||||
HASH_FINISH(h1, h2);
|
||||
|
||||
return (h2 % table->size);
|
||||
}
|
||||
|
||||
#ifdef __clang__
|
||||
ATTRIBUTE_NO_SANITIZE("unsigned-integer-overflow")
|
||||
ATTRIBUTE_NO_SANITIZE("unsigned-shift-base")
|
||||
#endif
|
||||
static unsigned long
|
||||
static unsigned
|
||||
xmlHashComputeQKey(xmlHashTablePtr table,
|
||||
const xmlChar *prefix, const xmlChar *name,
|
||||
const xmlChar *prefix2, const xmlChar *name2,
|
||||
const xmlChar *prefix3, const xmlChar *name3) {
|
||||
unsigned long value;
|
||||
unsigned long ch;
|
||||
unsigned h1, h2, ch;
|
||||
|
||||
value = table->random_seed;
|
||||
if (prefix != NULL)
|
||||
value += 30 * (*prefix);
|
||||
else
|
||||
value += 30 * (*name);
|
||||
HASH_INIT(h1, h2, table->random_seed);
|
||||
|
||||
if (prefix != NULL) {
|
||||
while ((ch = *prefix++) != 0) {
|
||||
value = value ^ ((value << 5) + (value >> 3) + ch);
|
||||
HASH_UPDATE(h1, h2, ch);
|
||||
}
|
||||
value = value ^ ((value << 5) + (value >> 3) + ':');
|
||||
HASH_UPDATE(h1, h2, ':');
|
||||
}
|
||||
if (name != NULL) {
|
||||
while ((ch = *name++) != 0) {
|
||||
value = value ^ ((value << 5) + (value >> 3) + ch);
|
||||
HASH_UPDATE(h1, h2, ch);
|
||||
}
|
||||
}
|
||||
value = value ^ ((value << 5) + (value >> 3));
|
||||
HASH_UPDATE(h1, h2, 0);
|
||||
if (prefix2 != NULL) {
|
||||
while ((ch = *prefix2++) != 0) {
|
||||
value = value ^ ((value << 5) + (value >> 3) + ch);
|
||||
HASH_UPDATE(h1, h2, ch);
|
||||
}
|
||||
value = value ^ ((value << 5) + (value >> 3) + ':');
|
||||
HASH_UPDATE(h1, h2, ':');
|
||||
}
|
||||
if (name2 != NULL) {
|
||||
while ((ch = *name2++) != 0) {
|
||||
value = value ^ ((value << 5) + (value >> 3) + ch);
|
||||
HASH_UPDATE(h1, h2, ch);
|
||||
}
|
||||
}
|
||||
value = value ^ ((value << 5) + (value >> 3));
|
||||
HASH_UPDATE(h1, h2, 0);
|
||||
if (prefix3 != NULL) {
|
||||
while ((ch = *prefix3++) != 0) {
|
||||
value = value ^ ((value << 5) + (value >> 3) + ch);
|
||||
HASH_UPDATE(h1, h2, ch);
|
||||
}
|
||||
value = value ^ ((value << 5) + (value >> 3) + ':');
|
||||
HASH_UPDATE(h1, h2, ':');
|
||||
}
|
||||
if (name3 != NULL) {
|
||||
while ((ch = *name3++) != 0) {
|
||||
value = value ^ ((value << 5) + (value >> 3) + ch);
|
||||
HASH_UPDATE(h1, h2, ch);
|
||||
}
|
||||
}
|
||||
return (value % table->size);
|
||||
|
||||
HASH_FINISH(h1, h2);
|
||||
|
||||
return (h2 % table->size);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -232,12 +232,12 @@ xmlHashCreateDict(int size, xmlDictPtr dict) {
|
||||
*/
|
||||
static int
|
||||
xmlHashGrow(xmlHashTablePtr table, int size) {
|
||||
unsigned long key;
|
||||
unsigned key;
|
||||
int oldsize, i;
|
||||
xmlHashEntryPtr iter, next;
|
||||
struct _xmlHashEntry *oldtable;
|
||||
#ifdef DEBUG_GROW
|
||||
unsigned long nbElem = 0;
|
||||
unsigned nbElem = 0;
|
||||
#endif
|
||||
|
||||
if (table == NULL)
|
||||
@ -532,7 +532,7 @@ int
|
||||
xmlHashAddEntry3(xmlHashTablePtr table, const xmlChar *name,
|
||||
const xmlChar *name2, const xmlChar *name3,
|
||||
void *userdata) {
|
||||
unsigned long key, len = 0;
|
||||
unsigned key, len = 0;
|
||||
xmlHashEntryPtr entry;
|
||||
xmlHashEntryPtr insert;
|
||||
|
||||
@ -676,7 +676,7 @@ int
|
||||
xmlHashUpdateEntry3(xmlHashTablePtr table, const xmlChar *name,
|
||||
const xmlChar *name2, const xmlChar *name3,
|
||||
void *userdata, xmlHashDeallocator f) {
|
||||
unsigned long key;
|
||||
unsigned key;
|
||||
xmlHashEntryPtr entry;
|
||||
xmlHashEntryPtr insert;
|
||||
|
||||
@ -820,7 +820,7 @@ error:
|
||||
void *
|
||||
xmlHashLookup3(xmlHashTablePtr table, const xmlChar *name,
|
||||
const xmlChar *name2, const xmlChar *name3) {
|
||||
unsigned long key;
|
||||
unsigned key;
|
||||
xmlHashEntryPtr entry;
|
||||
|
||||
if (table == NULL)
|
||||
@ -866,7 +866,7 @@ xmlHashQLookup3(xmlHashTablePtr table,
|
||||
const xmlChar *prefix, const xmlChar *name,
|
||||
const xmlChar *prefix2, const xmlChar *name2,
|
||||
const xmlChar *prefix3, const xmlChar *name3) {
|
||||
unsigned long key;
|
||||
unsigned key;
|
||||
xmlHashEntryPtr entry;
|
||||
|
||||
if (table == NULL)
|
||||
@ -1142,7 +1142,7 @@ xmlHashRemoveEntry2(xmlHashTablePtr table, const xmlChar *name,
|
||||
int
|
||||
xmlHashRemoveEntry3(xmlHashTablePtr table, const xmlChar *name,
|
||||
const xmlChar *name2, const xmlChar *name3, xmlHashDeallocator f) {
|
||||
unsigned long key;
|
||||
unsigned key;
|
||||
xmlHashEntryPtr entry;
|
||||
xmlHashEntryPtr prev = NULL;
|
||||
|
||||
|
@ -1,7 +1,47 @@
|
||||
#ifndef XML_DICT_H_PRIVATE__
|
||||
#define XML_DICT_H_PRIVATE__
|
||||
|
||||
/*
 * Values are ANDed with 0xFFFFFFFF to support platforms where
 * unsigned is larger than 32 bits. With 32-bit unsigned values,
 * modern compilers should optimize the operation away.
 */

/*
 * 32-bit rotate left / rotate right of x by n bits.
 *
 * x is expanded twice, so it must be free of side effects. n is expected
 * to be in the range 1..31: with a 32-bit operand, a count of 0 or 32
 * would turn one of the two shifts into a shift by the full type width.
 * Bits above bit 31 of the result are not cleared here; HASH_FINISH
 * masks the final value.
 */
#define HASH_ROL(x,n) ((x) << (n) | ((x) & 0xFFFFFFFF) >> (32 - (n)))
#define HASH_ROR(x,n) (((x) & 0xFFFFFFFF) >> (n) | (x) << (32 - (n)))

/*
 * GoodOAAT: One of the smallest non-multiplicative One-At-a-Time functions
 * that passes SMHasher.
 *
 * Author: Sokolov Yura aka funny-falcon
 *
 * Usage: HASH_INIT once, HASH_UPDATE for every input byte, then
 * HASH_FINISH. h1 and h2 must be plain modifiable lvalues of an
 * unsigned type; they hold the running state.
 */

/*
 * Seed the two state words. The seed argument is parenthesized so that
 * an expression such as "a | b" is combined as a whole before the XOR.
 */
#define HASH_INIT(h1, h2, seed) \
    do { \
        h1 = (seed) ^ 0x3b00; \
        h2 = HASH_ROL(seed, 15); \
    } while (0)

/*
 * Mix one input value ch into the state. ch is evaluated exactly once.
 */
#define HASH_UPDATE(h1, h2, ch) \
    do { \
        h1 += (ch); \
        h1 += h1 << 3; \
        h2 += h1; \
        h2 = HASH_ROL(h2, 7); \
        h2 += h2 << 2; \
    } while (0)

/*
 * Final avalanche rounds. Result is in h2, truncated to 32 bits;
 * h1 is left in an intermediate state and should not be used afterwards.
 */
#define HASH_FINISH(h1, h2) \
    do { \
        h1 ^= h2; \
        h1 += HASH_ROL(h2, 14); \
        h2 ^= h1; h2 += HASH_ROR(h1, 6); \
        h1 ^= h2; h1 += HASH_ROL(h2, 5); \
        h2 ^= h1; h2 += HASH_ROR(h1, 8); \
        h2 &= 0xFFFFFFFF; \
    } while (0)
|
||||
|
||||
XML_HIDDEN void
|
||||
xmlInitDictInternal(void);
|
||||
|
@ -22,9 +22,9 @@ static const char *seeds2[] = {
|
||||
NULL
|
||||
};
|
||||
|
||||
#define NB_STRINGS_MAX 10000
|
||||
#define NB_STRINGS_NS 1000
|
||||
#define NB_STRINGS_PREFIX 50
|
||||
#define NB_STRINGS_MAX 100000
|
||||
#define NB_STRINGS_NS 10000
|
||||
#define NB_STRINGS_PREFIX (NB_STRINGS_NS / 20)
|
||||
#define NB_STRINGS_MIN 10
|
||||
|
||||
static xmlChar **strings1;
|
||||
|
Loading…
x
Reference in New Issue
Block a user