1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-03-25 10:50:08 +03:00

dict: Update hash function

Update hash function from classic Jenkins OAAT (dict.c) and a variant of
DJB2 (hash.c) to "GoodOAAT" taken from the SMHasher repo. This hash
function passes all SMHasher tests.
This commit is contained in:
Nick Wellnhofer 2023-09-04 16:07:23 +02:00
parent 93e8bb2a40
commit edc2dd48cb
4 changed files with 120 additions and 85 deletions

83
dict.c
View File

@ -92,7 +92,7 @@ struct _xmlDictEntry {
const xmlChar *name;
unsigned int len;
int valid;
unsigned long okey;
unsigned okey;
};
typedef struct _xmlDictStrings xmlDictStrings;
@ -374,34 +374,28 @@ found_pool:
*
* Calculate a hash key using a good hash function that works well for
* larger hash table sizes.
*
* Hash function by "One-at-a-Time Hash" see
* http://burtleburtle.net/bob/hash/doobs.html
*/
/*
 * NOTE: this is a diff hunk rendered without +/- markers, so two versions
 * of xmlDictComputeBigKey are interleaved below. Per the commit
 * description, the statements operating on `hash` are the previous
 * One-at-a-Time implementation and the HASH_INIT / HASH_UPDATE /
 * HASH_FINISH calls on h1/h2 are its replacement. Read one set at a time.
 *
 * data:    bytes to hash
 * namelen: number of bytes of data fed into the hash
 * seed:    initial hash state
 *
 * Returns 0 when namelen <= 0 or data is NULL; otherwise the computed hash
 * (`hash` in the old version, `h2` in the new one).
 */
#ifdef __clang__
ATTRIBUTE_NO_SANITIZE("unsigned-integer-overflow")
ATTRIBUTE_NO_SANITIZE("unsigned-shift-base")
#endif
/* Old return type followed by the new return type. */
static uint32_t
static unsigned
xmlDictComputeBigKey(const xmlChar* data, int namelen, unsigned seed) {
uint32_t hash;
unsigned h1, h2;
int i;
/* Empty or missing input hashes to 0 in both versions. */
if (namelen <= 0 || data == NULL) return(0);
hash = seed;
HASH_INIT(h1, h2, seed);
/* One mixing step per input byte (old: three statements, new: one macro). */
for (i = 0;i < namelen; i++) {
hash += data[i];
hash += (hash << 10);
hash ^= (hash >> 6);
HASH_UPDATE(h1, h2, data[i]);
}
/* Final mixing: old version first, then the new HASH_FINISH, whose result is left in h2. */
hash += (hash << 3);
hash ^= (hash >> 11);
hash += (hash << 15);
return hash;
HASH_FINISH(h1, h2);
return h2;
}
/*
@ -419,34 +413,27 @@ xmlDictComputeBigKey(const xmlChar* data, int namelen, unsigned seed) {
ATTRIBUTE_NO_SANITIZE("unsigned-integer-overflow")
ATTRIBUTE_NO_SANITIZE("unsigned-shift-base")
#endif
/*
 * NOTE: this is a diff hunk rendered without +/- markers, so two versions
 * of xmlDictComputeBigQKey are interleaved below (which is why the `for`
 * headers appear twice and the braces do not balance). Per the commit
 * description, the statements on `hash` are the previous One-at-a-Time
 * code and the HASH_* macro calls on h1/h2 are the replacement.
 *
 * Hashes the plen bytes of prefix, then a ':' byte, then the len bytes of
 * name, starting from seed.
 *
 * Returns the computed hash (`hash` in the old version, `h2` in the new).
 */
/* Old return type followed by the new return type. */
static unsigned long
static unsigned
xmlDictComputeBigQKey(const xmlChar *prefix, int plen,
const xmlChar *name, int len, unsigned seed)
{
uint32_t hash;
unsigned h1, h2;
int i;
hash = seed;
HASH_INIT(h1, h2, seed);
/* Feed the prefix bytes. */
for (i = 0;i < plen; i++) {
hash += prefix[i];
hash += (hash << 10);
hash ^= (hash >> 6);
for (i = 0; i < plen; i++) {
HASH_UPDATE(h1, h2, prefix[i]);
}
/* Feed the ':' separator between prefix and name. */
hash += ':';
hash += (hash << 10);
hash ^= (hash >> 6);
HASH_UPDATE(h1, h2, ':');
/* Feed the name bytes. */
for (i = 0;i < len; i++) {
hash += name[i];
hash += (hash << 10);
hash ^= (hash >> 6);
for (i = 0; i < len; i++) {
HASH_UPDATE(h1, h2, name[i]);
}
/* Final mixing: old version first, then the new HASH_FINISH, whose result is left in h2. */
hash += (hash << 3);
hash ^= (hash >> 11);
hash += (hash << 15);
return hash;
HASH_FINISH(h1, h2);
return h2;
}
#endif /* WITH_BIG_KEY */
@ -456,9 +443,13 @@ xmlDictComputeBigQKey(const xmlChar *prefix, int plen,
* Calculate a hash key using a fast hash function that works well
* for low hash table fill.
*/
static unsigned long
#ifdef __clang__
ATTRIBUTE_NO_SANITIZE("unsigned-integer-overflow")
ATTRIBUTE_NO_SANITIZE("unsigned-shift-base")
#endif
static unsigned
xmlDictComputeFastKey(const xmlChar *name, int namelen, unsigned seed) {
unsigned long value = seed;
unsigned value = seed;
if ((name == NULL) || (namelen <= 0))
return(value);
@ -500,11 +491,15 @@ xmlDictComputeFastKey(const xmlChar *name, int namelen, unsigned seed) {
*
* Neither of the two strings must be NULL.
*/
static unsigned long
#ifdef __clang__
ATTRIBUTE_NO_SANITIZE("unsigned-integer-overflow")
ATTRIBUTE_NO_SANITIZE("unsigned-shift-base")
#endif
static unsigned
xmlDictComputeFastQKey(const xmlChar *prefix, int plen,
const xmlChar *name, int len, unsigned seed)
{
unsigned long value = seed;
unsigned value = seed;
if (plen == 0)
value += 30 * ':';
@ -669,12 +664,12 @@ xmlDictReference(xmlDictPtr dict) {
*/
static int
xmlDictGrow(xmlDictPtr dict, size_t size) {
unsigned long key, okey;
unsigned key, okey;
size_t oldsize, i;
xmlDictEntryPtr iter, next;
struct _xmlDictEntry *olddict;
#ifdef DEBUG_GROW
unsigned long nbElem = 0;
unsigned nbElem = 0;
#endif
int ret = 0;
int keep_keys = 1;
@ -861,7 +856,7 @@ xmlDictFree(xmlDictPtr dict) {
*/
const xmlChar *
xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len) {
unsigned long key, okey, nbi = 0;
unsigned key, okey, nbi = 0;
xmlDictEntryPtr entry;
xmlDictEntryPtr insert;
const xmlChar *ret;
@ -914,7 +909,7 @@ xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len) {
}
if (dict->subdict) {
unsigned long skey;
unsigned skey;
/* we cannot always reuse the same okey for the subdict */
if (((dict->size == MIN_DICT_SIZE) &&
@ -1004,7 +999,7 @@ xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len) {
*/
const xmlChar *
xmlDictExists(xmlDictPtr dict, const xmlChar *name, int len) {
unsigned long key, okey;
unsigned key, okey;
xmlDictEntryPtr insert;
unsigned int l;
@ -1053,7 +1048,7 @@ xmlDictExists(xmlDictPtr dict, const xmlChar *name, int len) {
}
if (dict->subdict) {
unsigned long skey;
unsigned skey;
/* we cannot always reuse the same okey for the subdict */
if (((dict->size == MIN_DICT_SIZE) &&
@ -1110,7 +1105,7 @@ xmlDictExists(xmlDictPtr dict, const xmlChar *name, int len) {
*/
const xmlChar *
xmlDictQLookup(xmlDictPtr dict, const xmlChar *prefix, const xmlChar *name) {
unsigned long okey, key, nbi = 0;
unsigned okey, key, nbi = 0;
xmlDictEntryPtr entry;
xmlDictEntryPtr insert;
const xmlChar *ret;
@ -1146,7 +1141,7 @@ xmlDictQLookup(xmlDictPtr dict, const xmlChar *prefix, const xmlChar *name) {
}
if (dict->subdict) {
unsigned long skey;
unsigned skey;
/* we cannot always reuse the same okey for the subdict */
if (((dict->size == MIN_DICT_SIZE) &&

76
hash.c
View File

@ -81,88 +81,88 @@ struct _xmlHashTable {
ATTRIBUTE_NO_SANITIZE("unsigned-integer-overflow")
ATTRIBUTE_NO_SANITIZE("unsigned-shift-base")
#endif
/*
 * NOTE: this is a diff hunk rendered without +/- markers, so two versions
 * of xmlHashComputeKey are interleaved below. Per the commit description,
 * the statements on `value` are the previous hash (a DJB2 variant) and
 * the HASH_* macro calls on h1/h2 are the replacement.
 *
 * Hashes up to three NUL-terminated names, starting from
 * table->random_seed. A NULL name contributes no bytes of its own.
 *
 * Returns the hash reduced modulo table->size.
 */
/* Old return type followed by the new return type. */
static unsigned long
static unsigned
xmlHashComputeKey(xmlHashTablePtr table, const xmlChar *name,
const xmlChar *name2, const xmlChar *name3) {
unsigned long value;
unsigned long ch;
unsigned h1, h2, ch;
HASH_INIT(h1, h2, table->random_seed);
value = table->random_seed;
if (name != NULL) {
value += 30 * (*name);
while ((ch = *name++) != 0) {
value = value ^ ((value << 5) + (value >> 3) + ch);
HASH_UPDATE(h1, h2, ch);
}
}
/* An extra step (new code: a 0 byte) is mixed in between names, even when a name is NULL. */
value = value ^ ((value << 5) + (value >> 3));
HASH_UPDATE(h1, h2, 0);
if (name2 != NULL) {
while ((ch = *name2++) != 0) {
value = value ^ ((value << 5) + (value >> 3) + ch);
HASH_UPDATE(h1, h2, ch);
}
}
value = value ^ ((value << 5) + (value >> 3));
HASH_UPDATE(h1, h2, 0);
if (name3 != NULL) {
while ((ch = *name3++) != 0) {
value = value ^ ((value << 5) + (value >> 3) + ch);
HASH_UPDATE(h1, h2, ch);
}
}
/* Old return first; the new code finishes the hash and reduces h2 instead. */
return (value % table->size);
HASH_FINISH(h1, h2);
return (h2 % table->size);
}
/*
 * NOTE: this is a diff hunk rendered without +/- markers, so two versions
 * of xmlHashComputeQKey are interleaved below. Per the commit description,
 * the statements on `value` are the previous hash (a DJB2 variant) and
 * the HASH_* macro calls on h1/h2 are the replacement.
 *
 * Hashes up to three (prefix, name) pairs, starting from
 * table->random_seed. For each pair a non-NULL prefix is fed followed by
 * a ':' step, then the name if it is non-NULL.
 *
 * Returns the hash reduced modulo table->size.
 */
#ifdef __clang__
ATTRIBUTE_NO_SANITIZE("unsigned-integer-overflow")
ATTRIBUTE_NO_SANITIZE("unsigned-shift-base")
#endif
/* Old return type followed by the new return type. */
static unsigned long
static unsigned
xmlHashComputeQKey(xmlHashTablePtr table,
const xmlChar *prefix, const xmlChar *name,
const xmlChar *prefix2, const xmlChar *name2,
const xmlChar *prefix3, const xmlChar *name3) {
unsigned long value;
unsigned long ch;
unsigned h1, h2, ch;
value = table->random_seed;
/* Old code only: pre-mixes the first byte of prefix, or of name when prefix is NULL. */
if (prefix != NULL)
value += 30 * (*prefix);
else
value += 30 * (*name);
HASH_INIT(h1, h2, table->random_seed);
/* First pair: prefix, ':' step, name. */
if (prefix != NULL) {
while ((ch = *prefix++) != 0) {
value = value ^ ((value << 5) + (value >> 3) + ch);
HASH_UPDATE(h1, h2, ch);
}
value = value ^ ((value << 5) + (value >> 3) + ':');
HASH_UPDATE(h1, h2, ':');
}
if (name != NULL) {
while ((ch = *name++) != 0) {
value = value ^ ((value << 5) + (value >> 3) + ch);
HASH_UPDATE(h1, h2, ch);
}
}
/* An extra step (new code: a 0 byte) is mixed in between pairs. */
value = value ^ ((value << 5) + (value >> 3));
HASH_UPDATE(h1, h2, 0);
/* Second pair. */
if (prefix2 != NULL) {
while ((ch = *prefix2++) != 0) {
value = value ^ ((value << 5) + (value >> 3) + ch);
HASH_UPDATE(h1, h2, ch);
}
value = value ^ ((value << 5) + (value >> 3) + ':');
HASH_UPDATE(h1, h2, ':');
}
if (name2 != NULL) {
while ((ch = *name2++) != 0) {
value = value ^ ((value << 5) + (value >> 3) + ch);
HASH_UPDATE(h1, h2, ch);
}
}
value = value ^ ((value << 5) + (value >> 3));
HASH_UPDATE(h1, h2, 0);
/* Third pair. */
if (prefix3 != NULL) {
while ((ch = *prefix3++) != 0) {
value = value ^ ((value << 5) + (value >> 3) + ch);
HASH_UPDATE(h1, h2, ch);
}
value = value ^ ((value << 5) + (value >> 3) + ':');
HASH_UPDATE(h1, h2, ':');
}
if (name3 != NULL) {
while ((ch = *name3++) != 0) {
value = value ^ ((value << 5) + (value >> 3) + ch);
HASH_UPDATE(h1, h2, ch);
}
}
/* Old return first; the new code finishes the hash and reduces h2 instead. */
return (value % table->size);
HASH_FINISH(h1, h2);
return (h2 % table->size);
}
/**
@ -232,12 +232,12 @@ xmlHashCreateDict(int size, xmlDictPtr dict) {
*/
static int
xmlHashGrow(xmlHashTablePtr table, int size) {
unsigned long key;
unsigned key;
int oldsize, i;
xmlHashEntryPtr iter, next;
struct _xmlHashEntry *oldtable;
#ifdef DEBUG_GROW
unsigned long nbElem = 0;
unsigned nbElem = 0;
#endif
if (table == NULL)
@ -532,7 +532,7 @@ int
xmlHashAddEntry3(xmlHashTablePtr table, const xmlChar *name,
const xmlChar *name2, const xmlChar *name3,
void *userdata) {
unsigned long key, len = 0;
unsigned key, len = 0;
xmlHashEntryPtr entry;
xmlHashEntryPtr insert;
@ -676,7 +676,7 @@ int
xmlHashUpdateEntry3(xmlHashTablePtr table, const xmlChar *name,
const xmlChar *name2, const xmlChar *name3,
void *userdata, xmlHashDeallocator f) {
unsigned long key;
unsigned key;
xmlHashEntryPtr entry;
xmlHashEntryPtr insert;
@ -820,7 +820,7 @@ error:
void *
xmlHashLookup3(xmlHashTablePtr table, const xmlChar *name,
const xmlChar *name2, const xmlChar *name3) {
unsigned long key;
unsigned key;
xmlHashEntryPtr entry;
if (table == NULL)
@ -866,7 +866,7 @@ xmlHashQLookup3(xmlHashTablePtr table,
const xmlChar *prefix, const xmlChar *name,
const xmlChar *prefix2, const xmlChar *name2,
const xmlChar *prefix3, const xmlChar *name3) {
unsigned long key;
unsigned key;
xmlHashEntryPtr entry;
if (table == NULL)
@ -1142,7 +1142,7 @@ xmlHashRemoveEntry2(xmlHashTablePtr table, const xmlChar *name,
int
xmlHashRemoveEntry3(xmlHashTablePtr table, const xmlChar *name,
const xmlChar *name2, const xmlChar *name3, xmlHashDeallocator f) {
unsigned long key;
unsigned key;
xmlHashEntryPtr entry;
xmlHashEntryPtr prev = NULL;

View File

@ -1,7 +1,47 @@
#ifndef XML_DICT_H_PRIVATE__
#define XML_DICT_H_PRIVATE__
/*
 * 32-bit rotations.
 *
 * Values are ANDed with 0xFFFFFFFF before every right shift to support
 * platforms where unsigned is larger than 32 bits. With 32-bit unsigned
 * values, modern compilers should optimize the operation away. Bits above
 * bit 31 of the result are not cleared here; HASH_FINISH masks the final
 * value.
 *
 * n must satisfy 0 < n < 32: with a 32-bit operand, a count of 0 or 32
 * would make one of the two shifts as wide as the type.
 */
#define HASH_ROL(x,n) (((x) << (n)) | (((x) & 0xFFFFFFFF) >> (32 - (n))))
#define HASH_ROR(x,n) ((((x) & 0xFFFFFFFF) >> (n)) | ((x) << (32 - (n))))

/*
 * GoodOAAT: One of the smallest non-multiplicative One-At-a-Time functions
 * that passes SMHasher.
 *
 * Author: Sokolov Yura aka funny-falcon
 *
 * Usage: HASH_INIT once, HASH_UPDATE once per input byte, HASH_FINISH
 * once; the result is then in h2.
 *
 * h1 and h2 must be modifiable lvalues of an unsigned type. Every macro
 * argument is parenthesized in the expansion, so expression arguments
 * (for example `a | b` as seed) keep their meaning. Arguments are
 * evaluated more than once and must therefore be free of side effects.
 */
#define HASH_INIT(h1, h2, seed) \
    do { \
        (h1) = (seed) ^ 0x3b00; \
        (h2) = HASH_ROL(seed, 15); \
    } while (0)

/* Mix one input byte ch into the state (h1, h2). */
#define HASH_UPDATE(h1, h2, ch) \
    do { \
        (h1) += (ch); \
        (h1) += (h1) << 3; \
        (h2) += (h1); \
        (h2) = HASH_ROL(h2, 7); \
        (h2) += (h2) << 2; \
    } while (0)

/* Result is in h2, truncated to 32 bits. */
#define HASH_FINISH(h1, h2) \
    do { \
        (h1) ^= (h2); \
        (h1) += HASH_ROL(h2, 14); \
        (h2) ^= (h1); (h2) += HASH_ROR(h1, 6); \
        (h1) ^= (h2); (h1) += HASH_ROL(h2, 5); \
        (h2) ^= (h1); (h2) += HASH_ROR(h1, 8); \
        (h2) &= 0xFFFFFFFF; \
    } while (0)
XML_HIDDEN void
xmlInitDictInternal(void);

View File

@ -22,9 +22,9 @@ static const char *seeds2[] = {
NULL
};
#define NB_STRINGS_MAX 10000
#define NB_STRINGS_NS 1000
#define NB_STRINGS_PREFIX 50
#define NB_STRINGS_MAX 100000
#define NB_STRINGS_NS 10000
#define NB_STRINGS_PREFIX (NB_STRINGS_NS / 20)
#define NB_STRINGS_MIN 10
static xmlChar **strings1;