/* * dict.c: dictionary of reusable strings, just used to avoid allocation * and freeing operations. * * Copyright (C) 2003-2012 Daniel Veillard. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND * CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER. * * Author: daniel@veillard.com */ #define IN_LIBXML #include "libxml.h" #include #include #include #include #include #ifdef HAVE_SYS_RANDOM_H #include #endif #ifdef _WIN32 #define WIN32_LEAN_AND_MEAN #include #include #endif #include "private/dict.h" #include "private/globals.h" #include "private/threads.h" #include #include #include #include #ifndef SIZE_MAX #define SIZE_MAX ((size_t) -1) #endif #define MAX_FILL_NUM 7 #define MAX_FILL_DENOM 8 #define MIN_HASH_SIZE 8 #define MAX_HASH_SIZE (1u << 31) typedef struct _xmlDictStrings xmlDictStrings; typedef xmlDictStrings *xmlDictStringsPtr; struct _xmlDictStrings { xmlDictStringsPtr next; xmlChar *free; xmlChar *end; size_t size; size_t nbStrings; xmlChar array[1]; }; typedef xmlHashedString xmlDictEntry; /* * The entire dictionary */ struct _xmlDict { int ref_counter; xmlDictEntry *table; size_t size; unsigned int nbElems; xmlDictStringsPtr strings; struct _xmlDict *subdict; /* used for randomization */ unsigned seed; /* used to impose a limit on size */ size_t limit; }; /* * A mutex for modifying the reference counter for shared * dictionaries. */ static xmlMutex xmlDictMutex; /** * xmlInitializeDict: * * DEPRECATED: Alias for xmlInitParser. * * Returns 0. */ int xmlInitializeDict(void) { xmlInitParser(); return(0); } /** * xmlInitDictInternal: * * Initialize mutex. */ void xmlInitDictInternal(void) { xmlInitMutex(&xmlDictMutex); } /** * xmlDictCleanup: * * DEPRECATED: This function is a no-op. Call xmlCleanupParser * to free global state but see the warnings there. xmlCleanupParser * should be only called once at program exit. In most cases, you don't * have call cleanup functions at all. */ void xmlDictCleanup(void) { } /** * xmlCleanupDictInternal: * * Free the dictionary mutex. */ void xmlCleanupDictInternal(void) { xmlCleanupMutex(&xmlDictMutex); } /* * xmlDictAddString: * @dict: the dictionary * @name: the name of the userdata * @len: the length of the name * * Add the string to the array[s] * * Returns the pointer of the local string, or NULL in case of error. */ static const xmlChar * xmlDictAddString(xmlDictPtr dict, const xmlChar *name, unsigned int namelen) { xmlDictStringsPtr pool; const xmlChar *ret; size_t size = 0; /* + sizeof(_xmlDictStrings) == 1024 */ size_t limit = 0; pool = dict->strings; while (pool != NULL) { if ((size_t)(pool->end - pool->free) > namelen) goto found_pool; if (pool->size > size) size = pool->size; limit += pool->size; pool = pool->next; } /* * Not found, need to allocate */ if (pool == NULL) { if ((dict->limit > 0) && (limit > dict->limit)) { return(NULL); } if (size == 0) { size = 1000; } else { if (size < (SIZE_MAX - sizeof(xmlDictStrings)) / 4) size *= 4; /* exponential growth */ else size = SIZE_MAX - sizeof(xmlDictStrings); } if (size / 4 < namelen) { if ((size_t) namelen + 0 < (SIZE_MAX - sizeof(xmlDictStrings)) / 4) size = 4 * (size_t) namelen; /* just in case ! */ else return(NULL); } pool = (xmlDictStringsPtr) xmlMalloc(sizeof(xmlDictStrings) + size); if (pool == NULL) return(NULL); pool->size = size; pool->nbStrings = 0; pool->free = &pool->array[0]; pool->end = &pool->array[size]; pool->next = dict->strings; dict->strings = pool; } found_pool: ret = pool->free; memcpy(pool->free, name, namelen); pool->free += namelen; *(pool->free++) = 0; pool->nbStrings++; return(ret); } /* * xmlDictAddQString: * @dict: the dictionary * @prefix: the prefix of the userdata * @plen: the prefix length * @name: the name of the userdata * @len: the length of the name * * Add the QName to the array[s] * * Returns the pointer of the local string, or NULL in case of error. */ static const xmlChar * xmlDictAddQString(xmlDictPtr dict, const xmlChar *prefix, unsigned int plen, const xmlChar *name, unsigned int namelen) { xmlDictStringsPtr pool; const xmlChar *ret; size_t size = 0; /* + sizeof(_xmlDictStrings) == 1024 */ size_t limit = 0; pool = dict->strings; while (pool != NULL) { if ((size_t)(pool->end - pool->free) > namelen + plen + 1) goto found_pool; if (pool->size > size) size = pool->size; limit += pool->size; pool = pool->next; } /* * Not found, need to allocate */ if (pool == NULL) { if ((dict->limit > 0) && (limit > dict->limit)) { return(NULL); } if (size == 0) size = 1000; else size *= 4; /* exponential growth */ if (size < 4 * (namelen + plen + 1)) size = 4 * (namelen + plen + 1); /* just in case ! */ pool = (xmlDictStringsPtr) xmlMalloc(sizeof(xmlDictStrings) + size); if (pool == NULL) return(NULL); pool->size = size; pool->nbStrings = 0; pool->free = &pool->array[0]; pool->end = &pool->array[size]; pool->next = dict->strings; dict->strings = pool; } found_pool: ret = pool->free; memcpy(pool->free, prefix, plen); pool->free += plen; *(pool->free++) = ':'; memcpy(pool->free, name, namelen); pool->free += namelen; *(pool->free++) = 0; pool->nbStrings++; return(ret); } /** * xmlDictCreate: * * Create a new dictionary * * Returns the newly created dictionary, or NULL if an error occurred. */ xmlDictPtr xmlDictCreate(void) { xmlDictPtr dict; xmlInitParser(); dict = xmlMalloc(sizeof(xmlDict)); if (dict == NULL) return(NULL); dict->ref_counter = 1; dict->limit = 0; dict->size = 0; dict->nbElems = 0; dict->table = NULL; dict->strings = NULL; dict->subdict = NULL; dict->seed = xmlRandom(); #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION dict->seed = 0; #endif return(dict); } /** * xmlDictCreateSub: * @sub: an existing dictionary * * Create a new dictionary, inheriting strings from the read-only * dictionary @sub. On lookup, strings are first searched in the * new dictionary, then in @sub, and if not found are created in the * new dictionary. * * Returns the newly created dictionary, or NULL if an error occurred. */ xmlDictPtr xmlDictCreateSub(xmlDictPtr sub) { xmlDictPtr dict = xmlDictCreate(); if ((dict != NULL) && (sub != NULL)) { dict->seed = sub->seed; dict->subdict = sub; xmlDictReference(dict->subdict); } return(dict); } /** * xmlDictReference: * @dict: the dictionary * * Increment the reference counter of a dictionary * * Returns 0 in case of success and -1 in case of error */ int xmlDictReference(xmlDictPtr dict) { if (dict == NULL) return -1; xmlMutexLock(&xmlDictMutex); dict->ref_counter++; xmlMutexUnlock(&xmlDictMutex); return(0); } /** * xmlDictFree: * @dict: the dictionary * * Free the hash @dict and its contents. The userdata is * deallocated with @f if provided. */ void xmlDictFree(xmlDictPtr dict) { xmlDictStringsPtr pool, nextp; if (dict == NULL) return; /* decrement the counter, it may be shared by a parser and docs */ xmlMutexLock(&xmlDictMutex); dict->ref_counter--; if (dict->ref_counter > 0) { xmlMutexUnlock(&xmlDictMutex); return; } xmlMutexUnlock(&xmlDictMutex); if (dict->subdict != NULL) { xmlDictFree(dict->subdict); } if (dict->table) { xmlFree(dict->table); } pool = dict->strings; while (pool != NULL) { nextp = pool->next; xmlFree(pool); pool = nextp; } xmlFree(dict); } /** * xmlDictOwns: * @dict: the dictionary * @str: the string * * check if a string is owned by the dictionary * * Returns 1 if true, 0 if false and -1 in case of error * -1 in case of error */ int xmlDictOwns(xmlDictPtr dict, const xmlChar *str) { xmlDictStringsPtr pool; if ((dict == NULL) || (str == NULL)) return(-1); pool = dict->strings; while (pool != NULL) { if ((str >= &pool->array[0]) && (str <= pool->free)) return(1); pool = pool->next; } if (dict->subdict) return(xmlDictOwns(dict->subdict, str)); return(0); } /** * xmlDictSize: * @dict: the dictionary * * Query the number of elements installed in the hash @dict. * * Returns the number of elements in the dictionary or * -1 in case of error */ int xmlDictSize(xmlDictPtr dict) { if (dict == NULL) return(-1); if (dict->subdict) return(dict->nbElems + dict->subdict->nbElems); return(dict->nbElems); } /** * xmlDictSetLimit: * @dict: the dictionary * @limit: the limit in bytes * * Set a size limit for the dictionary * Added in 2.9.0 * * Returns the previous limit of the dictionary or 0 */ size_t xmlDictSetLimit(xmlDictPtr dict, size_t limit) { size_t ret; if (dict == NULL) return(0); ret = dict->limit; dict->limit = limit; return(ret); } /** * xmlDictGetUsage: * @dict: the dictionary * * Get how much memory is used by a dictionary for strings * Added in 2.9.0 * * Returns the amount of strings allocated */ size_t xmlDictGetUsage(xmlDictPtr dict) { xmlDictStringsPtr pool; size_t limit = 0; if (dict == NULL) return(0); pool = dict->strings; while (pool != NULL) { limit += pool->size; pool = pool->next; } return(limit); } /***************************************************************** * * The code below was rewritten and is additionally licensed under * the main license in file 'Copyright'. * *****************************************************************/ ATTRIBUTE_NO_SANITIZE_INTEGER static unsigned xmlDictHashName(unsigned seed, const xmlChar* data, size_t maxLen, size_t *plen) { unsigned h1, h2; size_t i; HASH_INIT(h1, h2, seed); for (i = 0; i < maxLen && data[i]; i++) { HASH_UPDATE(h1, h2, data[i]); } HASH_FINISH(h1, h2); *plen = i; return(h2 | MAX_HASH_SIZE); } ATTRIBUTE_NO_SANITIZE_INTEGER static unsigned xmlDictHashQName(unsigned seed, const xmlChar *prefix, const xmlChar *name, size_t *pplen, size_t *plen) { unsigned h1, h2; size_t i; HASH_INIT(h1, h2, seed); for (i = 0; prefix[i] != 0; i++) { HASH_UPDATE(h1, h2, prefix[i]); } *pplen = i; HASH_UPDATE(h1, h2, ':'); for (i = 0; name[i] != 0; i++) { HASH_UPDATE(h1, h2, name[i]); } *plen = i; HASH_FINISH(h1, h2); /* * Always set the upper bit of hash values since 0 means an unoccupied * bucket. */ return(h2 | MAX_HASH_SIZE); } unsigned xmlDictComputeHash(const xmlDict *dict, const xmlChar *string) { size_t len; return(xmlDictHashName(dict->seed, string, SIZE_MAX, &len)); } #define HASH_ROL31(x,n) ((x) << (n) | ((x) & 0x7FFFFFFF) >> (31 - (n))) ATTRIBUTE_NO_SANITIZE_INTEGER unsigned xmlDictCombineHash(unsigned v1, unsigned v2) { /* * The upper bit of hash values is always set, so we have to operate on * 31-bit hashes here. */ v1 ^= v2; v1 += HASH_ROL31(v2, 5); return((v1 & 0xFFFFFFFF) | 0x80000000); } /** * xmlDictFindEntry: * @dict: dict * @prefix: optional QName prefix * @name: string * @len: length of string * @hashValue: valid hash value of string * @pfound: result of search * * Try to find a matching hash table entry. If an entry was found, set * @found to 1 and return the entry. Otherwise, set @found to 0 and return * the location where a new entry should be inserted. */ ATTRIBUTE_NO_SANITIZE_INTEGER static xmlDictEntry * xmlDictFindEntry(const xmlDict *dict, const xmlChar *prefix, const xmlChar *name, int len, unsigned hashValue, int *pfound) { xmlDictEntry *entry; unsigned mask, pos, displ; int found = 0; mask = dict->size - 1; pos = hashValue & mask; entry = &dict->table[pos]; if (entry->hashValue != 0) { /* * Robin hood hashing: abort if the displacement of the entry * is smaller than the displacement of the key we look for. * This also stops at the correct position when inserting. */ displ = 0; do { if (entry->hashValue == hashValue) { if (prefix == NULL) { /* * name is not necessarily null-terminated. */ if ((strncmp((const char *) entry->name, (const char *) name, len) == 0) && (entry->name[len] == 0)) { found = 1; break; } } else { if (xmlStrQEqual(prefix, name, entry->name)) { found = 1; break; } } } displ++; pos++; entry++; if ((pos & mask) == 0) entry = dict->table; } while ((entry->hashValue != 0) && (((pos - entry->hashValue) & mask) >= displ)); } *pfound = found; return(entry); } /** * xmlDictGrow: * @dict: dictionary * @size: new size of the dictionary * * Resize the dictionary hash table. * * Returns 0 in case of success, -1 if a memory allocation failed. */ static int xmlDictGrow(xmlDictPtr dict, unsigned size) { const xmlDictEntry *oldentry, *oldend, *end; xmlDictEntry *table; unsigned oldsize, i; /* Add 0 to avoid spurious -Wtype-limits warning on 64-bit GCC */ if ((size_t) size + 0 > SIZE_MAX / sizeof(table[0])) return(-1); table = xmlMalloc(size * sizeof(table[0])); if (table == NULL) return(-1); memset(table, 0, size * sizeof(table[0])); oldsize = dict->size; if (oldsize == 0) goto done; oldend = &dict->table[oldsize]; end = &table[size]; /* * Robin Hood sorting order is maintained if we * * - compute dict indices with modulo * - resize by an integer factor * - start to copy from the beginning of a probe sequence */ oldentry = dict->table; while (oldentry->hashValue != 0) { if (++oldentry >= oldend) oldentry = dict->table; } for (i = 0; i < oldsize; i++) { if (oldentry->hashValue != 0) { xmlDictEntry *entry = &table[oldentry->hashValue & (size - 1)]; while (entry->hashValue != 0) { if (++entry >= end) entry = table; } *entry = *oldentry; } if (++oldentry >= oldend) oldentry = dict->table; } xmlFree(dict->table); done: dict->table = table; dict->size = size; return(0); } /** * xmlDictLookupInternal: * @dict: dict * @prefix: optional QName prefix * @name: string * @maybeLen: length of string or -1 if unknown * @update: whether the string should be added * * Internal lookup and update function. */ ATTRIBUTE_NO_SANITIZE_INTEGER static const xmlDictEntry * xmlDictLookupInternal(xmlDictPtr dict, const xmlChar *prefix, const xmlChar *name, int maybeLen, int update) { xmlDictEntry *entry = NULL; const xmlChar *ret; unsigned hashValue; size_t maxLen, len, plen, klen; int found = 0; if ((dict == NULL) || (name == NULL)) return(NULL); maxLen = (maybeLen < 0) ? SIZE_MAX : (size_t) maybeLen; if (prefix == NULL) { hashValue = xmlDictHashName(dict->seed, name, maxLen, &len); if (len > INT_MAX / 2) return(NULL); klen = len; } else { hashValue = xmlDictHashQName(dict->seed, prefix, name, &plen, &len); if ((len > INT_MAX / 2) || (plen >= INT_MAX / 2 - len)) return(NULL); klen = plen + 1 + len; } if ((dict->limit > 0) && (klen >= dict->limit)) return(NULL); /* * Check for an existing entry */ if (dict->size > 0) entry = xmlDictFindEntry(dict, prefix, name, klen, hashValue, &found); if (found) return(entry); if ((dict->subdict != NULL) && (dict->subdict->size > 0)) { xmlDictEntry *subEntry; unsigned subHashValue; if (prefix == NULL) subHashValue = xmlDictHashName(dict->subdict->seed, name, len, &len); else subHashValue = xmlDictHashQName(dict->subdict->seed, prefix, name, &plen, &len); subEntry = xmlDictFindEntry(dict->subdict, prefix, name, klen, subHashValue, &found); if (found) return(subEntry); } if (!update) return(NULL); /* * Grow the hash table if needed */ if (dict->nbElems + 1 > dict->size / MAX_FILL_DENOM * MAX_FILL_NUM) { unsigned newSize, mask, displ, pos; if (dict->size == 0) { newSize = MIN_HASH_SIZE; } else { if (dict->size >= MAX_HASH_SIZE) return(NULL); newSize = dict->size * 2; } if (xmlDictGrow(dict, newSize) != 0) return(NULL); /* * Find new entry */ mask = dict->size - 1; displ = 0; pos = hashValue & mask; entry = &dict->table[pos]; while ((entry->hashValue != 0) && ((pos - entry->hashValue) & mask) >= displ) { displ++; pos++; entry++; if ((pos & mask) == 0) entry = dict->table; } } if (prefix == NULL) ret = xmlDictAddString(dict, name, len); else ret = xmlDictAddQString(dict, prefix, plen, name, len); if (ret == NULL) return(NULL); /* * Shift the remainder of the probe sequence to the right */ if (entry->hashValue != 0) { const xmlDictEntry *end = &dict->table[dict->size]; const xmlDictEntry *cur = entry; do { cur++; if (cur >= end) cur = dict->table; } while (cur->hashValue != 0); if (cur < entry) { /* * If we traversed the end of the buffer, handle the part * at the start of the buffer. */ memmove(&dict->table[1], dict->table, (char *) cur - (char *) dict->table); cur = end - 1; dict->table[0] = *cur; } memmove(&entry[1], entry, (char *) cur - (char *) entry); } /* * Populate entry */ entry->hashValue = hashValue; entry->name = ret; dict->nbElems++; return(entry); } /** * xmlDictLookup: * @dict: dictionary * @name: string key * @len: length of the key, if -1 it is recomputed * * Lookup a string and add it to the dictionary if it wasn't found. * * Returns the interned copy of the string or NULL if a memory allocation * failed. */ const xmlChar * xmlDictLookup(xmlDictPtr dict, const xmlChar *name, int len) { const xmlDictEntry *entry; entry = xmlDictLookupInternal(dict, NULL, name, len, 1); if (entry == NULL) return(NULL); return(entry->name); } /** * xmlDictLookupHashed: * @dict: dictionary * @name: string key * @len: length of the key, if -1 it is recomputed * * Lookup a dictionary entry and add the string to the dictionary if * it wasn't found. * * Returns the dictionary entry. */ xmlHashedString xmlDictLookupHashed(xmlDictPtr dict, const xmlChar *name, int len) { const xmlDictEntry *entry; xmlHashedString ret; entry = xmlDictLookupInternal(dict, NULL, name, len, 1); if (entry == NULL) { ret.name = NULL; ret.hashValue = 0; } else { ret = *entry; } return(ret); } /** * xmlDictExists: * @dict: the dictionary * @name: the name of the userdata * @len: the length of the name, if -1 it is recomputed * * Check if a string exists in the dictionary. * * Returns the internal copy of the name or NULL if not found. */ const xmlChar * xmlDictExists(xmlDictPtr dict, const xmlChar *name, int len) { const xmlDictEntry *entry; entry = xmlDictLookupInternal(dict, NULL, name, len, 0); if (entry == NULL) return(NULL); return(entry->name); } /** * xmlDictQLookup: * @dict: the dictionary * @prefix: the prefix * @name: the name * * Lookup the QName @prefix:@name and add it to the dictionary if * it wasn't found. * * Returns the interned copy of the string or NULL if a memory allocation * failed. */ const xmlChar * xmlDictQLookup(xmlDictPtr dict, const xmlChar *prefix, const xmlChar *name) { const xmlDictEntry *entry; entry = xmlDictLookupInternal(dict, prefix, name, -1, 1); if (entry == NULL) return(NULL); return(entry->name); } /* * Pseudo-random generator */ static xmlMutex xmlRngMutex; static unsigned globalRngState[2]; ATTRIBUTE_NO_SANITIZE_INTEGER void xmlInitRandom(void) { xmlInitMutex(&xmlRngMutex); { #ifdef _WIN32 NTSTATUS status; status = BCryptGenRandom(NULL, (unsigned char *) globalRngState, sizeof(globalRngState), BCRYPT_USE_SYSTEM_PREFERRED_RNG); if (!BCRYPT_SUCCESS(status)) { fprintf(stderr, "libxml2: BCryptGenRandom failed with " "error code %lu\n", GetLastError()); abort(); } #elif defined(HAVE_GETENTROPY) while (1) { if (getentropy(globalRngState, sizeof(globalRngState)) == 0) break; if (errno != EINTR) { fprintf(stderr, "libxml2: getentropy failed with " "error code %d\n", errno); abort(); } } #else int var; globalRngState[0] = (unsigned) time(NULL) ^ HASH_ROL((unsigned) ((size_t) &xmlInitRandom & 0xFFFFFFFF), 8); globalRngState[1] = HASH_ROL((unsigned) ((size_t) &xmlRngMutex & 0xFFFFFFFF), 16) ^ HASH_ROL((unsigned) ((size_t) &var & 0xFFFFFFFF), 24); #endif } } void xmlCleanupRandom(void) { xmlCleanupMutex(&xmlRngMutex); } ATTRIBUTE_NO_SANITIZE_INTEGER static unsigned xoroshiro64ss(unsigned *s) { unsigned s0 = s[0]; unsigned s1 = s[1]; unsigned result = HASH_ROL(s0 * 0x9E3779BB, 5) * 5; s1 ^= s0; s[0] = HASH_ROL(s0, 26) ^ s1 ^ (s1 << 9); s[1] = HASH_ROL(s1, 13); return(result & 0xFFFFFFFF); } unsigned xmlGlobalRandom(void) { unsigned ret; xmlMutexLock(&xmlRngMutex); ret = xoroshiro64ss(globalRngState); xmlMutexUnlock(&xmlRngMutex); return(ret); } unsigned xmlRandom(void) { #ifdef LIBXML_THREAD_ENABLED return(xoroshiro64ss(xmlGetLocalRngState())); #else return(xmlGlobalRandom()); #endif }